projects
/
youtube-dl
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
[youtube] fix extraction for embed restricted live streams(fixes #16433)
[youtube-dl]
/
youtube_dl
/
extractor
/
youtube.py
diff --git
a/youtube_dl/extractor/youtube.py
b/youtube_dl/extractor/youtube.py
index 43051512bc1013640bc99d98437b3cf2a7b258a3..1f29e8a4ea940418ad335d27459c07082161a9d7 100644
(file)
--- a/
youtube_dl/extractor/youtube.py
+++ b/
youtube_dl/extractor/youtube.py
@@
-87,7
+87,7
@@
class YoutubeBaseInfoExtractor(InfoExtractor):
(username, password) = self._get_login_info()
# No authentication to be performed
if username is None:
(username, password) = self._get_login_info()
# No authentication to be performed
if username is None:
- if self._LOGIN_REQUIRED:
+ if self._LOGIN_REQUIRED
and self._downloader.params.get('cookiefile') is None
:
raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
return True
raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
return True
@@
-246,9
+246,9
@@
class YoutubeBaseInfoExtractor(InfoExtractor):
return True
return True
- def _download_webpage(self, *args, **kwargs):
+ def _download_webpage
_handle
(self, *args, **kwargs):
kwargs.setdefault('query', {})['disable_polymer'] = 'true'
kwargs.setdefault('query', {})['disable_polymer'] = 'true'
- return super(YoutubeBaseInfoExtractor, self)._download_webpage(
+ return super(YoutubeBaseInfoExtractor, self)._download_webpage
_handle
(
*args, **compat_kwargs(kwargs))
def _real_initialize(self):
*args, **compat_kwargs(kwargs))
def _real_initialize(self):
@@
-1537,7
+1537,7
@@
class YoutubeIE(YoutubeBaseInfoExtractor):
ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
if ytplayer_config:
args = ytplayer_config['args']
ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
if ytplayer_config:
args = ytplayer_config['args']
- if args.get('url_encoded_fmt_stream_map'):
+ if args.get('url_encoded_fmt_stream_map')
or args.get('hlsvp')
:
# Convert to the same format returned by compat_parse_qs
video_info = dict((k, [v]) for k, v in args.items())
add_dash_mpd(video_info)
# Convert to the same format returned by compat_parse_qs
video_info = dict((k, [v]) for k, v in args.items())
add_dash_mpd(video_info)
@@
-1944,6
+1944,11
@@
class YoutubeIE(YoutubeBaseInfoExtractor):
break
if codecs:
dct.update(parse_codecs(codecs))
break
if codecs:
dct.update(parse_codecs(codecs))
+ if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
+ dct['downloader_options'] = {
+ # Youtube throttles chunks >~10M
+ 'http_chunk_size': 10485760,
+ }
formats.append(dct)
elif video_info.get('hlsvp'):
manifest_url = video_info['hlsvp'][0]
formats.append(dct)
elif video_info.get('hlsvp'):
manifest_url = video_info['hlsvp'][0]
@@
-1964,9
+1969,11
@@
class YoutubeIE(YoutubeBaseInfoExtractor):
a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
formats.append(a_format)
else:
a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
formats.append(a_format)
else:
- unavailable_message = extract_unavailable_message()
- if unavailable_message:
- raise ExtractorError(unavailable_message, expected=True)
+ error_message = clean_html(video_info.get('reason', [None])[0])
+ if not error_message:
+ error_message = extract_unavailable_message()
+ if error_message:
+ raise ExtractorError(error_message, expected=True)
raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
# Look for the DASH manifest
raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
# Look for the DASH manifest
@@
-2446,7
+2453,7
@@
class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
class YoutubeUserIE(YoutubeChannelIE):
IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
class YoutubeUserIE(YoutubeChannelIE):
IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
- _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
+ _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results
|shared
)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
_TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
IE_NAME = 'youtube:user'
_TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
IE_NAME = 'youtube:user'
@@
-2578,7
+2585,11
@@
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
}]
}]
-class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
+class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
+ _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
+
+
+class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
IE_DESC = 'YouTube.com searches'
# there doesn't appear to be a real limit, for example if you search for
# 'python' you get more than 8.000.000 results
IE_DESC = 'YouTube.com searches'
# there doesn't appear to be a real limit, for example if you search for
# 'python' you get more than 8.000.000 results
@@
-2612,8
+2623,7
@@
class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
raise ExtractorError(
'[youtube] No video results', expected=True)
raise ExtractorError(
'[youtube] No video results', expected=True)
- new_videos = self._ids_to_results(orderedSet(re.findall(
- r'href="/watch\?v=(.{11})', html_content)))
+ new_videos = list(self._process_page(html_content))
videos += new_videos
if not new_videos or len(videos) > limit:
break
videos += new_videos
if not new_videos or len(videos) > limit:
break
@@
-2636,11
+2646,10
@@
class YoutubeSearchDateIE(YoutubeSearchIE):
_EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
_EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
-class YoutubeSearchURLIE(Youtube
Playlist
BaseInfoExtractor):
+class YoutubeSearchURLIE(Youtube
Search
BaseInfoExtractor):
IE_DESC = 'YouTube.com search URLs'
IE_NAME = 'youtube:search_url'
_VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
IE_DESC = 'YouTube.com search URLs'
IE_NAME = 'youtube:search_url'
_VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
- _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
_TESTS = [{
'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
'playlist_mincount': 5,
_TESTS = [{
'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
'playlist_mincount': 5,
@@
-2692,10
+2701,7
@@
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
def _real_initialize(self):
self._login()
def _real_initialize(self):
self._login()
- def _real_extract(self, url):
- page = self._download_webpage(
- 'https://www.youtube.com/feed/%s' % self._FEED_NAME, self._PLAYLIST_TITLE)
-
+ def _entries(self, page):
# The extraction process is the same as for playlists, but the regex
# for the video ids doesn't contain an index
ids = []
# The extraction process is the same as for playlists, but the regex
# for the video ids doesn't contain an index
ids = []
@@
-2706,12
+2712,15
@@
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
# 'recommended' feed has infinite 'load more' and each new portion spins
# the same videos in (sometimes) slightly different order, so we'll check
# for unicity and break when portion has no new videos
# 'recommended' feed has infinite 'load more' and each new portion spins
# the same videos in (sometimes) slightly different order, so we'll check
# for unicity and break when portion has no new videos
- new_ids =
filter(lambda video_id: video_id not in ids, orderedSet(matches
))
+ new_ids =
list(filter(lambda video_id: video_id not in ids, orderedSet(matches)
))
if not new_ids:
break
ids.extend(new_ids)
if not new_ids:
break
ids.extend(new_ids)
+ for entry in self._ids_to_results(new_ids):
+ yield entry
+
mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
if not mobj:
break
mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
if not mobj:
break
@@
-2723,8
+2732,12
@@
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
content_html = more['content_html']
more_widget_html = more['load_more_widget_html']
content_html = more['content_html']
more_widget_html = more['load_more_widget_html']
+ def _real_extract(self, url):
+ page = self._download_webpage(
+ 'https://www.youtube.com/feed/%s' % self._FEED_NAME,
+ self._PLAYLIST_TITLE)
return self.playlist_result(
return self.playlist_result(
- self._
ids_to_results(ids
), playlist_title=self._PLAYLIST_TITLE)
+ self._
entries(page
), playlist_title=self._PLAYLIST_TITLE)
class YoutubeWatchLaterIE(YoutubePlaylistIE):
class YoutubeWatchLaterIE(YoutubePlaylistIE):