int_or_none,
mimetype2ext,
orderedSet,
+ parse_codecs,
parse_duration,
remove_quotes,
remove_start,
unsmuggle_url,
uppercase_escape,
urlencode_postdata,
- ISO3166Utils,
)
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False
+ _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL)[0-9A-Za-z-_]{10,}'
+
def _set_language(self):
self._set_cookie(
'.youtube.com', 'PREF', 'f1=50000000&hl=en',
)
)? # all until now is optional -> you can pass the naked ID
([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
- (?!.*?\blist=) # combined list/video URLs are handled by the playlist IE
+ (?!.*?\blist=
+ (?:
+ %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
+ WL # WL are handled by the watch later IE
+ )
+ )
(?(1).+)? # if we found the ID, everything can follow
- $"""
+ $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
_formats = {
'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
'151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
# DASH mp4 video
- '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
- '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
- '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
- '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
- '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
- '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
- '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
- '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
- '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
- '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
- '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
- '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
+ '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
+ '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
+ '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
+ '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
# Dash mp4 audio
- '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
- '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
- '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
- '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'},
- '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'},
+ '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
+ '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
+ '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
+ '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
+ '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
+ '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
+ '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
# Dash webm
- '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
- '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
- '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
- '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
- '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
- '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
- '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9', 'preference': -40},
- '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
- '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
- '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
- '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
- '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
- '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
- '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
- '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
+ '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
+ '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
+ '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
+ '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
+ '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
+ '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
+ '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
+ '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
# itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
- '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
- '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
- '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
- '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
- '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
- '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
+ '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
+ '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
+ '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
+ '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
# Dash webm audio
- '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
- '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
+ '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
+ '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
# Dash webm audio with opus inside
- '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
- '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
- '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},
+ '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
+ '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
+ '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
# RTMP (unnamed)
'_rtmp': {'protocol': 'rtmp'},
}
_SUBTITLE_FORMATS = ('ttml', 'vtt')
+ _GEO_BYPASS = False
+
IE_NAME = 'youtube'
_TESTS = [
{
# itag 212
'url': '1t24XAntNCY',
'only_matching': True,
- }
+ },
+ {
+ # geo restricted to JP
+ 'url': 'sJL6WA-aGkQ',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
+ 'only_matching': True,
+ },
]
def __init__(self, *args, **kwargs):
def _extract_signature_function(self, video_id, player_url, example_sig):
id_m = re.match(
- r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|/base)?\.(?P<ext>[a-z]+)$',
+ r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
player_url)
if not id_m:
raise ExtractorError('Cannot identify player %r' % player_url)
video_id = mobj.group(2)
return video_id
- def _extract_from_m3u8(self, manifest_url, video_id):
- url_map = {}
-
- def _get_urls(_manifest):
- lines = _manifest.split('\n')
- urls = filter(lambda l: l and not l.startswith('#'),
- lines)
- return urls
- manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest')
- formats_urls = _get_urls(manifest)
- for format_url in formats_urls:
- itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
- url_map[itag] = format_url
- return url_map
-
def _extract_annotations(self, video_id):
url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
+ @staticmethod
+ def _extract_chapters(description, duration):
+ if not description:
+ return None
+ chapter_lines = re.findall(
+ r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
+ description)
+ if not chapter_lines:
+ return None
+ chapters = []
+ for next_num, (chapter_line, time_point) in enumerate(
+ chapter_lines, start=1):
+ start_time = parse_duration(time_point)
+ if start_time is None:
+ continue
+ end_time = (duration if next_num == len(chapter_lines)
+ else parse_duration(chapter_lines[next_num][1]))
+ if end_time is None:
+ continue
+ chapter_title = re.sub(
+ r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
+ chapter_title = re.sub(r'\s+', ' ', chapter_title)
+ chapters.append({
+ 'start_time': start_time,
+ 'end_time': end_time,
+ 'title': chapter_title,
+ })
+ return chapters
+
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
if 'token' not in video_info:
if 'reason' in video_info:
if 'The uploader has not made this video available in your country.' in video_info['reason']:
- regions_allowed = self._html_search_meta('regionsAllowed', video_webpage, default=None)
- if regions_allowed:
- raise ExtractorError('YouTube said: This video is available in %s only' % (
- ', '.join(map(ISO3166Utils.short2full, regions_allowed.split(',')))),
- expected=True)
+ regions_allowed = self._html_search_meta(
+ 'regionsAllowed', video_webpage, default=None)
+ countries = regions_allowed.split(',') if regions_allowed else None
+ self.raise_geo_restricted(
+ msg=video_info['reason'][0], countries=countries)
raise ExtractorError(
'YouTube said: %s' % video_info['reason'][0],
expected=True, video_id=video_id)
video_title = '_'
# description
- video_description = get_element_by_id("eow-description", video_webpage)
+ description_original = video_description = get_element_by_id("eow-description", video_webpage)
if video_description:
- video_description = re.sub(r'''(?x)
+ description_original = video_description = re.sub(r'''(?x)
<a\s+
(?:[a-zA-Z-]+="[^"]*"\s+)*?
(?:title|href)="([^"]+)"\s+
# Check for "rental" videos
if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
- raise ExtractorError('"rental" videos not supported')
+ raise ExtractorError('"rental" videos not supported. See https://github.com/rg3/youtube-dl/issues/359 for more information.', expected=True)
# Start extracting information
self.report_information_extraction(video_id)
if self._downloader.params.get('writeannotations', False):
video_annotations = self._extract_annotations(video_id)
- def _map_to_format_list(urlmap):
- formats = []
- for itag, video_real_url in urlmap.items():
- dct = {
- 'format_id': itag,
- 'url': video_real_url,
- 'player_url': player_url,
- }
- if itag in self._formats:
- dct.update(self._formats[itag])
- formats.append(dct)
- return formats
+ chapters = self._extract_chapters(description_original, video_duration)
if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
self.report_rtmp_download()
player_desc = 'flash player %s' % player_version
else:
player_version = self._search_regex(
- [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js', r'(?:www|player)-([^/]+)/base\.js'],
+ [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
+ r'(?:www|player)-([^/]+)(?:/[a-z]{2}_[A-Z]{2})?/base\.js'],
player_url,
'html5 player', fatal=False)
player_desc = 'html5 player %s' % player_version
codecs = mobj.group('val')
break
if codecs:
- codecs = codecs.split(',')
- if len(codecs) == 2:
- acodec, vcodec = codecs[1], codecs[0]
- else:
- acodec, vcodec = (codecs[0], 'none') if kind == 'audio' else ('none', codecs[0])
- dct.update({
- 'acodec': acodec,
- 'vcodec': vcodec,
- })
+ dct.update(parse_codecs(codecs))
formats.append(dct)
elif video_info.get('hlsvp'):
manifest_url = video_info['hlsvp'][0]
- url_map = self._extract_from_m3u8(manifest_url, video_id)
- formats = _map_to_format_list(url_map)
- # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
- for a_format in formats:
+ formats = []
+ m3u8_formats = self._extract_m3u8_formats(
+ manifest_url, video_id, 'mp4', fatal=False)
+ for a_format in m3u8_formats:
+ itag = self._search_regex(
+ r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
+ if itag:
+ a_format['format_id'] = itag
+ if itag in self._formats:
+ dct = self._formats[itag].copy()
+ dct.update(a_format)
+ a_format = dct
+ a_format['player_url'] = player_url
+ # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
+ formats.append(a_format)
else:
unavailable_message = self._html_search_regex(
r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
'duration': video_duration,
'age_limit': 18 if age_gate else 0,
'annotations': video_annotations,
+ 'chapters': chapters,
'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
'view_count': view_count,
'like_count': like_count,
(?:
youtube\.com/
(?:
- (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
+ (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
\? (?:.*?[&;])*? (?:p|a|list)=
| p/
)|
)
.*
|
- ((?:PL|LL|EC|UU|FL|RD|UL|TL)[0-9A-Za-z-_]{10,})
- )"""
+ (%(playlist_id)s)
+ )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&disable_polymer=true'
_VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
IE_NAME = 'youtube:playlist'
'title': 'JODA15',
'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
}
+ }, {
+ 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
+ 'playlist_mincount': 485,
+ 'info_dict': {
+ 'title': '2017 華語最新單曲 (2/24更新)',
+ 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
+ }
}, {
'note': 'Embedded SWF player',
'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
# Check if it's a video-specific URL
query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
video_id = query_dict.get('v', [None])[0] or self._search_regex(
- r'(?:^|//)youtu\.be/([0-9A-Za-z_-]{11})', url,
+ r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
'video id', default=None)
if video_id:
if self._downloader.params.get('noplaylist'):
'url': 'https://www.youtube.com/gametrailers',
'only_matching': True,
}, {
- # This channel is not available.
+ # This channel is not available, geo restricted to JP
'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
'only_matching': True,
}]
videos = []
limit = n
+ url_query = {
+ 'search_query': query.encode('utf-8'),
+ }
+ url_query.update(self._EXTRA_QUERY_ARGS)
+ result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
+
for pagenum in itertools.count(1):
- url_query = {
- 'search_query': query.encode('utf-8'),
- 'page': pagenum,
- 'spf': 'navigate',
- }
- url_query.update(self._EXTRA_QUERY_ARGS)
- result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
data = self._download_json(
result_url, video_id='query "%s"' % query,
note='Downloading page %s' % pagenum,
- errnote='Unable to download API page')
+ errnote='Unable to download API page',
+ query={'spf': 'navigate'})
html_content = data[1]['body']['content']
if 'class="search-message' in html_content:
videos += new_videos
if not new_videos or len(videos) > limit:
break
+ next_link = self._html_search_regex(
+ r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
+ html_content, 'next link', default=None)
+ if next_link is None:
+ break
+ result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)
if len(videos) > n:
videos = videos[:n]