9 from .common import InfoExtractor, SearchInfoExtractor
10 from .subtitles import SubtitlesIE
16 compat_urllib_request,
class YoutubeSubtitlesIE(SubtitlesIE):
    # Mixin providing subtitle and automatic-caption retrieval for YouTube.
    # NOTE(review): this is an excerpt — several original lines (try:,
    # else:, returns, parts of dict literals) are elided below.

    def _get_available_subtitles(self, video_id):
        # Ask Google's timedtext service for the list of subtitle tracks.
        request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
            # NOTE(review): enclosing try: line elided from this excerpt.
            sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            # Network failure is non-fatal here: warn and continue without subs.
            self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
        # Each advertised track looks like: name="..." ... lang_code="..."
        lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
        params = compat_urllib_parse.urlencode({
            'fmt': self._downloader.params.get('subtitlesformat'),
        url = u'http://www.youtube.com/api/timedtext?' + params
        sub_lang_list[lang] = url
        self._downloader.report_warning(u'video doesn\'t have subtitles')

    def _request_automatic_caption(self, video_id, webpage):
        """We need the webpage for getting the captions url, pass it as an
        argument to speed up the process."""
        sub_lang = self._downloader.params.get('subtitleslang') or 'en'
        sub_format = self._downloader.params.get('subtitlesformat')
        self.to_screen(u'%s: Looking for automatic captions' % video_id)
        # The caption URL lives inside the embedded player configuration JSON.
        mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
        err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang
            self._downloader.report_warning(err_msg)
        player_config = json.loads(mobj.group(1))
            args = player_config[u'args']
            caption_url = args[u'ttsurl']
            timestamp = args[u'timestamp']
            params = compat_urllib_parse.urlencode({
            subtitles_url = caption_url + '&' + params
            sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions')
            return {sub_lang: sub}
        # An extractor error can be raised by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, ExtractorError):
            self._downloader.report_warning(err_msg)
class YoutubeIE(YoutubeSubtitlesIE):
    """Extractor for single YouTube videos.

    NOTE(review): excerpt — the ``_VALID_URL = r'''`` opener/closer, the
    bodies of ``_video_extensions``/``_video_dimensions`` and the ``_TESTS``
    list/dict delimiters are elided below.
    """
    IE_DESC = u'YouTube.com'
    (?:https?://)? # http(s):// (optional)
    (?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/|
    tube\.majestyc\.net/) # the various hostnames, with wildcard subdomains
    (?:.*?\#/)? # handle anchor (#/) redirect urls
    (?: # the various things that can precede the ID:
    (?:(?:v|embed|e)/) # v/ or embed/ or e/
    |(?: # or the v= param in all its forms
    (?:watch|movie(?:_popup)?(?:\.php)?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
    (?:\?|\#!?) # the params delimiter ? or # or #!
    (?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx)
    )? # optional -> youtube.com/xxxx is OK
    )? # all until now is optional -> you can pass the naked ID
    ([0-9A-Za-z_-]+) # here is it! the YouTube video ID
    (?(1).+)? # if we found the ID, everything can follow
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
    # Listed in order of quality
    _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13',
                          '95', '94', '93', '92', '132', '151',
                          '85', '84', '102', '83', '101', '82', '100',
                          '138', '137', '248', '136', '247', '135', '246',
                          '245', '244', '134', '243', '133', '242', '160',
                          '141', '172', '140', '171', '139',
    _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13',
                                     '95', '94', '93', '92', '132', '151',
                                     '85', '102', '84', '101', '83', '100', '82',
                                     '138', '248', '137', '247', '136', '246', '245',
                                     '244', '135', '243', '134', '242', '133', '160',
                                     '172', '141', '171', '140', '139',
    # itag -> container extension (body elided in this excerpt).
    _video_extensions = {
    # videos that use m3u8
    # itag -> human-readable dimensions (body elided in this excerpt).
    _video_dimensions = {
        u"url": u"http://www.youtube.com/watch?v=BaW_jenozKc",
        u"file": u"BaW_jenozKc.mp4",
        u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
        u"uploader": u"Philipp Hagemeister",
        u"uploader_id": u"phihag",
        u"upload_date": u"20121002",
        u"description": u"test chars: \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
        u"url": u"http://www.youtube.com/watch?v=1ltcDfZMA3U",
        u"file": u"1ltcDfZMA3U.flv",
        u"note": u"Test VEVO video (#897)",
        u"upload_date": u"20070518",
        u"title": u"Maps - It Will Find You",
        u"description": u"Music video by Maps performing It Will Find You.",
        u"uploader": u"MuteUSA",
        u"uploader_id": u"MuteUSA"
        u"url": u"http://www.youtube.com/watch?v=UxxajLWwzqY",
        u"file": u"UxxajLWwzqY.mp4",
        u"note": u"Test generic use_cipher_signature video (#897)",
        u"upload_date": u"20120506",
        u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
        u"description": u"md5:b085c9804f5ab69f4adea963a2dceb3c",
        u"uploader": u"Icona Pop",
        u"uploader_id": u"IconaPop"
        u"url": u"https://www.youtube.com/watch?v=07FYdnEawAQ",
        u"file": u"07FYdnEawAQ.mp4",
        u"note": u"Test VEVO video with age protection (#956)",
        u"upload_date": u"20130703",
        u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
        u"description": u"md5:64249768eec3bc4276236606ea996373",
        u"uploader": u"justintimberlakeVEVO",
        u"uploader_id": u"justintimberlakeVEVO"
        u'url': u'https://www.youtube.com/watch?v=TGi3HqYrWHE',
        u'file': u'TGi3HqYrWHE.mp4',
        u'note': u'm3u8 video',
        u'title': u'Triathlon - Men - London 2012 Olympic Games',
        u'description': u'- Men - TR02 - Triathlon - 07 August 2012 - London 2012 Olympic Games',
        u'uploader': u'olympic',
        u'upload_date': u'20120807',
        u'uploader_id': u'olympic',
        u'skip_download': True,
def suitable(cls, url):
    """Return True if *url* should be handled by this extractor."""
    # Playlist and subscription URLs are claimed by their dedicated IEs.
    if YoutubePlaylistIE.suitable(url):
        return False
    if YoutubeSubscriptionsIE.suitable(url):
        return False
    return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
def report_video_webpage_download(self, video_id):
    """Log that the download of the video webpage is starting."""
    self.to_screen(u'%s: Downloading video webpage' % (video_id,))
def report_video_info_webpage_download(self, video_id):
    """Log that the download of the video info webpage is starting."""
    self.to_screen(u'%s: Downloading video info webpage' % (video_id,))
def report_information_extraction(self, video_id):
    """Log that metadata extraction for *video_id* has begun."""
    self.to_screen(u'%s: Extracting video information' % (video_id,))
def report_unavailable_format(self, video_id, format):
    """Report that the requested format is not available for this video.

    (The previous docstring, "Report extracted video URL.", was a
    copy-paste error from another report method.)
    """
    self.to_screen(u'%s: Format %s not available' % (video_id, format))
def report_rtmp_download(self):
    """Announce that the RTMP protocol will be used for this download."""
    self.to_screen(u'RTMP download detected')
def _decrypt_signature(self, s):
    """Turn the encrypted s field into a working signature.

    Each ``return`` below is a hand-derived character permutation of *s*
    for one specific signature length. NOTE(review): the guarding
    ``if len(s) == N:`` / ``elif`` lines are elided from this excerpt.
    """
    return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
    return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
    return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
    return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12]
    return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
    return s[5:20] + s[2] + s[21:]
    return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
    return s[83:27:-1] + s[0] + s[26:5:-1] + s[2:0:-1] + s[27]
    return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
    return s[36] + s[79:67:-1] + s[81] + s[66:40:-1] + s[33] + s[39:36:-1] + s[40] + s[35] + s[0] + s[67] + s[32:0:-1] + s[34]
    return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
    return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
    # None of the known lengths matched: YouTube's cipher has probably changed.
    raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
def _decrypt_signature_age_gate(self, s):
    """Decrypt the signature used by the separate age-gate player."""
    # The videos with age protection use another player, so the algorithms
    # NOTE(review): the continuation of this comment and the length guard
    # for the return below are elided from this excerpt.
    return s[2:63] + s[82] + s[64:82] + s[63]
    # Fallback to the other algorithms
    return self._decrypt_signature(s)
def _get_available_subtitles(self, video_id):
    """Return a dict {lang_code: track_name} of available subtitle tracks,
    or an ``(error_message, None)`` tuple on failure."""
    self.report_video_subtitles_download(video_id)
    request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
        # NOTE(review): enclosing try: line elided from this excerpt.
        sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
    except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
        return (u'unable to download video subtitles: %s' % compat_str(err), None)
    # Each advertised track carries a display name and a language code.
    sub_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
    sub_lang_list = dict((l[1], l[0]) for l in sub_lang_list)
    if not sub_lang_list:
        return (u'video doesn\'t have subtitles', None)
    # NOTE(review): the success-path return is elided from this excerpt.
def _list_available_subtitles(self, video_id):
    """Print the subtitle languages available for *video_id*."""
    available = self._get_available_subtitles(video_id)
    self.report_video_subtitles_available(video_id, available)
def _request_subtitle(self, sub_lang, sub_name, video_id, format):
    # NOTE(review): docstring delimiters elided from this excerpt; the
    # return shape documented there is:
    (error_message, sub_lang, sub)
    self.report_video_subtitles_request(video_id, sub_lang, format)
    # Request the actual track from the timedtext API.
    params = compat_urllib_parse.urlencode({
    url = 'http://www.youtube.com/api/timedtext?' + params
        # NOTE(review): enclosing try: line elided from this excerpt.
        sub = compat_urllib_request.urlopen(url).read().decode('utf-8')
    except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
        return (u'unable to download video subtitles: %s' % compat_str(err), None, None)
        # Empty response body: the track exists in the listing but not here.
        return (u'Did not fetch video subtitles', None, None)
    return (None, sub_lang, sub)
def _request_automatic_caption(self, video_id, webpage):
    """We need the webpage for getting the captions url, pass it as an
    argument to speed up the process."""
    sub_lang = self._downloader.params.get('subtitleslang') or 'en'
    sub_format = self._downloader.params.get('subtitlesformat')
    self.to_screen(u'%s: Looking for automatic captions' % video_id)
    # The caption URL lives inside the embedded player configuration JSON.
    mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
    err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang
        return [(err_msg, None, None)]
    player_config = json.loads(mobj.group(1))
        # NOTE(review): enclosing try: line elided from this excerpt.
        args = player_config[u'args']
        caption_url = args[u'ttsurl']
        timestamp = args[u'timestamp']
        params = compat_urllib_parse.urlencode({
        subtitles_url = caption_url + '&' + params
        sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions')
        return [(None, sub_lang, sub)]
        # NOTE(review): the except clause for this return is elided.
        return [(err_msg, None, None)]
def _extract_subtitle(self, video_id):
    # NOTE(review): docstring delimiters elided from this excerpt.
    Return a list with a tuple:
    [(error_message, sub_lang, sub)]
    sub_lang_list = self._get_available_subtitles(video_id)
    sub_format = self._downloader.params.get('subtitlesformat')
    if isinstance(sub_lang_list,tuple): #There was some error, it didn't get the available subtitles
        return [(sub_lang_list[0], None, None)]
    if self._downloader.params.get('subtitleslang', False):
        sub_lang = self._downloader.params.get('subtitleslang')
    elif 'en' in sub_lang_list:
        # NOTE(review): the branch body and the following else: are elided;
        # the line below picks an arbitrary available language as fallback.
        sub_lang = list(sub_lang_list.keys())[0]
    # NOTE(review): prefer ``sub_lang not in sub_lang_list`` (PEP 8 idiom).
    if not sub_lang in sub_lang_list:
        return [(u'no closed captions found in the specified language "%s"' % sub_lang, None, None)]
    subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
def _extract_all_subtitles(self, video_id):
    """Download every available subtitle track for *video_id*."""
    sub_lang_list = self._get_available_subtitles(video_id)
    sub_format = self._downloader.params.get('subtitlesformat')
    if isinstance(sub_lang_list,tuple): #There was some error, it didn't get the available subtitles
        return [(sub_lang_list[0], None, None)]
    # NOTE(review): the ``subtitles = []`` initialiser and the final return
    # are elided from this excerpt.
    for sub_lang in sub_lang_list:
        subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
        subtitles.append(subtitle)
def _print_formats(self, formats):
    """Print itag, container extension, dimensions and any special tag
    for each format in *formats*."""
    print('Available formats:')
    # NOTE(review): the ``for x in formats:`` line is elided from this excerpt.
    print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
                                self._video_dimensions.get(x, '???'),
                                ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
def _extract_id(self, url):
    """Extract the video id from *url* via _VALID_URL (capture group 2)."""
    mobj = re.match(self._VALID_URL, url, re.VERBOSE)
        # NOTE(review): the ``if mobj is None:`` guard is elided.
        raise ExtractorError(u'Invalid URL: %s' % url)
    video_id = mobj.group(2)
    # NOTE(review): the return statement is elided from this excerpt.
def _get_video_url_list(self, url_map):
    # NOTE(review): docstring delimiters elided from this excerpt.
    Transform a dictionary in the format {itag:url} to a list of (itag, url)
    with the requested formats.
    req_format = self._downloader.params.get('format', None)
    format_limit = self._downloader.params.get('format_limit', None)
    # Respect --prefer-free-formats when ranking itags.
    available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
    if format_limit is not None and format_limit in available_formats:
        # Cap quality at the requested limit (lists are best-first).
        format_list = available_formats[available_formats.index(format_limit):]
        format_list = available_formats
    existing_formats = [x for x in format_list if x in url_map]
    if len(existing_formats) == 0:
        raise ExtractorError(u'no known formats available for video')
    if self._downloader.params.get('listformats', None):
        self._print_formats(existing_formats)
    if req_format is None or req_format == 'best':
        video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
    elif req_format == 'worst':
        video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
    elif req_format in ('-1', 'all'):
        video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
        # Specific formats. We pick the first in a slash-delimeted sequence.
        # For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
        req_formats = req_format.split('/')
        video_url_list = None
        for rf in req_formats:
            # NOTE(review): the ``if rf in url_map:`` guard is elided.
            video_url_list = [(rf, url_map[rf])]
        if video_url_list is None:
            raise ExtractorError(u'requested format not available')
    return video_url_list
def _extract_from_m3u8(self, manifest_url, video_id):
    """Build an {itag: url} map from an HLS (m3u8) master manifest."""
    def _get_urls(_manifest):
        lines = _manifest.split('\n')
        # Non-blank lines that are not '#'-comments are stream URLs.
        urls = filter(lambda l: l and not l.startswith('#'),
    # NOTE(review): ``url_map`` initialiser and return elided from this excerpt.
    manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
    formats_urls = _get_urls(manifest)
    for format_url in formats_urls:
        itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
        url_map[itag] = format_url
def _real_extract(self, url):
    """Download the watch page, choose formats and build the result dicts.

    NOTE(review): excerpt — many original lines (try:, else:, ``if mobj
    is None:`` guards, parts of dict literals, the results accumulator)
    are elided below; indentation reflects the apparent structure.
    """
    if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
        self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply youtube-dl BaW_jenozKc ).')

    # Extract original video URL from URL with redirection, like age verification, using next_url parameter
    mobj = re.search(self._NEXT_URL_RE, url)
        url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
    video_id = self._extract_id(url)

    # Get video webpage
    self.report_video_webpage_download(video_id)
    url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
    request = compat_urllib_request.Request(url)
        video_webpage_bytes = compat_urllib_request.urlopen(request).read()
    except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
        raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))
    video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')

    # Attempt to extract SWF player URL
    mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
        # Un-escape the JSON-escaped URL.
        player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))

    # Get video info
    self.report_video_info_webpage_download(video_id)
    if re.search(r'player-age-gate-content">', video_webpage) is not None:
        self.report_age_confirmation()
        # We simulate the access to the video from www.youtube.com/v/{video_id}
        # this can be viewed without login into Youtube
        data = compat_urllib_parse.urlencode({'video_id': video_id,
                                              'eurl': 'https://youtube.googleapis.com/v/' + video_id,
        video_info_url = 'https://www.youtube.com/get_video_info?' + data
        video_info_webpage = self._download_webpage(video_info_url, video_id,
                                                    errnote='unable to download video info webpage')
        video_info = compat_parse_qs(video_info_webpage)
        # Try several 'el' values until one of them yields a token.
        for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
            video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                              % (video_id, el_type))
            video_info_webpage = self._download_webpage(video_info_url, video_id,
                                                        errnote='unable to download video info webpage')
            video_info = compat_parse_qs(video_info_webpage)
            if 'token' in video_info:
    if 'token' not in video_info:
        if 'reason' in video_info:
            raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
            raise ExtractorError(u'"token" parameter not in video info for unknown reason')

    # Check for "rental" videos
    if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
        raise ExtractorError(u'"rental" videos not supported')

    # Start extracting information
    self.report_information_extraction(video_id)

    # uploader
    if 'author' not in video_info:
        raise ExtractorError(u'Unable to extract uploader name')
    video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])

    # uploader_id (best-effort: only a warning on failure)
    video_uploader_id = None
    mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
        video_uploader_id = mobj.group(1)
        self._downloader.report_warning(u'unable to extract uploader nickname')

    # title
    if 'title' not in video_info:
        raise ExtractorError(u'Unable to extract video title')
    video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])

    # thumbnail image
    # We try first to get a high quality image:
    m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
                        video_webpage, re.DOTALL)
    if m_thumb is not None:
        video_thumbnail = m_thumb.group(1)
    elif 'thumbnail_url' not in video_info:
        self._downloader.report_warning(u'unable to extract video thumbnail')
    else:   # don't panic if we can't find it
        video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])

    # upload date
    mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
        upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
        upload_date = unified_strdate(upload_date)

    # description
    video_description = get_element_by_id("eow-description", video_webpage)
    if video_description:
        video_description = clean_html(video_description)
        # Fall back to the <meta name="description"> tag.
        fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
            video_description = unescapeHTML(fd_mobj.group(1))
            video_description = u''

    # subtitles
    video_subtitles = None

    if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False):
        video_subtitles = self._extract_subtitles(video_id)
    elif self._downloader.params.get('writeautomaticsub', False):
        video_subtitles = self._request_automatic_caption(video_id, video_webpage)

    if self._downloader.params.get('listsubtitles', False):
        self._list_available_subtitles(video_id)

    if 'length_seconds' not in video_info:
        self._downloader.report_warning(u'unable to extract video duration')
        video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])

    # Decide which formats to download
        mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
            raise ValueError('Could not find vevo ID')
        info = json.loads(mobj.group(1))
        # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
        # these signatures are encrypted
        m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map'])
            self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
            video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
        m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u''))
            if 'url_encoded_fmt_stream_map' in video_info:
                video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts']
                video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']]
        elif 'adaptive_fmts' in video_info:
            if 'url_encoded_fmt_stream_map' in video_info:
                video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0]
                video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts']

    if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
        self.report_rtmp_download()
        video_url_list = [(None, video_info['conn'][0])]
    elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
        if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]:
            raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
        for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
            url_data = compat_parse_qs(url_data_str)
            if 'itag' in url_data and 'url' in url_data:
                url = url_data['url'][0]
                if 'sig' in url_data:
                    # Signature already in the clear.
                    url += '&signature=' + url_data['sig'][0]
                elif 's' in url_data:
                    if self._downloader.params.get('verbose'):
                        # Identify which player produced the cipher, for debugging.
                        player_version = self._search_regex(r'ad3-(.+?)\.swf',
                                                            video_info['ad3_module'][0] if 'ad3_module' in video_info else 'NOT FOUND',
                                                            'flash player', fatal=False)
                        player = 'flash player %s' % player_version
                        player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage,
                                                                         'html5 player', fatal=False)
                        parts_sizes = u'.'.join(compat_str(len(part)) for part in s.split('.'))
                        self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
                                       (len(s), parts_sizes, url_data['itag'][0], player))
                    encrypted_sig = url_data['s'][0]
                        signature = self._decrypt_signature_age_gate(encrypted_sig)
                        signature = self._decrypt_signature(encrypted_sig)
                    url += '&signature=' + signature
                    if 'ratebypass' not in url:
                        url += '&ratebypass=yes'
                url_map[url_data['itag'][0]] = url
        video_url_list = self._get_video_url_list(url_map)
        if not video_url_list:
    elif video_info.get('hlsvp'):
        # Live/HLS streams ship their formats in an m3u8 manifest instead.
        manifest_url = video_info['hlsvp'][0]
        url_map = self._extract_from_m3u8(manifest_url, video_id)
        video_url_list = self._get_video_url_list(url_map)
        if not video_url_list:
        raise ExtractorError(u'no conn or url_encoded_fmt_stream_map information found in video info')

    for format_param, video_real_url in video_url_list:
        # Extension
        video_extension = self._video_extensions.get(format_param, 'flv')
        video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
                                             self._video_dimensions.get(format_param, '???'),
                                             ' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '')
            # One result dict per selected format (dict delimiters elided).
            'url': video_real_url,
            'uploader': video_uploader,
            'uploader_id': video_uploader_id,
            'upload_date': upload_date,
            'title': video_title,
            'ext': video_extension,
            'format': video_format,
            'thumbnail': video_thumbnail,
            'description': video_description,
            'player_url': player_url,
            'subtitles': video_subtitles,
            'duration': video_duration
class YoutubePlaylistIE(InfoExtractor):
    """Extractor for YouTube playlists, using the gdata v2 API.

    NOTE(review): excerpt — the ``_VALID_URL`` raw-string delimiters and
    several body lines are elided below.
    """
    IE_DESC = u'YouTube.com playlists'
    (?:course|view_play_list|my_playlists|artist|playlist|watch)
    \? (?:.*?&)*? (?:p|a|list)=
    ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
    ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
    IE_NAME = u'youtube:playlist'

    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None

    def _real_extract(self, url):
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
            raise ExtractorError(u'Invalid URL: %s' % url)

        # Download playlist videos from API
        playlist_id = mobj.group(1) or mobj.group(2)
        for page_num in itertools.count(1):
            # gdata start-index is 1-based.
            start_index = self._MAX_RESULTS * (page_num - 1) + 1
            if start_index >= 1000:
                # The gdata API refuses to page past result 1000.
                self._downloader.report_warning(u'Max number of results reached')
            url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
            page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))

            if 'feed' not in response:
                raise ExtractorError(u'Got a malformed response from YouTube API')
            playlist_title = response['feed']['title']['$t']
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._MAX_RESULTS

            for entry in response['feed']['entry']:
                index = entry['yt$position']['$t']
                if 'media$group' in entry and 'media$player' in entry['media$group']:
                    videos.append((index, entry['media$group']['media$player']['url']))

        # Restore playlist order before emitting results.
        videos = [v[1] for v in sorted(videos)]

        url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
        return [self.playlist_result(url_results, playlist_id, playlist_title)]
class YoutubeChannelIE(InfoExtractor):
    """Extractor for all videos of a YouTube channel."""
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
    # Marker string whose presence means more pages are available.
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    IE_NAME = u'youtube:channel'

    def extract_videos_from_page(self, page):
        # Collect unique video ids from watch links in *page*, keeping order.
        # NOTE(review): the initialiser and return of ids_in_page are elided.
        for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            if mobj.group(1) not in ids_in_page:
                ids_in_page.append(mobj.group(1))

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
            raise ExtractorError(u'Invalid URL: %s' % url)

        # Download channel page
        channel_id = mobj.group(1)
        url = self._TEMPLATE_URL % (channel_id, pagenum)
        page = self._download_webpage(url, channel_id,
                                      u'Downloading page #%s' % pagenum)

        # Extract video identifiers
        ids_in_page = self.extract_videos_from_page(page)
        video_ids.extend(ids_in_page)

        # Download any subsequent channel pages using the json-based channel_ajax query
        if self._MORE_PAGES_INDICATOR in page:
            for pagenum in itertools.count(1):
                url = self._MORE_PAGES_URL % (pagenum, channel_id)
                page = self._download_webpage(url, channel_id,
                                              u'Downloading page #%s' % pagenum)
                page = json.loads(page)
                ids_in_page = self.extract_videos_from_page(page['content_html'])
                video_ids.extend(ids_in_page)
                # Stop once the widget no longer advertises more pages.
                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:

        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

        urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
        url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
        return [self.playlist_result(url_entries, channel_id)]
class YoutubeUserIE(InfoExtractor):
    """Extractor for a user's uploaded videos via the gdata API."""
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
    _VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]'
    IE_NAME = u'youtube:user'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
            raise ExtractorError(u'Invalid URL: %s' % url)

        username = mobj.group(1)

        # Download video ids using YouTube Data API. Result size per
        # query is limited (currently to 50 videos) so we need to query
        # page by page until there are no video ids - it means we got
        for pagenum in itertools.count(0):
            # gdata start-index is 1-based.
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1

            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            page = self._download_webpage(gdata_url, username,
                                          u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))

            # Extract video identifiers
            for mobj in re.finditer(self._VIDEO_INDICATOR, page):
                if mobj.group(1) not in ids_in_page:
                    ids_in_page.append(mobj.group(1))

            video_ids.extend(ids_in_page)

            # A little optimization - if current page is not
            # "full", ie. does not contain PAGE_SIZE video ids then
            # we can assume that this page is the last one - there
            # are no more ids on further pages - no need to query
            if len(ids_in_page) < self._GDATA_PAGE_SIZE:

        urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
        url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
        return [self.playlist_result(url_results, playlist_title = username)]
class YoutubeSearchIE(SearchInfoExtractor):
    """Search extractor ("ytsearch" keyword) backed by the gdata API."""
    IE_DESC = u'YouTube.com searches'
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'

    def report_download_page(self, query, pagenum):
        """Report attempt to download search page with given number."""
        self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        # Pages carry up to 50 results; keep fetching until *limit* is met.
        while (50 * pagenum) < limit:
            self.report_download_page(query, pagenum+1)
            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
            request = compat_urllib_request.Request(result_url)
                # NOTE(review): enclosing try: line elided from this excerpt.
                data = compat_urllib_request.urlopen(request).read().decode('utf-8')
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
            api_response = json.loads(data)['data']

            if not 'items' in api_response:
                raise ExtractorError(u'[youtube] No video results')

            new_ids = list(video['id'] for video in api_response['items'])
            video_ids += new_ids

            # Never request more than the API reports to exist.
            limit = min(n, api_response['totalItems'])

        if len(video_ids) > n:
            video_ids = video_ids[:n]
        videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
        return self.playlist_result(videos, query)
class YoutubeShowIE(InfoExtractor):
    """Extractor for multi-season YouTube shows: one playlist per season."""
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
    IE_NAME = u'youtube:show'

    def _real_extract(self, url):
        show_name = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # Each season of the show is published as a separate playlist.
        season_matches = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(season_matches)))
        season_results = []
        for season in season_matches:
            season_results.append(
                self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist'))
        return season_results
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    # NOTE(review): docstring delimiters elided from this excerpt.
    Base class for extractors that fetch info from
    http://www.youtube.com/feed_ajax
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    # Feeds require a logged-in session.
    _LOGIN_REQUIRED = True
    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False

    def _FEED_TEMPLATE(self):
        # URL template with one remaining %s placeholder for the paging value.
        action = 'action_load_system_feed'
        if self._PERSONAL_FEED:
            action = 'action_load_personal_feed'
        return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

        # NOTE(review): belongs to an IE_NAME property whose def is elided.
        return u'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        # NOTE(review): body elided from this excerpt.

    def _real_extract(self, url):
        # The step argument is available only in 2.7 or higher
        for i in itertools.count(0):
            paging = i*self._PAGING_STEP
            info = self._download_webpage(self._FEED_TEMPLATE % paging,
                                          u'%s feed' % self._FEED_NAME,
                                          u'Downloading page %s' % i)
            info = json.loads(info)
            feed_html = info['feed_html']
            # Pull the video ids out of the rendered feed HTML, de-duplicated.
            m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
            ids = orderedSet(m.group(1) for m in m_ids)
            feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
            # A null 'paging' value means this was the last page.
            if info['paging'] is None:
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the authenticated user's subscriptions feed."""
    # Fixed missing space in the user-visible description ('keyword(requires').
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the authenticated user's recommended videos."""
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the authenticated user's Watch Later list."""
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = u'Youtube Watch Later'
    # Watch Later is per-user, so use the personal-feed ajax action.
    _PERSONAL_FEED = True
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor for the authenticated user's favourite videos."""
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:o?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # The favourites page embeds the id of its backing playlist;
        # delegate the actual extraction to YoutubePlaylistIE.
        favourites_page = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_list_id = self._search_regex(r'list=(.+?)["&]', favourites_page, u'favourites playlist id')
        return self.url_result(favourites_list_id, 'YoutubePlaylist')