9 from .common import InfoExtractor, SearchInfoExtractor
15 compat_urllib_request,
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    # Endpoints used for login, language selection and age verification.
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    # Machine name looked up in the user's .netrc file for credentials.
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False
    def report_lang(self):
        """Report attempt to set language."""
        self.to_screen(u'Setting language')
    def _set_language(self):
        # Requesting _LANG_URL stores the en/US preference in session cookies.
        request = compat_urllib_request.Request(self._LANG_URL)
            compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            # Best effort: a failure here only degrades metadata language.
            self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
        (username, password) = self._get_login_info()
        # No authentication to be performed
            if self._LOGIN_REQUIRED:
                raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
        request = compat_urllib_request.Request(self._LOGIN_URL)
            login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
        # Google's login form embeds anti-forgery tokens (GALX/dsh) that must
        # be echoed back in the POST below.
        match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
        match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)
                u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
                u'PersistentCookie': u'yes',
                u'bgresponse': u'js_disabled',
                u'checkConnection': u'',
                u'checkedDomains': u'youtube',
                u'signIn': u'Sign in',
                u'service': u'youtube',
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
        request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
            login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
            # The login form being served again means authentication failed.
            if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
                self._downloader.report_warning(u'unable to log in: bad username or password')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
    def _confirm_age(self):
                'action_confirm': 'Confirm',
        request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
            self.report_age_confirmation()
            compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
    def _real_initialize(self):
        # Runs once before extraction: set language, log in, confirm age.
        if self._downloader is None:
        if not self._set_language():
        if not self._login():
class YoutubeIE(YoutubeBaseInfoExtractor):
    # Extractor for single YouTube videos; also handles signature descrambling.
    IE_DESC = u'YouTube.com'
                         (?:https?://)?                                       # http(s):// (optional)
                         (?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/|
                            tube\.majestyc\.net/)                             # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
                             |(?:                                             # or the v= param in all its forms
                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?)?    # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                 (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                             )?                                               # optional -> youtube.com/xxxx is OK
                     )?                                                       # all until now is optional -> you can pass the naked ID
                     ([0-9A-Za-z_-]+)                                         # here is it! the YouTube video ID
                     (?(1).+)?                                                # if we found the ID, everything can follow
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
    # Listed in order of quality
    _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13',
                          '95', '94', '93', '92', '132', '151',
                          '85', '84', '102', '83', '101', '82', '100',
                          '138', '137', '248', '136', '247', '135', '246',
                          '245', '244', '134', '243', '133', '242', '160',
                          '141', '172', '140', '171', '139',
    _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13',
                                      '95', '94', '93', '92', '132', '151',
                                      '85', '102', '84', '101', '83', '100', '82',
                                      '138', '248', '137', '247', '136', '246', '245',
                                      '244', '135', '243', '134', '242', '133', '160',
                                      '172', '141', '171', '140', '139',
    _video_extensions = {
        # videos that use m3u8
    _video_dimensions = {
            u"url":  u"http://www.youtube.com/watch?v=BaW_jenozKc",
            u"file":  u"BaW_jenozKc.mp4",
                u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
                u"uploader": u"Philipp Hagemeister",
                u"uploader_id": u"phihag",
                u"upload_date": u"20121002",
                u"description": u"test chars: \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
            u"url":  u"http://www.youtube.com/watch?v=1ltcDfZMA3U",
            u"file":  u"1ltcDfZMA3U.flv",
            u"note": u"Test VEVO video (#897)",
                u"upload_date": u"20070518",
                u"title": u"Maps - It Will Find You",
                u"description": u"Music video by Maps performing It Will Find You.",
                u"uploader": u"MuteUSA",
                u"uploader_id": u"MuteUSA"
            u"url":  u"http://www.youtube.com/watch?v=UxxajLWwzqY",
            u"file":  u"UxxajLWwzqY.mp4",
            u"note": u"Test generic use_cipher_signature video (#897)",
                u"upload_date": u"20120506",
                u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
                u"description": u"md5:b085c9804f5ab69f4adea963a2dceb3c",
                u"uploader": u"Icona Pop",
                u"uploader_id": u"IconaPop"
            u"url":  u"https://www.youtube.com/watch?v=07FYdnEawAQ",
            u"file":  u"07FYdnEawAQ.mp4",
            u"note": u"Test VEVO video with age protection (#956)",
                u"upload_date": u"20130703",
                u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
                u"description": u"md5:64249768eec3bc4276236606ea996373",
                u"uploader": u"justintimberlakeVEVO",
                u"uploader_id": u"justintimberlakeVEVO"
            u'url': u'https://www.youtube.com/watch?v=TGi3HqYrWHE',
            u'file': u'TGi3HqYrWHE.mp4',
            u'note': u'm3u8 video',
                u'title': u'Triathlon - Men - London 2012 Olympic Games',
                u'description': u'- Men - TR02 - Triathlon - 07 August 2012 - London 2012 Olympic Games',
                u'uploader': u'olympic',
                u'upload_date': u'20120807',
                u'uploader_id': u'olympic',
                u'skip_download': True,
374 def suitable(cls, url):
375 """Receives a URL and returns True if suitable for this IE."""
376 if YoutubePlaylistIE.suitable(url) or YoutubeSubscriptionsIE.suitable(url): return False
377 return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
379 def report_video_webpage_download(self, video_id):
380 """Report attempt to download video webpage."""
381 self.to_screen(u'%s: Downloading video webpage' % video_id)
383 def report_video_info_webpage_download(self, video_id):
384 """Report attempt to download video info webpage."""
385 self.to_screen(u'%s: Downloading video info webpage' % video_id)
    def report_video_subtitles_download(self, video_id):
        """Report that the list of available subtitles is being checked."""
        self.to_screen(u'%s: Checking available subtitles' % video_id)
    def report_video_subtitles_request(self, video_id, sub_lang, format):
        """Report attempt to download subtitles for a given language/format."""
        self.to_screen(u'%s: Downloading video subtitles for %s.%s' % (video_id, sub_lang, format))
395 def report_video_subtitles_available(self, video_id, sub_lang_list):
396 """Report available subtitles."""
397 sub_lang = ",".join(list(sub_lang_list.keys()))
398 self.to_screen(u'%s: Available subtitles for video: %s' % (video_id, sub_lang))
400 def report_information_extraction(self, video_id):
401 """Report attempt to extract video information."""
402 self.to_screen(u'%s: Extracting video information' % video_id)
404 def report_unavailable_format(self, video_id, format):
405 """Report extracted video URL."""
406 self.to_screen(u'%s: Format %s not available' % (video_id, format))
408 def report_rtmp_download(self):
409 """Indicate the download will use the RTMP protocol."""
410 self.to_screen(u'RTMP download detected')
    def _decrypt_signature(self, s):
        """Turn the encrypted s field into a working signature"""
        # Each branch below reorders the scrambled signature characters; the
        # branch is selected on len(s) (the guard lines precede each return).
        # NOTE(review): these permutations track youtube's player and go stale.
            return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
            return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
            return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
            return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12]
            return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
            return s[5:20] + s[2] + s[21:]
            return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
            return s[83:27:-1] + s[0] + s[26:5:-1] + s[2:0:-1] + s[27]
            return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
            return s[1:19] + s[0] + s[20:68] + s[19] + s[69:82]
            return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
            return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
            return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
            raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
    def _decrypt_signature_age_gate(self, s):
        # The videos with age protection use another player, so the algorithms
            return s[2:63] + s[82] + s[64:82] + s[63]
            # Fallback to the other algorithms
            return self._decrypt_signature(s)
    def _get_available_subtitles(self, video_id):
        # Query the timedtext listing endpoint for the video's subtitle tracks.
        self.report_video_subtitles_download(video_id)
        request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
            sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
        sub_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
        # Map lang_code -> track name, e.g. {'en': 'English'}.
        sub_lang_list = dict((l[1], l[0]) for l in sub_lang_list)
        if not sub_lang_list:
            self._downloader.report_warning(u'video doesn\'t have subtitles')
470 def _list_available_subtitles(self, video_id):
471 sub_lang_list = self._get_available_subtitles(video_id)
472 self.report_video_subtitles_available(video_id, sub_lang_list)
    def _request_subtitle(self, sub_lang, sub_name, video_id, format):
        Return the subtitle as a string or None if they are not found
        self.report_video_subtitles_request(video_id, sub_lang, format)
        # Build the timedtext query for this track/format.
        params = compat_urllib_parse.urlencode({
        url = 'http://www.youtube.com/api/timedtext?' + params
            sub = compat_urllib_request.urlopen(url).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
            self._downloader.report_warning(u'Did not fetch video subtitles')
    def _request_automatic_caption(self, video_id, webpage):
        """We need the webpage for getting the captions url, pass it as an
        argument to speed up the process."""
        sub_lang = self._downloader.params.get('subtitleslang') or 'en'
        sub_format = self._downloader.params.get('subtitlesformat')
        self.to_screen(u'%s: Looking for automatic captions' % video_id)
        # The caption URL lives inside the inline ytplayer.config JSON blob.
        mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
        err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang
            self._downloader.report_warning(err_msg)
        player_config = json.loads(mobj.group(1))
            args = player_config[u'args']
            caption_url = args[u'ttsurl']
            timestamp = args[u'timestamp']
            params = compat_urllib_parse.urlencode({
            subtitles_url = caption_url + '&' + params
            sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions')
            return {sub_lang: sub}
        # An extractor error can be raise by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, ExtractorError):
            self._downloader.report_warning(err_msg)
528 def _extract_subtitles(self, video_id):
530 Return a dictionary: {language: subtitles} or {} if the subtitles
533 sub_lang_list = self._get_available_subtitles(video_id)
534 sub_format = self._downloader.params.get('subtitlesformat')
535 if not sub_lang_list: #There was some error, it didn't get the available subtitles
537 if self._downloader.params.get('allsubtitles', False):
540 if self._downloader.params.get('subtitleslang', False):
541 sub_lang = self._downloader.params.get('subtitleslang')
542 elif 'en' in sub_lang_list:
545 sub_lang = list(sub_lang_list.keys())[0]
546 if not sub_lang in sub_lang_list:
547 self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang)
549 sub_lang_list = {sub_lang: sub_lang_list[sub_lang]}
551 for sub_lang in sub_lang_list:
552 subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
554 subtitles[sub_lang] = subtitle
    def _print_formats(self, formats):
        # Pretty-print itag / extension / dimensions for --list-formats.
        print('Available formats:')
            print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
                                        self._video_dimensions.get(x, '???'),
                                        ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
    def _extract_id(self, url):
        # Group 2 of _VALID_URL captures the 11-character video ID.
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group(2)
    def _get_video_url_list(self, url_map):
        Transform a dictionary in the format {itag:url} to a list of (itag, url)
        with the requested formats.
        req_format = self._downloader.params.get('format', None)
        format_limit = self._downloader.params.get('format_limit', None)
        available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
        if format_limit is not None and format_limit in available_formats:
            # Drop everything better than the requested quality ceiling.
            format_list = available_formats[available_formats.index(format_limit):]
            format_list = available_formats
        existing_formats = [x for x in format_list if x in url_map]
        if len(existing_formats) == 0:
            raise ExtractorError(u'no known formats available for video')
        if self._downloader.params.get('listformats', None):
            self._print_formats(existing_formats)
        if req_format is None or req_format == 'best':
            video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
        elif req_format == 'worst':
            video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
        elif req_format in ('-1', 'all'):
            video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
            # Specific formats. We pick the first in a slash-delimited sequence.
            # For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
            req_formats = req_format.split('/')
            video_url_list = None
            for rf in req_formats:
                    video_url_list = [(rf, url_map[rf])]
            if video_url_list is None:
                raise ExtractorError(u'requested format not available')
        return video_url_list
    def _extract_from_m3u8(self, manifest_url, video_id):
        # Keep only real media URLs; lines starting with '#' are m3u8 tags.
        def _get_urls(_manifest):
            lines = _manifest.split('\n')
            urls = filter(lambda l: l and not l.startswith('#'),
        manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
        formats_urls = _get_urls(manifest)
        for format_url in formats_urls:
            # The itag is embedded in the variant URL path.
            itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
            url_map[itag] = format_url
    def _real_extract(self, url):
        """Extract metadata and downloadable format URLs for one video."""
        if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
            self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply youtube-dl BaW_jenozKc ).')
        # Extract original video URL from URL with redirection, like age verification, using next_url parameter
        mobj = re.search(self._NEXT_URL_RE, url)
            url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
        video_id = self._extract_id(url)
        self.report_video_webpage_download(video_id)
        url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
        request = compat_urllib_request.Request(url)
            video_webpage_bytes = compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))
        video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')
        # Attempt to extract SWF player URL
        mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
            player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
        self.report_video_info_webpage_download(video_id)
        # Age-gated videos require the embedded-player get_video_info endpoint.
        if re.search(r'player-age-gate-content">', video_webpage) is not None:
            self.report_age_confirmation()
            # We simulate the access to the video from www.youtube.com/v/{video_id}
            # this can be viewed without login into Youtube
            data = compat_urllib_parse.urlencode({'video_id': video_id,
                                                  'eurl': 'https://youtube.googleapis.com/v/' + video_id,
            video_info_url = 'https://www.youtube.com/get_video_info?' + data
            video_info_webpage = self._download_webpage(video_info_url, video_id,
                                    errnote='unable to download video info webpage')
            video_info = compat_parse_qs(video_info_webpage)
            for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
                video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                        % (video_id, el_type))
                video_info_webpage = self._download_webpage(video_info_url, video_id,
                                        errnote='unable to download video info webpage')
                video_info = compat_parse_qs(video_info_webpage)
                if 'token' in video_info:
        if 'token' not in video_info:
            if 'reason' in video_info:
                raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
                raise ExtractorError(u'"token" parameter not in video info for unknown reason')
        # Check for "rental" videos
        if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
            raise ExtractorError(u'"rental" videos not supported')
        # Start extracting information
        self.report_information_extraction(video_id)
        if 'author' not in video_info:
            raise ExtractorError(u'Unable to extract uploader name')
        video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
        video_uploader_id = None
        mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
            video_uploader_id = mobj.group(1)
            self._downloader.report_warning(u'unable to extract uploader nickname')
        if 'title' not in video_info:
            raise ExtractorError(u'Unable to extract video title')
        video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
        # We try first to get a high quality image:
        m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
                            video_webpage, re.DOTALL)
        if m_thumb is not None:
            video_thumbnail = m_thumb.group(1)
        elif 'thumbnail_url' not in video_info:
            self._downloader.report_warning(u'unable to extract video thumbnail')
        else: # don't panic if we can't find it
            video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
        mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
            # Normalize the scraped date string before parsing it.
            upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
            upload_date = unified_strdate(upload_date)
        video_description = get_element_by_id("eow-description", video_webpage)
        if video_description:
            video_description = clean_html(video_description)
            fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
                video_description = unescapeHTML(fd_mobj.group(1))
                video_description = u''
        video_subtitles = None
        if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False):
            video_subtitles = self._extract_subtitles(video_id)
        elif self._downloader.params.get('writeautomaticsub', False):
            video_subtitles = self._request_automatic_caption(video_id, video_webpage)
        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(video_id)
        if 'length_seconds' not in video_info:
            self._downloader.report_warning(u'unable to extract video duration')
            video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
        # Decide which formats to download
            mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
                raise ValueError('Could not find vevo ID')
            info = json.loads(mobj.group(1))
            # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
            # this signatures are encrypted
            m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map'])
                self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
                video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
            m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u''))
                if 'url_encoded_fmt_stream_map' in video_info:
                    video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts']
                    video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']]
            elif 'adaptive_fmts' in video_info:
                if 'url_encoded_fmt_stream_map' in video_info:
                    video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0]
                    video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts']
        if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
            self.report_rtmp_download()
            video_url_list = [(None, video_info['conn'][0])]
        elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
            if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]:
                raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
            for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
                url_data = compat_parse_qs(url_data_str)
                if 'itag' in url_data and 'url' in url_data:
                    url = url_data['url'][0]
                    if 'sig' in url_data:
                        url += '&signature=' + url_data['sig'][0]
                    elif 's' in url_data:
                        # Encrypted signature: descramble it before use.
                        if self._downloader.params.get('verbose'):
                                player_version = self._search_regex(r'ad3-(.+?)\.swf',
                                    video_info['ad3_module'][0] if 'ad3_module' in video_info else 'NOT FOUND',
                                    'flash player', fatal=False)
                                player = 'flash player %s' % player_version
                                player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage,
                                    'html5 player', fatal=False)
                            parts_sizes = u'.'.join(compat_str(len(part)) for part in s.split('.'))
                            self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
                                (len(s), parts_sizes, url_data['itag'][0], player))
                        encrypted_sig = url_data['s'][0]
                            signature = self._decrypt_signature_age_gate(encrypted_sig)
                            signature = self._decrypt_signature(encrypted_sig)
                        url += '&signature=' + signature
                    if 'ratebypass' not in url:
                        url += '&ratebypass=yes'
                    url_map[url_data['itag'][0]] = url
            video_url_list = self._get_video_url_list(url_map)
            if not video_url_list:
        elif video_info.get('hlsvp'):
            manifest_url = video_info['hlsvp'][0]
            url_map = self._extract_from_m3u8(manifest_url, video_id)
            video_url_list = self._get_video_url_list(url_map)
            if not video_url_list:
            raise ExtractorError(u'no conn or url_encoded_fmt_stream_map information found in video info')
        for format_param, video_real_url in video_url_list:
            video_extension = self._video_extensions.get(format_param, 'flv')
            video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
                                                 self._video_dimensions.get(format_param, '???'),
                                                 ' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '')
                'url': video_real_url,
                'uploader': video_uploader,
                'uploader_id': video_uploader_id,
                'upload_date': upload_date,
                'title': video_title,
                'ext': video_extension,
                'format': video_format,
                'thumbnail': video_thumbnail,
                'description': video_description,
                'player_url': player_url,
                'subtitles': video_subtitles,
                'duration': video_duration
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = u'YouTube.com playlists'
                        (?:course|view_play_list|my_playlists|artist|playlist|watch)
                        \? (?:.*?&)*? (?:p|a|list)=
                        ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
                        ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
    IE_NAME = u'youtube:playlist'
    def suitable(cls, url):
        """Receives a URL and returns True if suitable for this IE."""
        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
    def _real_extract(self, url):
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
            raise ExtractorError(u'Invalid URL: %s' % url)
        # Download playlist videos from API
        playlist_id = mobj.group(1) or mobj.group(2)
        for page_num in itertools.count(1):
            start_index = self._MAX_RESULTS * (page_num - 1) + 1
            # The gdata API refuses start indexes of 1000 and above.
            if start_index >= 1000:
                self._downloader.report_warning(u'Max number of results reached')
            url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
            page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
                response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
            if 'feed' not in response:
                raise ExtractorError(u'Got a malformed response from YouTube API')
            playlist_title = response['feed']['title']['$t']
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._MAX_RESULTS
            for entry in response['feed']['entry']:
                index = entry['yt$position']['$t']
                if 'media$group' in entry and 'media$player' in entry['media$group']:
                    videos.append((index, entry['media$group']['media$player']['url']))
        # Restore playlist order by the yt$position index, then drop it.
        videos = [v[1] for v in sorted(videos)]
        url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
        return [self.playlist_result(url_results, playlist_id, playlist_title)]
class YoutubeChannelIE(InfoExtractor):
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
    # Marker string present in HTML/JSON whenever another page exists.
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    IE_NAME = u'youtube:channel'
    def extract_videos_from_page(self, page):
        # Collect unique video ids from watch links, preserving page order.
        for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            if mobj.group(1) not in ids_in_page:
                ids_in_page.append(mobj.group(1))
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
            raise ExtractorError(u'Invalid URL: %s' % url)
        # Download channel page
        channel_id = mobj.group(1)
        url = self._TEMPLATE_URL % (channel_id, pagenum)
        page = self._download_webpage(url, channel_id,
                                      u'Downloading page #%s' % pagenum)
        # Extract video identifiers
        ids_in_page = self.extract_videos_from_page(page)
        video_ids.extend(ids_in_page)
        # Download any subsequent channel pages using the json-based channel_ajax query
        if self._MORE_PAGES_INDICATOR in page:
            for pagenum in itertools.count(1):
                url = self._MORE_PAGES_URL % (pagenum, channel_id)
                page = self._download_webpage(url, channel_id,
                                              u'Downloading page #%s' % pagenum)
                page = json.loads(page)
                ids_in_page = self.extract_videos_from_page(page['content_html'])
                video_ids.extend(ids_in_page)
                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
        urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
        url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
        return [self.playlist_result(url_entries, channel_id)]
class YoutubeUserIE(InfoExtractor):
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    # gdata caps each uploads query at 50 results, hence the paging below.
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
    _VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]'
    IE_NAME = u'youtube:user'
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
            raise ExtractorError(u'Invalid URL: %s' % url)
        username = mobj.group(1)
        # Download video ids using YouTube Data API. Result size per
        # query is limited (currently to 50 videos) so we need to query
        # page by page until there are no video ids - it means we got
        for pagenum in itertools.count(0):
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1
            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            page = self._download_webpage(gdata_url, username,
                                          u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))
            # Extract video identifiers
            for mobj in re.finditer(self._VIDEO_INDICATOR, page):
                if mobj.group(1) not in ids_in_page:
                    ids_in_page.append(mobj.group(1))
            video_ids.extend(ids_in_page)
            # A little optimization - if current page is not
            # "full", ie. does not contain PAGE_SIZE video ids then
            # we can assume that this page is the last one - there
            # are no more ids on further pages - no need to query
            if len(ids_in_page) < self._GDATA_PAGE_SIZE:
        urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
        url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
        return [self.playlist_result(url_results, playlist_title = username)]
class YoutubeSearchIE(SearchInfoExtractor):
    IE_DESC = u'YouTube.com searches'
    # gdata v2 search endpoint; results are fetched 50 at a time.
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'
1045 def report_download_page(self, query, pagenum):
1046 """Report attempt to download search page with given number."""
1047 self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
1049 def _get_n_results(self, query, n):
1050 """Get a specified number of results for a query"""
1056 while (50 * pagenum) < limit:
1057 self.report_download_page(query, pagenum+1)
1058 result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
1059 request = compat_urllib_request.Request(result_url)
1061 data = compat_urllib_request.urlopen(request).read().decode('utf-8')
1062 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1063 raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
1064 api_response = json.loads(data)['data']
1066 if not 'items' in api_response:
1067 raise ExtractorError(u'[youtube] No video results')
1069 new_ids = list(video['id'] for video in api_response['items'])
1070 video_ids += new_ids
1072 limit = min(n, api_response['totalItems'])
1075 if len(video_ids) > n:
1076 video_ids = video_ids[:n]
1077 videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
1078 return self.playlist_result(videos, query)
class YoutubeShowIE(InfoExtractor):
    """Extractor for multi-season YouTube shows."""
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
    IE_NAME = u'youtube:show'

    def _real_extract(self, url):
        """Return one playlist url_result per season found on the show page."""
        match = re.match(self._VALID_URL, url)
        show_name = match.group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # There's one playlist for each season of the show
        season_matches = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(season_matches)))
        results = []
        for season in season_matches:
            playlist_url = 'https://www.youtube.com' + season.group(1)
            results.append(self.url_result(playlist_url, 'YoutubePlaylist'))
        return results
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for extractors that fetch info from
    http://www.youtube.com/feed_ajax
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    _LOGIN_REQUIRED = True
    # Items fetched per feed_ajax request; also the paging increment.
    _PAGING_STEP = 30
    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False

    @property
    def _FEED_TEMPLATE(self):
        # URL template with one remaining %s slot for the paging offset.
        action = 'action_load_system_feed'
        if self._PERSONAL_FEED:
            action = 'action_load_personal_feed'
        return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

    @property
    def IE_NAME(self):
        return u'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        self._login()

    def _real_extract(self, url):
        """Page through the feed_ajax endpoint and collect video results."""
        feed_entries = []
        # The step argument is available only in 2.7 or higher
        for i in itertools.count(0):
            paging = i*self._PAGING_STEP
            info = self._download_webpage(self._FEED_TEMPLATE % paging,
                                          u'%s feed' % self._FEED_NAME,
                                          u'Downloading page %s' % i)
            info = json.loads(info)
            feed_html = info['feed_html']
            m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
            ids = orderedSet(m.group(1) for m in m_ids)
            feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
            # A null 'paging' field marks the last page; break to stop the
            # otherwise unbounded itertools.count loop.
            if info['paging'] is None:
                break
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Extractor for the logged-in user's subscriptions feed.

    _FEED_NAME feeds the base class's _FEED_TEMPLATE URL and IE_NAME;
    login is required via the inherited _LOGIN_REQUIRED = True.
    """
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Extractor for the logged-in user's recommended-videos feed.

    Uses the system feed (_PERSONAL_FEED stays False); _FEED_NAME feeds the
    base class's _FEED_TEMPLATE URL and IE_NAME.
    """
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    """Extractor for the logged-in user's Watch Later list.

    _PERSONAL_FEED = True makes the base class request
    action_load_personal_feed instead of action_load_system_feed.
    """
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = u'Youtube Watch Later'
    _PERSONAL_FEED = True
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor for the logged-in user's favourites playlist."""
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:o?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        """Resolve the favourites page to its playlist id and delegate."""
        favourites_page = self._download_webpage(
            'https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        playlist_id = self._search_regex(
            r'list=(.+?)["&]', favourites_page, u'favourites playlist id')
        return self.url_result(playlist_id, 'YoutubePlaylist')