9 from .common import InfoExtractor, SearchInfoExtractor
15 compat_urllib_request,
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    # Google account sign-in endpoint used by _login().
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    # Page fetched by _set_language() so later pages are served in English/US.
    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    # Age-verification confirmation endpoint used by _confirm_age().
    _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    # Machine name looked up in the user's .netrc for credentials.
    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False
35 def report_lang(self):
36 """Report attempt to set language."""
37 self.to_screen(u'Setting language')
    def _set_language(self):
        """Fetch _LANG_URL so YouTube serves English/US pages afterwards."""
        request = compat_urllib_request.Request(self._LANG_URL)
        # NOTE(review): the matching 'try:' line (and the success-path
        # report/return statements) are missing from this excerpt.
        compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            # Language selection is best-effort: warn and carry on.
            self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
        # NOTE(review): the 'def _login(self):' line and many statements
        # ('try:' lines, early returns, parts of the login form dict) are
        # missing from this excerpt; indentation below is reconstructed.
        (username, password) = self._get_login_info()
        # No authentication to be performed
        if self._LOGIN_REQUIRED:
            raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)

        request = compat_urllib_request.Request(self._LOGIN_URL)
        login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))

        # Hidden anti-forgery tokens scraped from the login form HTML.
        match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
        match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)

        # Login form fields (the dict opening and several entries are elided).
        u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
        u'PersistentCookie': u'yes',
        u'bgresponse': u'js_disabled',
        u'checkConnection': u'',
        u'checkedDomains': u'youtube',
        u'signIn': u'Sign in',
        u'service': u'youtube',
        # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
        login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
        login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
        request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
        login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
        # If the login form is still present in the response, the
        # credentials were rejected.
        if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
            self._downloader.report_warning(u'unable to log in: bad username or password')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
    def _confirm_age(self):
        """POST the age-verification form so age-gated pages become readable."""
        # NOTE(review): the form dict's opening line(s) and the 'try:' line
        # are missing from this excerpt.
        'action_confirm': 'Confirm',
        request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
        self.report_age_confirmation()
        compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            # Unlike _set_language, age confirmation failure is fatal here.
            raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
    def _real_initialize(self):
        """Hook from InfoExtractor: set language and log in before extraction."""
        # NOTE(review): the bodies of these branches are missing from this
        # excerpt; presumably each returns early on failure — TODO confirm.
        if self._downloader is None:
        if not self._set_language():
        if not self._login():
class YoutubeIE(YoutubeBaseInfoExtractor):
    IE_DESC = u'YouTube.com'
    # NOTE(review): the '_VALID_URL = r"""...' opening line is missing from
    # this excerpt; the verbose-mode fragments below match the many YouTube
    # URL shapes and capture the video ID in the last group.
                         (?:https?://)?                                       # http(s):// (optional)
                         (?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/|
                            tube\.majestyc\.net/)                             # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
                             |(?:                                             # or the v= param in all its forms
                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?)?    # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                 (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                             )?                                               # optional -> youtube.com/xxxx is OK
                         )?                                                   # all until now is optional -> you can pass the naked ID
                         ([0-9A-Za-z_-]+)                                     # here is it! the YouTube video ID
                         (?(1).+)?                                            # if we found the ID, everything can follow
    # Captures the target of a next_url= redirect parameter.
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
    # Listed in order of quality
    # NOTE(review): the closing brackets of these lists are missing from this
    # excerpt.
    _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13',
                          '95', '94', '93', '92', '132', '151',
                          '85', '84', '102', '83', '101', '82', '100',
                          '138', '137', '248', '136', '247', '135', '246',
                          '245', '244', '134', '243', '133', '242', '160',
                          '141', '172', '140', '171', '139',
    # Alternative ordering used when the 'prefer_free_formats' option is set
    # (see _get_video_url_list).
    _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13',
                                      '95', '94', '93', '92', '132', '151',
                                      '85', '102', '84', '101', '83', '100', '82',
                                      '138', '248', '137', '247', '136', '246', '245',
                                      '244', '135', '243', '134', '242', '133', '160',
                                      '172', '141', '171', '140', '139',
    # itag -> container extension map (dict body elided in this excerpt).
    _video_extensions = {
        # videos that use m3u8
    # itag -> display dimensions map (dict body elided in this excerpt).
    _video_dimensions = {
    # NOTE(review): the '_TESTS = [' opening line is missing from this
    # excerpt; the entries below are integration-test fixtures.
        u"url":  u"http://www.youtube.com/watch?v=BaW_jenozKc",
        u"file":  u"BaW_jenozKc.mp4",
            u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
            u"uploader": u"Philipp Hagemeister",
            u"uploader_id": u"phihag",
            u"upload_date": u"20121002",
            u"description": u"test chars: \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
        u"url":  u"http://www.youtube.com/watch?v=1ltcDfZMA3U",
        u"file":  u"1ltcDfZMA3U.flv",
        u"note": u"Test VEVO video (#897)",
            u"upload_date": u"20070518",
            u"title": u"Maps - It Will Find You",
            u"description": u"Music video by Maps performing It Will Find You.",
            u"uploader": u"MuteUSA",
            u"uploader_id": u"MuteUSA"
        u"url":  u"http://www.youtube.com/watch?v=UxxajLWwzqY",
        u"file":  u"UxxajLWwzqY.mp4",
        u"note": u"Test generic use_cipher_signature video (#897)",
            u"upload_date": u"20120506",
            u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
            u"description": u"md5:b085c9804f5ab69f4adea963a2dceb3c",
            u"uploader": u"Icona Pop",
            u"uploader_id": u"IconaPop"
        u"url":  u"https://www.youtube.com/watch?v=07FYdnEawAQ",
        u"file":  u"07FYdnEawAQ.mp4",
        u"note": u"Test VEVO video with age protection (#956)",
            u"upload_date": u"20130703",
            u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
            u"description": u"md5:64249768eec3bc4276236606ea996373",
            u"uploader": u"justintimberlakeVEVO",
            u"uploader_id": u"justintimberlakeVEVO"
        u'url': u'https://www.youtube.com/watch?v=TGi3HqYrWHE',
        u'file': u'TGi3HqYrWHE.mp4',
        u'note': u'm3u8 video',
            u'title': u'Triathlon - Men - London 2012 Olympic Games',
            u'description': u'- Men - TR02 - Triathlon - 07 August 2012 - London 2012 Olympic Games',
            u'uploader': u'olympic',
            u'upload_date': u'20120807',
            u'uploader_id': u'olympic',
            # m3u8 downloads are not performed in tests, only checked.
            u'skip_download': True,
374 def suitable(cls, url):
375 """Receives a URL and returns True if suitable for this IE."""
376 if YoutubePlaylistIE.suitable(url) or YoutubeSubscriptionsIE.suitable(url): return False
377 return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
379 def report_video_webpage_download(self, video_id):
380 """Report attempt to download video webpage."""
381 self.to_screen(u'%s: Downloading video webpage' % video_id)
383 def report_video_info_webpage_download(self, video_id):
384 """Report attempt to download video info webpage."""
385 self.to_screen(u'%s: Downloading video info webpage' % video_id)
387 def report_video_subtitles_download(self, video_id):
388 """Report attempt to download video info webpage."""
389 self.to_screen(u'%s: Checking available subtitles' % video_id)
391 def report_video_subtitles_request(self, video_id, sub_lang, format):
392 """Report attempt to download video info webpage."""
393 self.to_screen(u'%s: Downloading video subtitles for %s.%s' % (video_id, sub_lang, format))
395 def report_video_subtitles_available(self, video_id, sub_lang_list):
396 """Report available subtitles."""
397 sub_lang = ",".join(list(sub_lang_list.keys()))
398 self.to_screen(u'%s: Available subtitles for video: %s' % (video_id, sub_lang))
400 def report_information_extraction(self, video_id):
401 """Report attempt to extract video information."""
402 self.to_screen(u'%s: Extracting video information' % video_id)
404 def report_unavailable_format(self, video_id, format):
405 """Report extracted video URL."""
406 self.to_screen(u'%s: Format %s not available' % (video_id, format))
408 def report_rtmp_download(self):
409 """Indicate the download will use the RTMP protocol."""
410 self.to_screen(u'RTMP download detected')
    def _decrypt_signature(self, s):
        """Turn the encrypted s field into a working signature"""
        # NOTE(review): the 'if/elif len(s) == N:' guard lines are missing
        # from this excerpt; each return below reorders the characters of one
        # specific signature length (the highest index used per line implies
        # the length it handles).
        return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
        return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
        return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
        return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12]
        return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
        return s[5:20] + s[2] + s[21:]
        return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
        return s[83:27:-1] + s[0] + s[26:5:-1] + s[2:0:-1] + s[27]
        return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
        return s[1:19] + s[0] + s[20:68] + s[19] + s[69:82]
        return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
        return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
        return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
        # Unknown length: the cipher changes with player versions, so a retry
        # after an update may succeed.
        raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
    def _decrypt_signature_age_gate(self, s):
        # The videos with age protection use another player, so the algorithms
        # differ. NOTE(review): the guarding 'if len(s) == N:' line is missing
        # from this excerpt.
        return s[2:63] + s[82] + s[64:82] + s[63]
        # Fallback to the other algorithms
        return self._decrypt_signature(s)
    def _get_available_subtitles(self, video_id):
        """Return a {lang_code: track_name} dict of the video's subtitle tracks."""
        self.report_video_subtitles_download(video_id)
        request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
        # NOTE(review): the 'try:' line and the error-path return are missing
        # from this excerpt.
        sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
        # Each findall hit is (track name, language code); invert into a
        # lang_code -> name mapping.
        sub_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
        sub_lang_list = dict((l[1], l[0]) for l in sub_lang_list)
        if not sub_lang_list:
            self._downloader.report_warning(u'video doesn\'t have subtitles')
470 def _list_available_subtitles(self, video_id):
471 sub_lang_list = self._get_available_subtitles(video_id)
472 self.report_video_subtitles_available(video_id, sub_lang_list)
    def _request_subtitle(self, sub_lang, sub_name, video_id, format):
        """
        Return the subtitle as a string or None if they are not found
        """
        self.report_video_subtitles_request(video_id, sub_lang, format)
        # NOTE(review): the params dict entries, the 'try:' line and the
        # return statements are missing from this excerpt.
        params = compat_urllib_parse.urlencode({
        url = 'http://www.youtube.com/api/timedtext?' + params
        sub = compat_urllib_request.urlopen(url).read().decode('utf-8')
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
        # Empty response body: warn but do not abort extraction.
        self._downloader.report_warning(u'Did not fetch video subtitles')
    def _request_automatic_caption(self, video_id, webpage):
        """We need the webpage for getting the captions url, pass it as an
        argument to speed up the process."""
        # NOTE(review): 'try:' lines and the params dict entries are missing
        # from this excerpt; indentation below is reconstructed.
        sub_lang = self._downloader.params.get('subtitleslang') or 'en'
        sub_format = self._downloader.params.get('subtitlesformat')
        self.to_screen(u'%s: Looking for automatic captions' % video_id)
        # The player config JSON embedded in the page carries the caption URL.
        mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
        err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang
            self._downloader.report_warning(err_msg)
        player_config = json.loads(mobj.group(1))
            args = player_config[u'args']
            caption_url = args[u'ttsurl']
            timestamp = args[u'timestamp']
            params = compat_urllib_parse.urlencode({
            subtitles_url = caption_url + '&' + params
            sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions')
            return {sub_lang: sub}
        # An extractor error can be raise by the download process if there are
        # no automatic captions but there are subtitles
        except (KeyError, ExtractorError):
            self._downloader.report_warning(err_msg)
    def _extract_subtitles(self, video_id):
        """
        Return a dictionary: {language: subtitles} or {} if the subtitles
        """
        # NOTE(review): several lines (early returns, 'sub_lang = ...'
        # assignments, the final return) are missing from this excerpt;
        # indentation below is reconstructed.
        sub_lang_list = self._get_available_subtitles(video_id)
        sub_format = self._downloader.params.get('subtitlesformat')
        if not sub_lang_list: #There was some error, it didn't get the available subtitles
        # Language choice: the user's requested language, else English,
        # else the first language listed.
        if self._downloader.params.get('writesubtitles', False):
            if self._downloader.params.get('subtitleslang', False):
                sub_lang = self._downloader.params.get('subtitleslang')
            elif 'en' in sub_lang_list:
                sub_lang = list(sub_lang_list.keys())[0]
            if not sub_lang in sub_lang_list:
                self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang)
            sub_lang_list = {sub_lang: sub_lang_list[sub_lang]}
        for sub_lang in sub_lang_list:
            subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
                subtitles[sub_lang] = subtitle
    def _print_formats(self, formats):
        """Print each itag with its container extension and dimensions."""
        print('Available formats:')
        # NOTE(review): the 'for x in formats:' line is missing from this
        # excerpt.
        print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
                                    self._video_dimensions.get(x, '???'),
                                    ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
    def _extract_id(self, url):
        """Extract the video id (second capture group of _VALID_URL) from url."""
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
        # NOTE(review): the 'if mobj is None:' guard and the return statement
        # are missing from this excerpt.
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group(2)
    def _get_video_url_list(self, url_map):
        """
        Transform a dictionary in the format {itag:url} to a list of (itag, url)
        with the requested formats.
        """
        # NOTE(review): the 'else:' lines and some control-flow lines are
        # missing from this excerpt; indentation below is reconstructed.
        req_format = self._downloader.params.get('format', None)
        format_limit = self._downloader.params.get('format_limit', None)
        # Quality ordering comes from the class-level itag lists; the
        # prefer_free_formats option swaps in the alternative ordering.
        available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
        if format_limit is not None and format_limit in available_formats:
            format_list = available_formats[available_formats.index(format_limit):]
            format_list = available_formats
        existing_formats = [x for x in format_list if x in url_map]
        if len(existing_formats) == 0:
            raise ExtractorError(u'no known formats available for video')
        if self._downloader.params.get('listformats', None):
            self._print_formats(existing_formats)
        if req_format is None or req_format == 'best':
            video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
        elif req_format == 'worst':
            video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
        elif req_format in ('-1', 'all'):
            video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
        # Specific formats. We pick the first in a slash-delimeted sequence.
        # For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
        req_formats = req_format.split('/')
        video_url_list = None
        for rf in req_formats:
            video_url_list = [(rf, url_map[rf])]
        if video_url_list is None:
            raise ExtractorError(u'requested format not available')
        return video_url_list
    def _extract_from_m3u8(self, manifest_url, video_id):
        """Build an {itag: url} map from an HLS (m3u8) formats manifest."""
        # NOTE(review): the url_map initialisation and the return line are
        # missing from this excerpt.
        def _get_urls(_manifest):
            # Keep only non-comment lines, i.e. the actual media URLs.
            lines = _manifest.split('\n')
            urls = filter(lambda l: l and not l.startswith('#'),
        manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
        formats_urls = _get_urls(manifest)
        for format_url in formats_urls:
            # The itag is encoded in the per-format URL path.
            itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
            url_map[itag] = format_url
    def _real_extract(self, url):
        """Download the watch page and video info, decrypt signatures when
        present, and build one result dict per selected format.

        NOTE(review): many lines ('try:' lines, 'else:' branches, dict
        openings/closings, early returns) are missing from this excerpt;
        indentation below is reconstructed.
        """
        if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
            self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply youtube-dl BaW_jenozKc ).')

        # Extract original video URL from URL with redirection, like age verification, using next_url parameter
        mobj = re.search(self._NEXT_URL_RE, url)
            url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
        video_id = self._extract_id(url)

        # Get video webpage
        self.report_video_webpage_download(video_id)
        url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
        request = compat_urllib_request.Request(url)
            video_webpage_bytes = compat_urllib_request.urlopen(request).read()
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))
        video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')

        # Attempt to extract SWF player URL
        mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
            player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))

        # Get video info
        self.report_video_info_webpage_download(video_id)
        if re.search(r'player-age-gate-content">', video_webpage) is not None:
            self.report_age_confirmation()
            # We simulate the access to the video from www.youtube.com/v/{video_id}
            # this can be viewed without login into Youtube
            data = compat_urllib_parse.urlencode({'video_id': video_id,
                                                  'eurl': 'https://youtube.googleapis.com/v/' + video_id,
            video_info_url = 'https://www.youtube.com/get_video_info?' + data
            video_info_webpage = self._download_webpage(video_info_url, video_id,
                                                        errnote='unable to download video info webpage')
            video_info = compat_parse_qs(video_info_webpage)
            # Non-age-gated path: try several 'el' values until one yields a token.
            for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
                video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                                  % (video_id, el_type))
                video_info_webpage = self._download_webpage(video_info_url, video_id,
                                                            errnote='unable to download video info webpage')
                video_info = compat_parse_qs(video_info_webpage)
                if 'token' in video_info:
        if 'token' not in video_info:
            if 'reason' in video_info:
                raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
                raise ExtractorError(u'"token" parameter not in video info for unknown reason')

        # Check for "rental" videos
        if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
            raise ExtractorError(u'"rental" videos not supported')

        # Start extracting information
        self.report_information_extraction(video_id)

        # Uploader name (mandatory).
        if 'author' not in video_info:
            raise ExtractorError(u'Unable to extract uploader name')
        video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])

        # Uploader id (optional, scraped from the page markup).
        video_uploader_id = None
        mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
            video_uploader_id = mobj.group(1)
            self._downloader.report_warning(u'unable to extract uploader nickname')

        # Title (mandatory).
        if 'title' not in video_info:
            raise ExtractorError(u'Unable to extract video title')
        video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])

        # We try first to get a high quality image:
        m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
                            video_webpage, re.DOTALL)
        if m_thumb is not None:
            video_thumbnail = m_thumb.group(1)
        elif 'thumbnail_url' not in video_info:
            self._downloader.report_warning(u'unable to extract video thumbnail')
        else:   # don't panic if we can't find it
            video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])

        # Upload date, normalised to YYYYMMDD by unified_strdate.
        mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
            upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
            upload_date = unified_strdate(upload_date)

        # Description: page element first, <meta> tag as fallback.
        video_description = get_element_by_id("eow-description", video_webpage)
        if video_description:
            video_description = clean_html(video_description)
            fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
                video_description = unescapeHTML(fd_mobj.group(1))
                video_description = u''

        # Subtitles, according to the user's options.
        video_subtitles = None
        if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False):
            video_subtitles = self._extract_subtitles(video_id)
        elif self._downloader.params.get('writeautomaticsub', False):
            video_subtitles = self._request_automatic_caption(video_id, video_webpage)

        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(video_id)

        if 'length_seconds' not in video_info:
            self._downloader.report_warning(u'unable to extract video duration')
            video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])

        # Decide which formats to download
        mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
            raise ValueError('Could not find vevo ID')
        info = json.loads(mobj.group(1))
        # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
        # this signatures are encrypted
        m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map'])
            self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
            video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
        m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u''))
            if 'url_encoded_fmt_stream_map' in video_info:
                video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts']
                video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']]
        elif 'adaptive_fmts' in video_info:
            if 'url_encoded_fmt_stream_map' in video_info:
                video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0]
                video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts']

        if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
            self.report_rtmp_download()
            video_url_list = [(None, video_info['conn'][0])]
        elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
            if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]:
                raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
            for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
                url_data = compat_parse_qs(url_data_str)
                if 'itag' in url_data and 'url' in url_data:
                    url = url_data['url'][0]
                    if 'sig' in url_data:
                        # Plain-text signature provided directly.
                        url += '&signature=' + url_data['sig'][0]
                    elif 's' in url_data:
                        # Encrypted signature: in verbose mode, report player
                        # version and signature shape to help debugging.
                        if self._downloader.params.get('verbose'):
                            player_version = self._search_regex(r'ad3-(.+?)\.swf',
                                                                video_info['ad3_module'][0] if 'ad3_module' in video_info else 'NOT FOUND',
                                                                'flash player', fatal=False)
                            player = 'flash player %s' % player_version
                            player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage,
                                                                             'html5 player', fatal=False)
                            parts_sizes = u'.'.join(compat_str(len(part)) for part in s.split('.'))
                            self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
                                           (len(s), parts_sizes, url_data['itag'][0], player))
                        encrypted_sig = url_data['s'][0]
                            signature = self._decrypt_signature_age_gate(encrypted_sig)
                            signature = self._decrypt_signature(encrypted_sig)
                        url += '&signature=' + signature
                    if 'ratebypass' not in url:
                        url += '&ratebypass=yes'
                    url_map[url_data['itag'][0]] = url
            video_url_list = self._get_video_url_list(url_map)
            if not video_url_list:
        elif video_info.get('hlsvp'):
            # HLS-only video: the formats come from the m3u8 manifest instead.
            manifest_url = video_info['hlsvp'][0]
            url_map = self._extract_from_m3u8(manifest_url, video_id)
            video_url_list = self._get_video_url_list(url_map)
            if not video_url_list:
            raise ExtractorError(u'no conn or url_encoded_fmt_stream_map information found in video info')

        # One result dict per selected (itag, url) pair.
        for format_param, video_real_url in video_url_list:
            # Extension
            video_extension = self._video_extensions.get(format_param, 'flv')
            video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
                                                 self._video_dimensions.get(format_param, '???'),
                                                 ' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '')
                'url': video_real_url,
                'uploader': video_uploader,
                'uploader_id': video_uploader_id,
                'upload_date': upload_date,
                'title': video_title,
                'ext': video_extension,
                'format': video_format,
                'thumbnail': video_thumbnail,
                'description': video_description,
                'player_url': player_url,
                'subtitles': video_subtitles,
                'duration': video_duration
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = u'YouTube.com playlists'
    # NOTE(review): the '_VALID_URL = r"""...' opening line is missing from
    # this excerpt; the verbose fragments below match playlist-style URLs and
    # capture the playlist id (optional PL/EC/UU/FL prefix plus >=10 id chars).
                     (?:course|view_play_list|my_playlists|artist|playlist|watch)
                     \? (?:.*?&)*? (?:p|a|list)=
                        ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
                        ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
    # GData API v2 playlist feed: JSON output, paginated with
    # max-results/start-index.
    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
    IE_NAME = u'youtube:playlist'
882 def suitable(cls, url):
883 """Receives a URL and returns True if suitable for this IE."""
884 return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
    def _real_extract(self, url):
        # NOTE(review): several lines ('if mobj is None:', 'videos = []',
        # 'try:'/'else:'/'break' lines) are missing from this excerpt;
        # indentation below is reconstructed.
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
            raise ExtractorError(u'Invalid URL: %s' % url)

        # Download playlist videos from API
        playlist_id = mobj.group(1) or mobj.group(2)
        for page_num in itertools.count(1):
            start_index = self._MAX_RESULTS * (page_num - 1) + 1
            # The GData API rejects start-index values of 1000 and above.
            if start_index >= 1000:
                self._downloader.report_warning(u'Max number of results reached')
            url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
            page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
            response = json.loads(page)
            except ValueError as err:
                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
            if 'feed' not in response:
                raise ExtractorError(u'Got a malformed response from YouTube API')
            playlist_title = response['feed']['title']['$t']
            if 'entry' not in response['feed']:
                # Number of videos is a multiple of self._MAX_RESULTS
            for entry in response['feed']['entry']:
                index = entry['yt$position']['$t']
                if 'media$group' in entry and 'media$player' in entry['media$group']:
                    videos.append((index, entry['media$group']['media$player']['url']))
        # Order by the yt$position index, then drop the index.
        videos = [v[1] for v in sorted(videos)]
        url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
        return [self.playlist_result(url_results, playlist_id, playlist_title)]
class YoutubeChannelIE(InfoExtractor):
    IE_DESC = u'YouTube.com channels'
    _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
    # First page is plain HTML; later pages come from the c4_browse_ajax
    # JSON endpoint below.
    _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
    # Marker whose presence in a page means more pages are available.
    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
    _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
    IE_NAME = u'youtube:channel'
    def extract_videos_from_page(self, page):
        """Collect the distinct video ids linked from a channel page, in order."""
        # NOTE(review): the 'ids_in_page = []' initialisation and the return
        # statement are missing from this excerpt.
        for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
            if mobj.group(1) not in ids_in_page:
                ids_in_page.append(mobj.group(1))
    def _real_extract(self, url):
        # NOTE(review): several lines ('if mobj is None:', 'video_ids = []',
        # initial pagenum, 'break') are missing from this excerpt;
        # indentation below is reconstructed.
        mobj = re.match(self._VALID_URL, url)
            raise ExtractorError(u'Invalid URL: %s' % url)

        # Download channel page
        channel_id = mobj.group(1)
        url = self._TEMPLATE_URL % (channel_id, pagenum)
        page = self._download_webpage(url, channel_id,
                                      u'Downloading page #%s' % pagenum)

        # Extract video identifiers
        ids_in_page = self.extract_videos_from_page(page)
        video_ids.extend(ids_in_page)

        # Download any subsequent channel pages using the json-based channel_ajax query
        if self._MORE_PAGES_INDICATOR in page:
            for pagenum in itertools.count(1):
                url = self._MORE_PAGES_URL % (pagenum, channel_id)
                page = self._download_webpage(url, channel_id,
                                              u'Downloading page #%s' % pagenum)
                page = json.loads(page)
                ids_in_page = self.extract_videos_from_page(page['content_html'])
                video_ids.extend(ids_in_page)
                # Stop once the widget no longer advertises another page.
                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:

        self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))

        urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
        url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
        return [self.playlist_result(url_entries, channel_id)]
class YoutubeUserIE(InfoExtractor):
    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    # GData caps results per request, so uploads are fetched page by page.
    _GDATA_PAGE_SIZE = 50
    _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
    # Captures the video id from watch links in the feed markup.
    _VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]'
    IE_NAME = u'youtube:user'
    def _real_extract(self, url):
        # NOTE(review): several lines ('if mobj is None:', list
        # initialisations, 'try:'/'break' lines) are missing from this
        # excerpt; indentation below is reconstructed.
        mobj = re.match(self._VALID_URL, url)
            raise ExtractorError(u'Invalid URL: %s' % url)

        username = mobj.group(1)

        # Download video ids using YouTube Data API. Result size per
        # query is limited (currently to 50 videos) so we need to query
        # page by page until there are no video ids - it means we got
        for pagenum in itertools.count(0):
            start_index = pagenum * self._GDATA_PAGE_SIZE + 1
            gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
            page = self._download_webpage(gdata_url, username,
                                          u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))

            # Extract video identifiers
            for mobj in re.finditer(self._VIDEO_INDICATOR, page):
                if mobj.group(1) not in ids_in_page:
                    ids_in_page.append(mobj.group(1))
            video_ids.extend(ids_in_page)

            # A little optimization - if current page is not
            # "full", ie. does not contain PAGE_SIZE video ids then
            # we can assume that this page is the last one - there
            # are no more ids on further pages - no need to query
            if len(ids_in_page) < self._GDATA_PAGE_SIZE:

        urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
        url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
        return [self.playlist_result(url_results, playlist_title = username)]
class YoutubeSearchIE(SearchInfoExtractor):
    IE_DESC = u'YouTube.com searches'
    # GData search endpoint: 50 results per page, JSON-C output.
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    # NOTE(review): the _MAX_RESULTS assignment is missing from this excerpt.
    IE_NAME = u'youtube:search'
    # Prefix used on the command line: ytsearchN:query.
    _SEARCH_KEY = 'ytsearch'
1043 def report_download_page(self, query, pagenum):
1044 """Report attempt to download search page with given number."""
1045 self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        # NOTE(review): the initialisation of video_ids/pagenum/limit and the
        # 'try:'/'pagenum += 1' lines are missing from this excerpt;
        # indentation below is reconstructed.
        while (50 * pagenum) < limit:
            self.report_download_page(query, pagenum+1)
            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
            request = compat_urllib_request.Request(result_url)
            data = compat_urllib_request.urlopen(request).read().decode('utf-8')
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
            api_response = json.loads(data)['data']

            if not 'items' in api_response:
                raise ExtractorError(u'[youtube] No video results')

            new_ids = list(video['id'] for video in api_response['items'])
            video_ids += new_ids

            # Never ask for more than the API reports as actually available.
            limit = min(n, api_response['totalItems'])

        if len(video_ids) > n:
            video_ids = video_ids[:n]
        videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
        return self.playlist_result(videos, query)
class YoutubeShowIE(InfoExtractor):
    """Resolve a YouTube show page into one playlist result per season."""
    IE_NAME = u'youtube:show'
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'

    def _real_extract(self, url):
        show_name = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # There's one playlist for each season of the show
        seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(seasons)))
        results = []
        for season in seasons:
            results.append(self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist'))
        return results
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for extractors that fetch info from
    http://www.youtube.com/feed_ajax
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    """
    # Feeds are personal, so login credentials are mandatory.
    _LOGIN_REQUIRED = True
    # Paging increment passed to feed_ajax between successive requests.
    _PAGING_STEP = 30
    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False

    @property
    def _FEED_TEMPLATE(self):
        # URL template with one remaining %s placeholder for the paging offset.
        action = 'action_load_system_feed'
        if self._PERSONAL_FEED:
            action = 'action_load_personal_feed'
        return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

    @property
    def IE_NAME(self):
        return u'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        # NOTE(review): relies on the login flow of YoutubeBaseInfoExtractor — confirm.
        self._login()

    def _real_extract(self, url):
        feed_entries = []
        # The step argument is available only in 2.7 or higher
        for i in itertools.count(0):
            paging = i*self._PAGING_STEP
            info = self._download_webpage(self._FEED_TEMPLATE % paging,
                                          u'%s feed' % self._FEED_NAME,
                                          u'Downloading page %s' % i)
            info = json.loads(info)
            feed_html = info['feed_html']
            m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
            # Deduplicate while keeping first-seen order.
            ids = orderedSet(m.group(1) for m in m_ids)
            feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
            # A null 'paging' value marks the last page of the feed.
            if info['paging'] is None:
                break
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the authenticated user's subscriptions."""
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the authenticated user's recommended videos."""
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the authenticated user's watch-later list."""
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = u'Youtube Watch Later'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
    # Watch-later is a personal feed, not a system feed.
    _PERSONAL_FEED = True
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extract the user's favourites by delegating to the playlist extractor."""
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:o?rites)?'

    def _real_extract(self, url):
        # The favourites page embeds the id of its backing playlist; hand
        # the actual extraction over via a YoutubePlaylist url result.
        page = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        favourites_id = self._search_regex(r'list=(.+?)["&]', page, u'favourites playlist id')
        return self.url_result(favourites_id, 'YoutubePlaylist')