9 from .common import InfoExtractor, SearchInfoExtractor
15 compat_urllib_request,
26 class YoutubeBaseInfoExtractor(InfoExtractor):
27 """Provide base functions for Youtube extractors"""
28 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
29 _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
30 _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
31 _NETRC_MACHINE = 'youtube'
32 # If True it will raise an error if no login info is provided
33 _LOGIN_REQUIRED = False
35 def report_lang(self):
36 """Report attempt to set language."""
37 self.to_screen(u'Setting language')
39 def _set_language(self):
40 request = compat_urllib_request.Request(self._LANG_URL)
43 compat_urllib_request.urlopen(request).read()
44 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
45 self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
50 (username, password) = self._get_login_info()
51 # No authentication to be performed
53 if self._LOGIN_REQUIRED:
54 raise ExtractorError(u'No login info available, needed for using %s.' % self.IE_NAME, expected=True)
57 request = compat_urllib_request.Request(self._LOGIN_URL)
59 login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
60 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
61 self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
66 match = re.search(re.compile(r'<input.+?name="GALX".+?value="(.+?)"', re.DOTALL), login_page)
69 match = re.search(re.compile(r'<input.+?name="dsh".+?value="(.+?)"', re.DOTALL), login_page)
75 u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
79 u'PersistentCookie': u'yes',
81 u'bgresponse': u'js_disabled',
82 u'checkConnection': u'',
83 u'checkedDomains': u'youtube',
89 u'signIn': u'Sign in',
91 u'service': u'youtube',
95 # Convert to UTF-8 *before* urlencode because Python 2.x's urlencode
97 login_form = dict((k.encode('utf-8'), v.encode('utf-8')) for k,v in login_form_strs.items())
98 login_data = compat_urllib_parse.urlencode(login_form).encode('ascii')
99 request = compat_urllib_request.Request(self._LOGIN_URL, login_data)
102 login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
103 if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
104 self._downloader.report_warning(u'unable to log in: bad username or password')
106 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
107 self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
111 def _confirm_age(self):
114 'action_confirm': 'Confirm',
116 request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
118 self.report_age_confirmation()
119 compat_urllib_request.urlopen(request).read().decode('utf-8')
120 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
121 raise ExtractorError(u'Unable to confirm age: %s' % compat_str(err))
124 def _real_initialize(self):
125 if self._downloader is None:
127 if not self._set_language():
129 if not self._login():
133 class YoutubeIE(YoutubeBaseInfoExtractor):
134 IE_DESC = u'YouTube.com'
137 (?:https?://)? # http(s):// (optional)
138 (?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/|
139 tube\.majestyc\.net/) # the various hostnames, with wildcard subdomains
140 (?:.*?\#/)? # handle anchor (#/) redirect urls
141 (?: # the various things that can precede the ID:
142 (?:(?:v|embed|e)/) # v/ or embed/ or e/
143 |(?: # or the v= param in all its forms
144 (?:(?:watch|movie)(?:_popup)?(?:\.php)?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
145 (?:\?|\#!?) # the params delimiter ? or # or #!
146 (?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx)
149 )? # optional -> youtube.com/xxxx is OK
150 )? # all until now is optional -> you can pass the naked ID
151 ([0-9A-Za-z_-]+) # here is it! the YouTube video ID
152 (?(1).+)? # if we found the ID, everything can follow
154 _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
155 # Listed in order of quality
156 _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13',
157 '95', '94', '93', '92', '132', '151',
159 '85', '84', '102', '83', '101', '82', '100',
161 '138', '137', '248', '136', '247', '135', '246',
162 '245', '244', '134', '243', '133', '242', '160',
164 '141', '172', '140', '171', '139',
166 _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13',
167 '95', '94', '93', '92', '132', '151',
168 '85', '102', '84', '101', '83', '100', '82',
170 '138', '248', '137', '247', '136', '246', '245',
171 '244', '135', '243', '134', '242', '133', '160',
173 '172', '141', '171', '140', '139',
175 _video_extensions = {
196 # videos that use m3u8
228 _video_dimensions = {
309 u"url": u"http://www.youtube.com/watch?v=BaW_jenozKc",
310 u"file": u"BaW_jenozKc.mp4",
312 u"title": u"youtube-dl test video \"'/\\ä↭𝕐",
313 u"uploader": u"Philipp Hagemeister",
314 u"uploader_id": u"phihag",
315 u"upload_date": u"20121002",
316 u"description": u"test chars: \"'/\\ä↭𝕐\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de ."
320 u"url": u"http://www.youtube.com/watch?v=1ltcDfZMA3U",
321 u"file": u"1ltcDfZMA3U.flv",
322 u"note": u"Test VEVO video (#897)",
324 u"upload_date": u"20070518",
325 u"title": u"Maps - It Will Find You",
326 u"description": u"Music video by Maps performing It Will Find You.",
327 u"uploader": u"MuteUSA",
328 u"uploader_id": u"MuteUSA"
332 u"url": u"http://www.youtube.com/watch?v=UxxajLWwzqY",
333 u"file": u"UxxajLWwzqY.mp4",
334 u"note": u"Test generic use_cipher_signature video (#897)",
336 u"upload_date": u"20120506",
337 u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
338 u"description": u"md5:b085c9804f5ab69f4adea963a2dceb3c",
339 u"uploader": u"Icona Pop",
340 u"uploader_id": u"IconaPop"
344 u"url": u"https://www.youtube.com/watch?v=07FYdnEawAQ",
345 u"file": u"07FYdnEawAQ.mp4",
346 u"note": u"Test VEVO video with age protection (#956)",
348 u"upload_date": u"20130703",
349 u"title": u"Justin Timberlake - Tunnel Vision (Explicit)",
350 u"description": u"md5:64249768eec3bc4276236606ea996373",
351 u"uploader": u"justintimberlakeVEVO",
352 u"uploader_id": u"justintimberlakeVEVO"
356 u'url': u'https://www.youtube.com/watch?v=TGi3HqYrWHE',
357 u'file': u'TGi3HqYrWHE.mp4',
358 u'note': u'm3u8 video',
360 u'title': u'Triathlon - Men - London 2012 Olympic Games',
361 u'description': u'- Men - TR02 - Triathlon - 07 August 2012 - London 2012 Olympic Games',
362 u'uploader': u'olympic',
363 u'upload_date': u'20120807',
364 u'uploader_id': u'olympic',
367 u'skip_download': True,
374 def suitable(cls, url):
375 """Receives a URL and returns True if suitable for this IE."""
376 if YoutubePlaylistIE.suitable(url) or YoutubeSubscriptionsIE.suitable(url): return False
377 return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
379 def report_video_webpage_download(self, video_id):
380 """Report attempt to download video webpage."""
381 self.to_screen(u'%s: Downloading video webpage' % video_id)
383 def report_video_info_webpage_download(self, video_id):
384 """Report attempt to download video info webpage."""
385 self.to_screen(u'%s: Downloading video info webpage' % video_id)
387 def report_video_subtitles_download(self, video_id):
388 """Report attempt to download video info webpage."""
389 self.to_screen(u'%s: Checking available subtitles' % video_id)
391 def report_video_subtitles_request(self, video_id, sub_lang, format):
392 """Report attempt to download video info webpage."""
393 self.to_screen(u'%s: Downloading video subtitles for %s.%s' % (video_id, sub_lang, format))
395 def report_video_subtitles_available(self, video_id, sub_lang_list):
396 """Report available subtitles."""
397 sub_lang = ",".join(list(sub_lang_list.keys()))
398 self.to_screen(u'%s: Available subtitles for video: %s' % (video_id, sub_lang))
400 def report_information_extraction(self, video_id):
401 """Report attempt to extract video information."""
402 self.to_screen(u'%s: Extracting video information' % video_id)
404 def report_unavailable_format(self, video_id, format):
405 """Report extracted video URL."""
406 self.to_screen(u'%s: Format %s not available' % (video_id, format))
408 def report_rtmp_download(self):
409 """Indicate the download will use the RTMP protocol."""
410 self.to_screen(u'RTMP download detected')
412 def _decrypt_signature(self, s):
413 """Turn the encrypted s field into a working signature"""
416 return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
418 return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
420 return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1]
422 return s[48] + s[81:67:-1] + s[82] + s[66:62:-1] + s[85] + s[61:48:-1] + s[67] + s[47:12:-1] + s[3] + s[11:3:-1] + s[2] + s[12]
424 return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
426 return s[5:20] + s[2] + s[21:]
428 return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
430 return s[83:27:-1] + s[0] + s[26:5:-1] + s[2:0:-1] + s[27]
432 return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
434 return s[1:19] + s[0] + s[20:68] + s[19] + s[69:82]
436 return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
438 return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80]
440 return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
443 raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
445 def _decrypt_signature_age_gate(self, s):
446 # The videos with age protection use another player, so the algorithms
449 return s[2:63] + s[82] + s[64:82] + s[63]
451 # Fall back to the other algorithms
452 return self._decrypt_signature(s)
455 def _get_available_subtitles(self, video_id):
456 self.report_video_subtitles_download(video_id)
457 request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
459 sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
460 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
461 return (u'unable to download video subtitles: %s' % compat_str(err), None)
462 sub_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
463 sub_lang_list = dict((l[1], l[0]) for l in sub_lang_list)
464 if not sub_lang_list:
465 return (u'video doesn\'t have subtitles', None)
468 def _list_available_subtitles(self, video_id):
469 sub_lang_list = self._get_available_subtitles(video_id)
470 self.report_video_subtitles_available(video_id, sub_lang_list)
472 def _request_subtitle(self, sub_lang, sub_name, video_id, format):
475 (error_message, sub_lang, sub)
477 self.report_video_subtitles_request(video_id, sub_lang, format)
478 params = compat_urllib_parse.urlencode({
484 url = 'http://www.youtube.com/api/timedtext?' + params
486 sub = compat_urllib_request.urlopen(url).read().decode('utf-8')
487 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
488 return (u'unable to download video subtitles: %s' % compat_str(err), None, None)
490 return (u'Did not fetch video subtitles', None, None)
491 return (None, sub_lang, sub)
493 def _request_automatic_caption(self, video_id, webpage):
494 """We need the webpage for getting the captions url, pass it as an
495 argument to speed up the process."""
496 sub_lang = self._downloader.params.get('subtitleslang') or 'en'
497 sub_format = self._downloader.params.get('subtitlesformat')
498 self.to_screen(u'%s: Looking for automatic captions' % video_id)
499 mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
500 err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang
502 return [(err_msg, None, None)]
503 player_config = json.loads(mobj.group(1))
505 args = player_config[u'args']
506 caption_url = args[u'ttsurl']
507 timestamp = args[u'timestamp']
508 params = compat_urllib_parse.urlencode({
515 subtitles_url = caption_url + '&' + params
516 sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions')
517 return [(None, sub_lang, sub)]
519 return [(err_msg, None, None)]
521 def _extract_subtitle(self, video_id):
523 Return a list with a tuple:
524 [(error_message, sub_lang, sub)]
526 sub_lang_list = self._get_available_subtitles(video_id)
527 sub_format = self._downloader.params.get('subtitlesformat')
528 if isinstance(sub_lang_list,tuple): #There was some error, it didn't get the available subtitles
529 return [(sub_lang_list[0], None, None)]
530 if self._downloader.params.get('subtitleslang', False):
531 sub_lang = self._downloader.params.get('subtitleslang')
532 elif 'en' in sub_lang_list:
535 sub_lang = list(sub_lang_list.keys())[0]
536 if not sub_lang in sub_lang_list:
537 return [(u'no closed captions found in the specified language "%s"' % sub_lang, None, None)]
539 subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
542 def _extract_all_subtitles(self, video_id):
543 sub_lang_list = self._get_available_subtitles(video_id)
544 sub_format = self._downloader.params.get('subtitlesformat')
545 if isinstance(sub_lang_list,tuple): #There was some error, it didn't get the available subtitles
546 return [(sub_lang_list[0], None, None)]
548 for sub_lang in sub_lang_list:
549 subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
550 subtitles.append(subtitle)
553 def _print_formats(self, formats):
554 print('Available formats:')
556 print('%s\t:\t%s\t[%s]%s' %(x, self._video_extensions.get(x, 'flv'),
557 self._video_dimensions.get(x, '???'),
558 ' ('+self._special_itags[x]+')' if x in self._special_itags else ''))
560 def _extract_id(self, url):
561 mobj = re.match(self._VALID_URL, url, re.VERBOSE)
563 raise ExtractorError(u'Invalid URL: %s' % url)
564 video_id = mobj.group(2)
567 def _get_video_url_list(self, url_map):
569 Transform a dictionary in the format {itag:url} to a list of (itag, url)
570 with the requested formats.
572 req_format = self._downloader.params.get('format', None)
573 format_limit = self._downloader.params.get('format_limit', None)
574 available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
575 if format_limit is not None and format_limit in available_formats:
576 format_list = available_formats[available_formats.index(format_limit):]
578 format_list = available_formats
579 existing_formats = [x for x in format_list if x in url_map]
580 if len(existing_formats) == 0:
581 raise ExtractorError(u'no known formats available for video')
582 if self._downloader.params.get('listformats', None):
583 self._print_formats(existing_formats)
585 if req_format is None or req_format == 'best':
586 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
587 elif req_format == 'worst':
588 video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
589 elif req_format in ('-1', 'all'):
590 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
592 # Specific formats. We pick the first in a slash-delimited sequence.
593 # For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
594 req_formats = req_format.split('/')
595 video_url_list = None
596 for rf in req_formats:
598 video_url_list = [(rf, url_map[rf])]
600 if video_url_list is None:
601 raise ExtractorError(u'requested format not available')
602 return video_url_list
604 def _extract_from_m3u8(self, manifest_url, video_id):
606 def _get_urls(_manifest):
607 lines = _manifest.split('\n')
608 urls = filter(lambda l: l and not l.startswith('#'),
611 manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
612 formats_urls = _get_urls(manifest)
613 for format_url in formats_urls:
614 itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
615 url_map[itag] = format_url
618 def _real_extract(self, url):
619 if re.match(r'(?:https?://)?[^/]+/watch\?feature=[a-z_]+$', url):
620 self._downloader.report_warning(u'Did you forget to quote the URL? Remember that & is a meta-character in most shells, so you want to put the URL in quotes, like youtube-dl \'http://www.youtube.com/watch?feature=foo&v=BaW_jenozKc\' (or simply youtube-dl BaW_jenozKc ).')
622 # Extract original video URL from URL with redirection, like age verification, using next_url parameter
623 mobj = re.search(self._NEXT_URL_RE, url)
625 url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
626 video_id = self._extract_id(url)
629 self.report_video_webpage_download(video_id)
630 url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
631 request = compat_urllib_request.Request(url)
633 video_webpage_bytes = compat_urllib_request.urlopen(request).read()
634 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
635 raise ExtractorError(u'Unable to download video webpage: %s' % compat_str(err))
637 video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')
639 # Attempt to extract SWF player URL
640 mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
642 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
647 self.report_video_info_webpage_download(video_id)
648 if re.search(r'player-age-gate-content">', video_webpage) is not None:
649 self.report_age_confirmation()
651 # We simulate the access to the video from www.youtube.com/v/{video_id}
652 # this can be viewed without login into Youtube
653 data = compat_urllib_parse.urlencode({'video_id': video_id,
657 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
661 video_info_url = 'https://www.youtube.com/get_video_info?' + data
662 video_info_webpage = self._download_webpage(video_info_url, video_id,
664 errnote='unable to download video info webpage')
665 video_info = compat_parse_qs(video_info_webpage)
668 for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
669 video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
670 % (video_id, el_type))
671 video_info_webpage = self._download_webpage(video_info_url, video_id,
673 errnote='unable to download video info webpage')
674 video_info = compat_parse_qs(video_info_webpage)
675 if 'token' in video_info:
677 if 'token' not in video_info:
678 if 'reason' in video_info:
679 raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
681 raise ExtractorError(u'"token" parameter not in video info for unknown reason')
683 # Check for "rental" videos
684 if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
685 raise ExtractorError(u'"rental" videos not supported')
687 # Start extracting information
688 self.report_information_extraction(video_id)
691 if 'author' not in video_info:
692 raise ExtractorError(u'Unable to extract uploader name')
693 video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
696 video_uploader_id = None
697 mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/(?:user|channel)/([^"]+)">', video_webpage)
699 video_uploader_id = mobj.group(1)
701 self._downloader.report_warning(u'unable to extract uploader nickname')
704 if 'title' not in video_info:
705 raise ExtractorError(u'Unable to extract video title')
706 video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
709 # We try first to get a high quality image:
710 m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
711 video_webpage, re.DOTALL)
712 if m_thumb is not None:
713 video_thumbnail = m_thumb.group(1)
714 elif 'thumbnail_url' not in video_info:
715 self._downloader.report_warning(u'unable to extract video thumbnail')
717 else: # don't panic if we can't find it
718 video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
722 mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
724 upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
725 upload_date = unified_strdate(upload_date)
728 video_description = get_element_by_id("eow-description", video_webpage)
729 if video_description:
730 video_description = clean_html(video_description)
732 fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
734 video_description = unescapeHTML(fd_mobj.group(1))
736 video_description = u''
739 video_subtitles = None
741 if self._downloader.params.get('writesubtitles', False):
742 video_subtitles = self._extract_subtitle(video_id)
744 (sub_error, sub_lang, sub) = video_subtitles[0]
746 self._downloader.report_warning(sub_error)
748 if self._downloader.params.get('writeautomaticsub', False):
749 video_subtitles = self._request_automatic_caption(video_id, video_webpage)
750 (sub_error, sub_lang, sub) = video_subtitles[0]
752 self._downloader.report_warning(sub_error)
754 if self._downloader.params.get('allsubtitles', False):
755 video_subtitles = self._extract_all_subtitles(video_id)
756 for video_subtitle in video_subtitles:
757 (sub_error, sub_lang, sub) = video_subtitle
759 self._downloader.report_warning(sub_error)
761 if self._downloader.params.get('listsubtitles', False):
762 self._list_available_subtitles(video_id)
765 if 'length_seconds' not in video_info:
766 self._downloader.report_warning(u'unable to extract video duration')
769 video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
771 # Decide which formats to download
774 mobj = re.search(r';ytplayer.config = ({.*?});', video_webpage)
776 raise ValueError('Could not find vevo ID')
777 info = json.loads(mobj.group(1))
779 # Easy way to know if the 's' value is in url_encoded_fmt_stream_map
780 # these signatures are encrypted
781 m_s = re.search(r'[&,]s=', args['url_encoded_fmt_stream_map'])
783 self.to_screen(u'%s: Encrypted signatures detected.' % video_id)
784 video_info['url_encoded_fmt_stream_map'] = [args['url_encoded_fmt_stream_map']]
785 m_s = re.search(r'[&,]s=', args.get('adaptive_fmts', u''))
787 if 'url_encoded_fmt_stream_map' in video_info:
788 video_info['url_encoded_fmt_stream_map'][0] += ',' + args['adaptive_fmts']
790 video_info['url_encoded_fmt_stream_map'] = [args['adaptive_fmts']]
791 elif 'adaptive_fmts' in video_info:
792 if 'url_encoded_fmt_stream_map' in video_info:
793 video_info['url_encoded_fmt_stream_map'][0] += ',' + video_info['adaptive_fmts'][0]
795 video_info['url_encoded_fmt_stream_map'] = video_info['adaptive_fmts']
799 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
800 self.report_rtmp_download()
801 video_url_list = [(None, video_info['conn'][0])]
802 elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
803 if 'rtmpe%3Dyes' in video_info['url_encoded_fmt_stream_map'][0]:
804 raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
806 for url_data_str in video_info['url_encoded_fmt_stream_map'][0].split(','):
807 url_data = compat_parse_qs(url_data_str)
808 if 'itag' in url_data and 'url' in url_data:
809 url = url_data['url'][0]
810 if 'sig' in url_data:
811 url += '&signature=' + url_data['sig'][0]
812 elif 's' in url_data:
813 if self._downloader.params.get('verbose'):
816 player_version = self._search_regex(r'ad3-(.+?)\.swf',
817 video_info['ad3_module'][0] if 'ad3_module' in video_info else 'NOT FOUND',
818 'flash player', fatal=False)
819 player = 'flash player %s' % player_version
821 player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage,
822 'html5 player', fatal=False)
823 parts_sizes = u'.'.join(compat_str(len(part)) for part in s.split('.'))
824 self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
825 (len(s), parts_sizes, url_data['itag'][0], player))
826 encrypted_sig = url_data['s'][0]
828 signature = self._decrypt_signature_age_gate(encrypted_sig)
830 signature = self._decrypt_signature(encrypted_sig)
831 url += '&signature=' + signature
832 if 'ratebypass' not in url:
833 url += '&ratebypass=yes'
834 url_map[url_data['itag'][0]] = url
835 video_url_list = self._get_video_url_list(url_map)
836 if not video_url_list:
838 elif video_info.get('hlsvp'):
839 manifest_url = video_info['hlsvp'][0]
840 url_map = self._extract_from_m3u8(manifest_url, video_id)
841 video_url_list = self._get_video_url_list(url_map)
842 if not video_url_list:
846 raise ExtractorError(u'no conn or url_encoded_fmt_stream_map information found in video info')
849 for format_param, video_real_url in video_url_list:
851 video_extension = self._video_extensions.get(format_param, 'flv')
853 video_format = '{0} - {1}{2}'.format(format_param if format_param else video_extension,
854 self._video_dimensions.get(format_param, '???'),
855 ' ('+self._special_itags[format_param]+')' if format_param in self._special_itags else '')
859 'url': video_real_url,
860 'uploader': video_uploader,
861 'uploader_id': video_uploader_id,
862 'upload_date': upload_date,
863 'title': video_title,
864 'ext': video_extension,
865 'format': video_format,
866 'thumbnail': video_thumbnail,
867 'description': video_description,
868 'player_url': player_url,
869 'subtitles': video_subtitles,
870 'duration': video_duration
874 class YoutubePlaylistIE(InfoExtractor):
875 IE_DESC = u'YouTube.com playlists'
881 (?:course|view_play_list|my_playlists|artist|playlist|watch)
882 \? (?:.*?&)*? (?:p|a|list)=
885 ((?:PL|EC|UU|FL)?[0-9A-Za-z-_]{10,})
888 ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
890 _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
892 IE_NAME = u'youtube:playlist'
895 def suitable(cls, url):
896 """Receives a URL and returns True if suitable for this IE."""
897 return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
899 def _real_extract(self, url):
900 # Extract playlist id
901 mobj = re.match(self._VALID_URL, url, re.VERBOSE)
903 raise ExtractorError(u'Invalid URL: %s' % url)
905 # Download playlist videos from API
906 playlist_id = mobj.group(1) or mobj.group(2)
909 for page_num in itertools.count(1):
910 start_index = self._MAX_RESULTS * (page_num - 1) + 1
911 if start_index >= 1000:
912 self._downloader.report_warning(u'Max number of results reached')
914 url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
915 page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
918 response = json.loads(page)
919 except ValueError as err:
920 raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
922 if 'feed' not in response:
923 raise ExtractorError(u'Got a malformed response from YouTube API')
924 playlist_title = response['feed']['title']['$t']
925 if 'entry' not in response['feed']:
926 # Number of videos is a multiple of self._MAX_RESULTS
929 for entry in response['feed']['entry']:
930 index = entry['yt$position']['$t']
931 if 'media$group' in entry and 'media$player' in entry['media$group']:
932 videos.append((index, entry['media$group']['media$player']['url']))
934 videos = [v[1] for v in sorted(videos)]
936 url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
937 return [self.playlist_result(url_results, playlist_id, playlist_title)]
940 class YoutubeChannelIE(InfoExtractor):
941 IE_DESC = u'YouTube.com channels'
942 _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
943 _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
944 _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
945 _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
946 IE_NAME = u'youtube:channel'
948 def extract_videos_from_page(self, page):
950 for mobj in re.finditer(r'href="/watch\?v=([0-9A-Za-z_-]+)&?', page):
951 if mobj.group(1) not in ids_in_page:
952 ids_in_page.append(mobj.group(1))
955 def _real_extract(self, url):
957 mobj = re.match(self._VALID_URL, url)
959 raise ExtractorError(u'Invalid URL: %s' % url)
961 # Download channel page
962 channel_id = mobj.group(1)
966 url = self._TEMPLATE_URL % (channel_id, pagenum)
967 page = self._download_webpage(url, channel_id,
968 u'Downloading page #%s' % pagenum)
970 # Extract video identifiers
971 ids_in_page = self.extract_videos_from_page(page)
972 video_ids.extend(ids_in_page)
974 # Download any subsequent channel pages using the json-based channel_ajax query
975 if self._MORE_PAGES_INDICATOR in page:
976 for pagenum in itertools.count(1):
977 url = self._MORE_PAGES_URL % (pagenum, channel_id)
978 page = self._download_webpage(url, channel_id,
979 u'Downloading page #%s' % pagenum)
981 page = json.loads(page)
983 ids_in_page = self.extract_videos_from_page(page['content_html'])
984 video_ids.extend(ids_in_page)
986 if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
989 self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
991 urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
992 url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
993 return [self.playlist_result(url_entries, channel_id)]
996 class YoutubeUserIE(InfoExtractor):
997 IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
998 _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
999 _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
1000 _GDATA_PAGE_SIZE = 50
1001 _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
1002 _VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]'
1003 IE_NAME = u'youtube:user'
1005 def _real_extract(self, url):
1007 mobj = re.match(self._VALID_URL, url)
1009 raise ExtractorError(u'Invalid URL: %s' % url)
1011 username = mobj.group(1)
1013 # Download video ids using YouTube Data API. Result size per
1014 # query is limited (currently to 50 videos) so we need to query
1015 # page by page until there are no video ids - it means we got
1020 for pagenum in itertools.count(0):
1021 start_index = pagenum * self._GDATA_PAGE_SIZE + 1
1023 gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
1024 page = self._download_webpage(gdata_url, username,
1025 u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))
1027 # Extract video identifiers
1030 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1031 if mobj.group(1) not in ids_in_page:
1032 ids_in_page.append(mobj.group(1))
1034 video_ids.extend(ids_in_page)
1036 # A little optimization - if current page is not
1037 # "full", ie. does not contain PAGE_SIZE video ids then
1038 # we can assume that this page is the last one - there
1039 # are no more ids on further pages - no need to query
1042 if len(ids_in_page) < self._GDATA_PAGE_SIZE:
1045 urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
1046 url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
1047 return [self.playlist_result(url_results, playlist_title = username)]
class YoutubeSearchIE(SearchInfoExtractor):
    """Handler for 'ytsearch' queries, backed by the YouTube GData API."""
    IE_DESC = u'YouTube.com searches'
    # Filled with (quoted query, 1-based start index); 50 per page is the API cap.
    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
    IE_NAME = u'youtube:search'
    _SEARCH_KEY = 'ytsearch'

    def report_download_page(self, query, pagenum):
        """Report attempt to download search page with given number."""
        self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        Pages through the API, stopping once either n results are
        collected or the reported totalItems is exhausted.
        Raises ExtractorError on download failure or empty result set.
        """
        video_ids = []
        pagenum = 0
        # Refined below once the API reports how many hits actually exist.
        limit = n

        while (50 * pagenum) < limit:
            self.report_download_page(query, pagenum + 1)
            result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50 * pagenum) + 1)
            request = compat_urllib_request.Request(result_url)
            try:
                data = compat_urllib_request.urlopen(request).read().decode('utf-8')
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                raise ExtractorError(u'Unable to download API page: %s' % compat_str(err))
            api_response = json.loads(data)['data']

            if 'items' not in api_response:
                raise ExtractorError(u'[youtube] No video results')

            new_ids = [video['id'] for video in api_response['items']]
            video_ids += new_ids

            # Never request past the number of hits the API says exist.
            limit = min(n, api_response['totalItems'])
            pagenum += 1

        if len(video_ids) > n:
            video_ids = video_ids[:n]
        videos = [self.url_result('http://www.youtube.com/watch?v=%s' % video_id, 'Youtube')
                  for video_id in video_ids]
        return self.playlist_result(videos, query)
class YoutubeShowIE(InfoExtractor):
    """Extractor for multi-season YouTube shows: one playlist per season."""
    IE_DESC = u'YouTube.com (multi-season) shows'
    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
    IE_NAME = u'youtube:show'

    def _real_extract(self, url):
        # The path component after /show/ serves as the display id.
        show_name = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
        # There's one playlist for each season of the show
        season_links = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
        self.to_screen(u'%s: Found %s seasons' % (show_name, len(season_links)))
        return [self.url_result('https://www.youtube.com' + link.group(1), 'YoutubePlaylist')
                for link in season_links]
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
    """
    Base class for extractors that fetch info from
    http://www.youtube.com/feed_ajax
    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
    (They also rely on a _PAGING_STEP attribute being available — defined
    by a subclass or elsewhere in this class; TODO confirm against upstream.)
    """
    _LOGIN_REQUIRED = True

    # use action_load_personal_feed instead of action_load_system_feed
    _PERSONAL_FEED = False

    @property
    def _FEED_TEMPLATE(self):
        # URL template with a single '%s' left open for the paging offset.
        action = 'action_load_system_feed'
        if self._PERSONAL_FEED:
            action = 'action_load_personal_feed'
        return 'http://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)

    @property
    def IE_NAME(self):
        return u'youtube:%s' % self._FEED_NAME

    def _real_initialize(self):
        # Feeds are account-specific, so log in up front (see _LOGIN_REQUIRED).
        self._login()

    def _real_extract(self, url):
        feed_entries = []
        # The step argument is available only in 2.7 or higher
        for i in itertools.count(0):
            paging = i * self._PAGING_STEP
            info = self._download_webpage(self._FEED_TEMPLATE % paging,
                                          u'%s feed' % self._FEED_NAME,
                                          u'Downloading page %s' % i)
            info = json.loads(info)
            feed_html = info['feed_html']
            m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
            ids = orderedSet(m.group(1) for m in m_ids)
            feed_entries.extend(self.url_result(video_id, 'Youtube') for video_id in ids)
            # A null 'paging' value marks the last page.
            if info['paging'] is None:
                break
        return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # System feed of the logged-in user's subscriptions.
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    _FEED_NAME = 'subscriptions'
    _PLAYLIST_TITLE = u'Youtube Subscriptions'
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # System feed of videos recommended to the logged-in user.
    IE_DESC = u'YouTube.com recommended videos, "ytrec" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _PLAYLIST_TITLE = u'Youtube Recommended videos'
class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
    IE_DESC = u'Youtube watch later list, "ytwatchlater" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
    _FEED_NAME = 'watch_later'
    _PLAYLIST_TITLE = u'Youtube Watch Later'
    # Watch-later is per-account, so the personal feed action must be used.
    _PERSONAL_FEED = True
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor for the logged-in user's favourites list."""
    IE_NAME = u'youtube:favorites'
    IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:o?rites)?'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # The favourites page embeds an ordinary playlist; find its id and
        # delegate the actual extraction to the playlist extractor.
        favourites_page = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
        list_id = self._search_regex(r'list=(.+?)["&]', favourites_page, u'favourites playlist id')
        return self.url_result(list_id, 'YoutubePlaylist')