X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fyoutube.py;h=0cb837afcc7d448ec6ae5a8b3496642cc7f956c0;hb=aa79ac0c82a80c5f2e8dcfbea0ceeda0d8463a81;hp=045507bc73f211a7227439fbe6425de9de0d658d;hpb=457ac58cc72a0b7161a0369a8f282f38ff0f2f93;p=youtube-dl diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 045507bc7..0cb837afc 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -185,14 +185,15 @@ class YoutubeBaseInfoExtractor(InfoExtractor): self._download_webpage( req, None, - note='Confirming age', errnote='Unable to confirm age') - return True + note='Confirming age', errnote='Unable to confirm age', + fatal=False) def _real_initialize(self): if self._downloader is None: return - if not self._set_language(): - return + if self._get_login_info()[0] is not None: + if not self._set_language(): + return if not self._login(): return self._confirm_age() @@ -273,6 +274,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): '138': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, + '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'}, + '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'}, + '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'h264'}, # Dash mp4 audio '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 48, 'preference': -50}, @@ -286,6 +290,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40}, '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40}, '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40}, + '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'container': 'webm', 'vcodec': 'VP9'}, '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, @@ -295,11 +300,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, + '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'}, + '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'}, # Dash webm audio '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50}, '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50}, + # Dash webm audio with opus inside + '249': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50}, + '250': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50}, + '251': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50}, + # RTMP (unnamed) '_rtmp': {'protocol': 'rtmp'}, } @@ -394,6 +406,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): 'format': '141', }, }, + # Controversy video + { + 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8', + 'info_dict': { + 'id': 'T4XJQO3qol8', + 'ext': 'mp4', + 'upload_date': '20100909', + 'uploader': 'The Amazing Atheist', + 'uploader_id': 'TheAmazingAtheist', + 'title': 'Burning Everyone\'s Koran', + 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html', + } + } ] def __init__(self, *args, **kwargs): @@ -503,7 +528,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): def _parse_sig_js(self, jscode): funcname = self._search_regex( - r'signature=([$a-zA-Z]+)', jscode, + r'\.sig\|\|([a-zA-Z0-9]+)\(', jscode, 'Initial JS player signature function name') jsi = JSInterpreter(jscode) @@ -654,7 +679,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): video_id = self.extract_id(url) # Get video webpage - url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id + url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id + pref_cookies = [ + c for c in self._downloader.cookiejar + if c.domain == '.youtube.com' and c.name == 'PREF'] + for pc in pref_cookies: + if 'hl=' in pc.value: + pc.value = re.sub(r'hl=[^&]+', 'hl=en', pc.value) + else: + if pc.value: + pc.value += '&' + pc.value += 'hl=en' video_webpage = self._download_webpage(url, video_id) # Attempt to extract SWF player URL @@ -667,7 +702,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): # Get video info self.report_video_info_webpage_download(video_id) if re.search(r'player-age-gate-content">', video_webpage) is not None: - self.report_age_confirmation() age_gate = True # We simulate the access to the video from www.youtube.com/v/{video_id} # this can be viewed without login into Youtube @@ -675,12 +709,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): 'video_id': video_id, 'eurl': 'https://youtube.googleapis.com/v/' + video_id, 'sts': self._search_regex( - r'"sts"\s*:\s*(\d+)', video_webpage, 'sts'), + r'"sts"\s*:\s*(\d+)', video_webpage, 'sts', default=''), }) video_info_url = proto + '://www.youtube.com/get_video_info?' + data - video_info_webpage = self._download_webpage(video_info_url, video_id, - note=False, - errnote='unable to download video info webpage') + video_info_webpage = self._download_webpage( + video_info_url, video_id, + note='Refetching age-gated info webpage', + errnote='unable to download video info webpage') video_info = compat_parse_qs(video_info_webpage) else: age_gate = False @@ -928,7 +963,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info') # Look for the DASH manifest - if (self._downloader.params.get('youtube_include_dash_manifest', False)): + if self._downloader.params.get('youtube_include_dash_manifest', True): try: # The DASH manifest used needs to be the one from the original video_webpage. # The one found in get_video_info seems to be using different signatures. @@ -974,7 +1009,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): existing_format.update(f) except (ExtractorError, KeyError) as e: - self.report_warning('Skipping DASH manifest: %s' % e, video_id) + self.report_warning('Skipping DASH manifest: %r' % e, video_id) self._sort_formats(formats) @@ -1026,6 +1061,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): 'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re', 'info_dict': { 'title': 'ytdl test PL', + 'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re', }, 'playlist_count': 3, }, { @@ -1045,7 +1081,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): 'note': 'issue #673', 'url': 'PLBB231211A4F62143', 'info_dict': { - 'title': 'Team Fortress 2 (Class-based LP)', + 'title': '[OLD]Team Fortress 2 (Class-based LP)', }, 'playlist_mincount': 26, }, {