Merge branch 'brightcove_in_page_embed' of https://github.com/remitamine/youtube...
[youtube-dl] / youtube_dl / extractor / youtube.py
index 08e821362eaf04c9fac3fbb948774abe87554a20..687e0b4dbc1d044eb55c89da722961c68b0428b8 100644 (file)
@@ -703,7 +703,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
     def _extract_signature_function(self, video_id, player_url, example_sig):
         id_m = re.match(
-            r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?)?\.(?P<ext>[a-z]+)$',
+            r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|/base)?\.(?P<ext>[a-z]+)$',
             player_url)
         if not id_m:
             raise ExtractorError('Cannot identify player %r' % player_url)
@@ -1107,6 +1107,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     if not video_info:
                         video_info = get_video_info
                     if 'token' in get_video_info:
+                        # Different get_video_info requests may report different results, e.g.
+                        # some may report video unavailability, but some may serve it without
+                        # any complaint (see https://github.com/rg3/youtube-dl/issues/7362,
+                        # the original webpage as well as el=info and el=embedded get_video_info
+                        # requests report video unavailability due to geo restriction while
+                        # el=detailpage succeeds and returns valid data). This is probably
+                        # due to YouTube measures against IP ranges of hosting providers.
+                        # Working around by preferring the first succeeded video_info containing
+                        # the token if no such video_info yet was found.
+                        if 'token' not in video_info:
+                            video_info = get_video_info
                         break
         if 'token' not in video_info:
             if 'reason' in video_info:
@@ -1332,7 +1343,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                                 player_desc = 'flash player %s' % player_version
                             else:
                                 player_version = self._search_regex(
-                                    r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
+                                    [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js', r'(?:www|player)-([^/]+)/base\.js'],
                                     player_url,
                                     'html5 player', fatal=False)
                                 player_desc = 'html5 player %s' % player_version
@@ -1644,8 +1655,18 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
         'playlist_mincount': 91,
         'info_dict': {
-            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
+            'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
+            'title': 'Uploads from lex will',
         }
+    }, {
+        'note': 'Age restricted channel',
+        # from https://www.youtube.com/user/DeusExOfficial
+        'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
+        'playlist_mincount': 64,
+        'info_dict': {
+            'id': 'UUs0ifCMCm1icqRbqhUINa0w',
+            'title': 'Uploads from Deus Ex',
+        },
     }]
 
     def _real_extract(self, url):
@@ -1666,7 +1687,7 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
                 'channelId', channel_page, 'channel id', default=None)
             if not channel_playlist_id:
                 channel_playlist_id = self._search_regex(
-                    r'data-channel-external-id="([^"]+)"',
+                    r'data-(?:channel-external-|yt)id="([^"]+)"',
                     channel_page, 'channel id', default=None)
         if channel_playlist_id and channel_playlist_id.startswith('UC'):
             playlist_id = 'UU' + channel_playlist_id[2:]