[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / youtube.py
index 9f748390531b66107f0e2012a0fdee152941d98d..bc01016e4376a19816be17fb454670d9b5222f6e 100644 (file)
@@ -70,9 +70,14 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
 
     _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'
 
+    _YOUTUBE_CLIENT_HEADERS = {
+        'x-youtube-client-name': '1',
+        'x-youtube-client-version': '1.20200609.04.02',
+    }
+
     def _set_language(self):
         self._set_cookie(
-            '.youtube.com', 'PREF', 'f1=50000000&hl=en',
+            '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
             # YouTube sets the expire time to about two months
             expire_time=time.time() + 2 * 30 * 24 * 3600)
 
@@ -298,10 +303,11 @@ class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
                     # Downloading page may result in intermittent 5xx HTTP error
                     # that is usually worked around with a retry
                     more = self._download_json(
-                        'https://youtube.com/%s' % mobj.group('more'), playlist_id,
+                        'https://www.youtube.com/%s' % mobj.group('more'), playlist_id,
                         'Downloading page #%s%s'
                         % (page_num, ' (retry #%d)' % count if count else ''),
-                        transform_source=uppercase_escape)
+                        transform_source=uppercase_escape,
+                        headers=self._YOUTUBE_CLIENT_HEADERS)
                     break
                 except ExtractorError as e:
                     if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
@@ -388,8 +394,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                             (?:www\.)?invidious\.drycat\.fr/|
                             (?:www\.)?tube\.poal\.co/|
                             (?:www\.)?vid\.wxzm\.sx/|
+                            (?:www\.)?yewtu\.be/|
                             (?:www\.)?yt\.elukerio\.org/|
                             (?:www\.)?yt\.lelux\.fi/|
+                            (?:www\.)?invidious\.ggc-project\.de/|
+                            (?:www\.)?yt\.maisputain\.ovh/|
+                            (?:www\.)?invidious\.13ad\.de/|
+                            (?:www\.)?invidious\.toot\.koeln/|
+                            (?:www\.)?invidious\.fdn\.fr/|
+                            (?:www\.)?watch\.nettohikari\.com/|
                             (?:www\.)?kgg2m7yk5aybusll\.onion/|
                             (?:www\.)?qklhadlycap4cnod\.onion/|
                             (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
@@ -397,6 +410,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                             (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
                             (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
                             (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
+                            (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                          (?:                                                  # the various things that can precede the ID:
@@ -1250,7 +1264,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             'params': {
                 'skip_download': True,
             },
-        }
+        },
+        {
+            # empty description results in an empty string
+            'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
+            'info_dict': {
+                'id': 'x41yOUIvK2k',
+                'ext': 'mp4',
+                'title': 'IMG 3456',
+                'description': '',
+                'upload_date': '20170613',
+                'uploader_id': 'ElevageOrVert',
+                'uploader': 'ElevageOrVert',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
     ]
 
     def __init__(self, *args, **kwargs):
@@ -1370,7 +1400,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         funcname = self._search_regex(
             (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
-             r'\b(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
+             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
              r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
              # Obsolete patterns
              r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
@@ -1644,8 +1674,63 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         video_id = mobj.group(2)
         return video_id
 
+    def _extract_chapters_from_json(self, webpage, video_id, duration):
+        if not webpage:
+            return
+        player = self._parse_json(
+            self._search_regex(
+                r'RELATED_PLAYER_ARGS["\']\s*:\s*({.+})\s*,?\s*\n', webpage,
+                'player args', default='{}'),
+            video_id, fatal=False)
+        if not player or not isinstance(player, dict):
+            return
+        watch_next_response = player.get('watch_next_response')
+        if not isinstance(watch_next_response, compat_str):
+            return
+        response = self._parse_json(watch_next_response, video_id, fatal=False)
+        if not response or not isinstance(response, dict):
+            return
+        chapters_list = try_get(
+            response,
+            lambda x: x['playerOverlays']
+                       ['playerOverlayRenderer']
+                       ['decoratedPlayerBarRenderer']
+                       ['decoratedPlayerBarRenderer']
+                       ['playerBar']
+                       ['chapteredPlayerBarRenderer']
+                       ['chapters'],
+            list)
+        if not chapters_list:
+            return
+
+        def chapter_time(chapter):
+            return float_or_none(
+                try_get(
+                    chapter,
+                    lambda x: x['chapterRenderer']['timeRangeStartMillis'],
+                    int),
+                scale=1000)
+        chapters = []
+        for next_num, chapter in enumerate(chapters_list, start=1):
+            start_time = chapter_time(chapter)
+            if start_time is None:
+                continue
+            end_time = (chapter_time(chapters_list[next_num])
+                        if next_num < len(chapters_list) else duration)
+            if end_time is None:
+                continue
+            title = try_get(
+                chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
+                compat_str)
+            chapters.append({
+                'start_time': start_time,
+                'end_time': end_time,
+                'title': title,
+            })
+        return chapters
+
     @staticmethod
-    def _extract_chapters(description, duration):
+    def _extract_chapters_from_description(description, duration):
         if not description:
             return None
         chapter_lines = re.findall(
@@ -1679,6 +1764,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             })
         return chapters
 
+    def _extract_chapters(self, webpage, description, video_id, duration):
+        return (self._extract_chapters_from_json(webpage, video_id, duration)
+                or self._extract_chapters_from_description(description, duration))
+
     def _real_extract(self, url):
         url, smuggled_data = unsmuggle_url(url, {})
 
@@ -1752,7 +1841,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         # Get video info
         video_info = {}
         embed_webpage = None
-        if re.search(r'player-age-gate-content">', video_webpage) is not None:
+        if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
+                or re.search(r'player-age-gate-content">', video_webpage) is not None):
             age_gate = True
             # We simulate the access to the video from www.youtube.com/v/{video_id}
             # this can be viewed without login into Youtube
@@ -1825,6 +1915,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         video_details = try_get(
             player_response, lambda x: x['videoDetails'], dict) or {}
 
+        microformat = try_get(
+            player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {}
+
         video_title = video_info.get('title', [None])[0] or video_details.get('title')
         if not video_title:
             self._downloader.report_warning('Unable to extract video title')
@@ -1854,7 +1947,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             ''', replace_url, video_description)
             video_description = clean_html(video_description)
         else:
-            video_description = self._html_search_meta('description', video_webpage) or video_details.get('shortDescription')
+            video_description = video_details.get('shortDescription')
+            if video_description is None:
+                video_description = self._html_search_meta('description', video_webpage)
 
         if not smuggled_data.get('force_singlefeed', False):
             if not self._downloader.params.get('noplaylist'):
@@ -1902,6 +1997,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             view_count = extract_view_count(video_info)
         if view_count is None and video_details:
             view_count = int_or_none(video_details.get('viewCount'))
+        if view_count is None and microformat:
+            view_count = int_or_none(microformat.get('viewCount'))
 
         if is_live is None:
             is_live = bool_or_none(video_details.get('isLive'))
@@ -2001,7 +2098,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                                 embed_webpage = self._download_webpage(
                                     embed_url, video_id, 'Downloading embed webpage')
                             jsplayer_url_json = self._search_regex(
-                                ASSETS_RE, embed_webpage, 'JS player URL')
+                                ASSETS_RE, embed_webpage, 'JS player URL (2)', default=None)
+
+                            if not jsplayer_url_json:
+                                jsplayer_url_json = self._search_regex(
+                                    r'"WEB_PLAYER_CONTEXT_CONFIG_ID_EMBEDDED_PLAYER":.+?"jsUrl":\s*("[^"]+")',
+                                    embed_webpage,
+                                    'JS player URL')
 
                         player_url = json.loads(jsplayer_url_json)
                         if player_url is None:
@@ -2153,7 +2256,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             video_uploader_id = mobj.group('uploader_id')
             video_uploader_url = mobj.group('uploader_url')
         else:
-            self._downloader.report_warning('unable to extract uploader nickname')
+            owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
+            if owner_profile_url:
+                video_uploader_id = self._search_regex(
+                    r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id',
+                    default=None)
+                video_uploader_url = owner_profile_url
 
         channel_id = (
             str_or_none(video_details.get('channelId'))
@@ -2164,17 +2272,33 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 video_webpage, 'channel id', default=None, group='id'))
         channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
 
-        # thumbnail image
-        # We try first to get a high quality image:
-        m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
-                            video_webpage, re.DOTALL)
-        if m_thumb is not None:
-            video_thumbnail = m_thumb.group(1)
-        elif 'thumbnail_url' not in video_info:
-            self._downloader.report_warning('unable to extract video thumbnail')
+        thumbnails = []
+        thumbnails_list = try_get(
+            video_details, lambda x: x['thumbnail']['thumbnails'], list) or []
+        for t in thumbnails_list:
+            if not isinstance(t, dict):
+                continue
+            thumbnail_url = url_or_none(t.get('url'))
+            if not thumbnail_url:
+                continue
+            thumbnails.append({
+                'url': thumbnail_url,
+                'width': int_or_none(t.get('width')),
+                'height': int_or_none(t.get('height')),
+            })
+
+        if not thumbnails:
             video_thumbnail = None
-        else:   # don't panic if we can't find it
-            video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
+            # Prefer the high quality page image; video_info's thumbnail_url is only a fallback
+            m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
+                                video_webpage, re.DOTALL)
+            if m_thumb is not None:
+                video_thumbnail = m_thumb.group(1)
+            thumbnail_url = try_get(video_info, lambda x: x['thumbnail_url'][0], compat_str)
+            if thumbnail_url and not video_thumbnail:
+                video_thumbnail = compat_urllib_parse_unquote_plus(thumbnail_url)
+            if video_thumbnail:
+                thumbnails.append({'url': video_thumbnail})
 
         # upload date
         upload_date = self._html_search_meta(
@@ -2184,6 +2308,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 [r'(?s)id="eow-date.*?>(.*?)</span>',
                  r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
                 video_webpage, 'upload date', default=None)
+        if not upload_date:
+            upload_date = microformat.get('publishDate') or microformat.get('uploadDate')
         upload_date = unified_strdate(upload_date)
 
         video_license = self._html_search_regex(
@@ -2255,17 +2381,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         m_cat_container = self._search_regex(
             r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
             video_webpage, 'categories', default=None)
+        category = None
         if m_cat_container:
             category = self._html_search_regex(
                 r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
                 default=None)
-            video_categories = None if category is None else [category]
-        else:
-            video_categories = None
+        if not category:
+            category = try_get(
+                microformat, lambda x: x['category'], compat_str)
+        video_categories = None if category is None else [category]
 
         video_tags = [
             unescapeHTML(m.group('content'))
             for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
+        if not video_tags:
+            video_tags = try_get(video_details, lambda x: x['keywords'], list)
 
         def _extract_count(count_name):
             return str_to_int(self._search_regex(
@@ -2316,7 +2446,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     errnote='Unable to download video annotations', fatal=False,
                     data=urlencode_postdata({xsrf_field_name: xsrf_token}))
 
-        chapters = self._extract_chapters(description_original, video_duration)
+        chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
 
         # Look for the DASH manifest
         if self._downloader.params.get('youtube_include_dash_manifest', True):
@@ -2407,7 +2537,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             'creator': video_creator or artist,
             'title': video_title,
             'alt_title': video_alt_title or track,
-            'thumbnail': video_thumbnail,
+            'thumbnails': thumbnails,
             'description': video_description,
             'categories': video_categories,
             'tags': video_tags,
@@ -2671,7 +2801,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
         ids = []
         last_id = playlist_id[-11:]
         for n in itertools.count(1):
-            url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
+            url = 'https://www.youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
             webpage = self._download_webpage(
                 url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
             new_ids = orderedSet(re.findall(
@@ -2903,7 +3033,7 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
 
 class YoutubeUserIE(YoutubeChannelIE):
     IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
-    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
+    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9%-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_%-]+)'
     _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
     IE_NAME = 'youtube:user'
 
@@ -2933,6 +3063,9 @@ class YoutubeUserIE(YoutubeChannelIE):
     }, {
         'url': 'https://www.youtube.com/c/gametrailers',
         'only_matching': True,
+    }, {
+        'url': 'https://www.youtube.com/c/Pawe%C5%82Zadro%C5%BCniak',
+        'only_matching': True,
     }, {
         'url': 'https://www.youtube.com/gametrailers',
         'only_matching': True,
@@ -3011,7 +3144,7 @@ class YoutubeLiveIE(YoutubeBaseInfoExtractor):
 
 class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
     IE_DESC = 'YouTube.com user/channel playlists'
-    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
+    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel|c)/(?P<id>[^/]+)/playlists'
     IE_NAME = 'youtube:playlists'
 
     _TESTS = [{
@@ -3037,6 +3170,9 @@ class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
             'title': 'Chem Player',
         },
         'skip': 'Blocked',
+    }, {
+        'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
+        'only_matching': True,
     }]
 
 
@@ -3051,54 +3187,94 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
     _MAX_RESULTS = float('inf')
     IE_NAME = 'youtube:search'
     _SEARCH_KEY = 'ytsearch'
-    _EXTRA_QUERY_ARGS = {}
+    _SEARCH_PARAMS = None
     _TESTS = []
 
-    def _get_n_results(self, query, n):
-        """Get a specified number of results for a query"""
-
-        videos = []
-        limit = n
-
-        url_query = {
-            'search_query': query.encode('utf-8'),
+    def _entries(self, query, n):
+        data = {
+            'context': {
+                'client': {
+                    'clientName': 'WEB',
+                    'clientVersion': '2.20201021.03.00',
+                }
+            },
+            'query': query,
         }
-        url_query.update(self._EXTRA_QUERY_ARGS)
-        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
-
-        for pagenum in itertools.count(1):
-            data = self._download_json(
-                result_url, video_id='query "%s"' % query,
-                note='Downloading page %s' % pagenum,
-                errnote='Unable to download API page',
-                query={'spf': 'navigate'})
-            html_content = data[1]['body']['content']
-
-            if 'class="search-message' in html_content:
-                raise ExtractorError(
-                    '[youtube] No video results', expected=True)
-
-            new_videos = list(self._process_page(html_content))
-            videos += new_videos
-            if not new_videos or len(videos) > limit:
+        if self._SEARCH_PARAMS:
+            data['params'] = self._SEARCH_PARAMS
+        total = 0
+        for page_num in itertools.count(1):
+            search = self._download_json(
+                'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
+                video_id='query "%s"' % query,
+                note='Downloading page %s' % page_num,
+                errnote='Unable to download API page', fatal=False,
+                data=json.dumps(data).encode('utf8'),
+                headers={'content-type': 'application/json'})
+            if not search:
+                break
+            slr_contents = try_get(
+                search,
+                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
+                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
+                list)
+            if not slr_contents:
                 break
-            next_link = self._html_search_regex(
-                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
-                html_content, 'next link', default=None)
-            if next_link is None:
+            isr_contents = try_get(
+                slr_contents,
+                lambda x: x[0]['itemSectionRenderer']['contents'],
+                list)
+            if not isr_contents:
                 break
-            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)
+            for content in isr_contents:
+                if not isinstance(content, dict):
+                    continue
+                video = content.get('videoRenderer')
+                if not isinstance(video, dict):
+                    continue
+                video_id = video.get('videoId')
+                if not video_id:
+                    continue
+                title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str)
+                description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str)
+                duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str))
+                view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
+                # Match the separators too: r'^(\d+)' cut "1,234 views" down to 1
+                view_count = str_to_int(self._search_regex(
+                    r'^([\d,]+)', re.sub(r'\s', '', view_count_text), 'view count', default=None))
+                uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
+                total += 1
+                yield {
+                    '_type': 'url_transparent',
+                    'ie_key': YoutubeIE.ie_key(),
+                    'id': video_id,
+                    'url': video_id,
+                    'title': title,
+                    'description': description,
+                    'duration': duration,
+                    'view_count': view_count,
+                    'uploader': uploader,
+                }
+                if total == n:
+                    return
+            token = try_get(
+                slr_contents,
+                lambda x: x[1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
+                compat_str)
+            if not token:
+                break
+            data['continuation'] = token
 
-        if len(videos) > n:
-            videos = videos[:n]
-        return self.playlist_result(videos, query)
+    def _get_n_results(self, query, n):
+        """Get a specified number of results for a query"""
+        return self.playlist_result(self._entries(query, n), query)
 
 
 class YoutubeSearchDateIE(YoutubeSearchIE):
     IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
     _SEARCH_KEY = 'ytsearchdate'
     IE_DESC = 'YouTube.com searches, newest videos first'
-    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
+    _SEARCH_PARAMS = 'CAI%3D'
 
 
 class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
@@ -3181,9 +3357,10 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
                 break
 
             more = self._download_json(
-                'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
+                'https://www.youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
                 'Downloading page #%s' % page_num,
-                transform_source=uppercase_escape)
+                transform_source=uppercase_escape,
+                headers=self._YOUTUBE_CLIENT_HEADERS)
             content_html = more['content_html']
             more_widget_html = more['load_more_widget_html']