class YoutubeChannelIE(InfoExtractor):
IE_DESC = 'YouTube.com channels'
_VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
+ _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
IE_NAME = 'youtube:channel'
_TESTS = [{
'note': 'paginated channel',
}
}]
- def extract_videos_from_page(self, page):
+ @staticmethod
+ def extract_videos_from_page(page):
ids_in_page = []
titles_in_page = []
for mobj in re.finditer(r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?', page):
def _real_extract(self, url):
channel_id = self._match_id(url)
- url = 'https://www.youtube.com/channel/%s/videos' % channel_id
- channel_page = self._download_webpage(url, channel_id)
+ url = self._TEMPLATE_URL % channel_id
+ channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
autogenerated = re.search(r'''(?x)
class="[^"]*?(?:
channel-header-autogenerated-label|
if autogenerated:
# The videos are contained in a single page
# the ajax pages can't be used, they are empty
- videos = self.extract_videos_from_page(channel_page)
entries = [
self.url_result(
video_id, 'Youtube', video_id=video_id,
video_title=video_title)
- for video_id, video_title in videos]
+ for video_id, video_title in self.extract_videos_from_page(channel_page)]
return self.playlist_result(entries, channel_id)
def _entries():
more_widget_html = content_html = channel_page
for pagenum in itertools.count(1):
- ids_in_page = self.extract_videos_from_page(content_html)
- for video_id, video_title in ids_in_page:
+ for video_id, video_title in self.extract_videos_from_page(content_html):
yield self.url_result(
video_id, 'Youtube', video_id=video_id,
video_title=video_title)
return self.playlist_result(_entries(), channel_id)
-class YoutubeUserIE(InfoExtractor):
+class YoutubeUserIE(YoutubeChannelIE):
IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
- _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'
- _GDATA_PAGE_SIZE = 50
- _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
+ _TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos'
IE_NAME = 'youtube:user'
_TESTS = [{
else:
return super(YoutubeUserIE, cls).suitable(url)
- def _real_extract(self, url):
- username = self._match_id(url)
-
- # Download video ids using YouTube Data API. Result size per
- # query is limited (currently to 50 videos) so we need to query
- # page by page until there are no video ids - it means we got
- # all of them.
-
- def download_page(pagenum):
- start_index = pagenum * self._GDATA_PAGE_SIZE + 1
-
- gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
- page = self._download_webpage(
- gdata_url, username,
- 'Downloading video ids from %d to %d' % (
- start_index, start_index + self._GDATA_PAGE_SIZE))
-
- try:
- response = json.loads(page)
- except ValueError as err:
- raise ExtractorError('Invalid JSON in API response: ' + compat_str(err))
- if 'entry' not in response['feed']:
- return
-
- # Extract video identifiers
- entries = response['feed']['entry']
- for entry in entries:
- title = entry['title']['$t']
- video_id = entry['id']['$t'].split('/')[-1]
- yield {
- '_type': 'url',
- 'url': video_id,
- 'ie_key': 'Youtube',
- 'id': video_id,
- 'title': title,
- }
- url_results = OnDemandPagedList(download_page, self._GDATA_PAGE_SIZE)
-
- return self.playlist_result(url_results, playlist_title=username)
-
class YoutubeSearchIE(SearchInfoExtractor):
IE_DESC = 'YouTube.com searches'
while (PAGE_SIZE * pagenum) < limit:
result_url = self._API_URL % (
compat_urllib_parse.quote_plus(query.encode('utf-8')),
- (PAGE_SIZE * pagenum) + 1)
+ max((PAGE_SIZE * pagenum) + 1, 2))
data_json = self._download_webpage(
result_url, video_id='query "%s"' % query,
note='Downloading page %s' % (pagenum + 1),