Allow iterators for playlist result entries

author Philipp Hagemeister <phihag@phihag.de>

Sat, 6 Dec 2014 13:02:19 +0000 (14:02 +0100)

committer Philipp Hagemeister <phihag@phihag.de>

Sat, 6 Dec 2014 13:02:19 +0000 (14:02 +0100)
author Philipp Hagemeister <phihag@phihag.de>
Sat, 6 Dec 2014 13:02:19 +0000 (14:02 +0100)
committer Philipp Hagemeister <phihag@phihag.de>
Sat, 6 Dec 2014 13:02:19 +0000 (14:02 +0100)
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py

index f89ac4e1de513330467e3569a17ef624f2abc736..56dc3d461796f7c8897f20875ca8ac7c0301c3dd 100755 (executable)
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -7,6 +7,7 @@ import collections
  import datetime
  import errno
  import io
+import itertools
  import json
  import locale
  import os
@@ -654,21 +655,28 @@ class YoutubeDL(object):
              if playlistend == -1:
                  playlistend = None
  
-            if isinstance(ie_result['entries'], list):
-                n_all_entries = len(ie_result['entries'])
-                entries = ie_result['entries'][playliststart:playlistend]
+            ie_entries = ie_result['entries']
+            if isinstance(ie_entries, list):
+                n_all_entries = len(ie_entries)
+                entries = ie_entries[playliststart:playlistend]
                  n_entries = len(entries)
                  self.to_screen(
                      "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                      (ie_result['extractor'], playlist, n_all_entries, n_entries))
-            else:
-                assert isinstance(ie_result['entries'], PagedList)
-                entries = ie_result['entries'].getslice(
+            elif isinstance(ie_entries, PagedList):
+                entries = ie_entries.getslice(
                      playliststart, playlistend)
                  n_entries = len(entries)
                  self.to_screen(
                      "[%s] playlist %s: Downloading %d videos" %
                      (ie_result['extractor'], playlist, n_entries))
+            else:  # iterable
+                entries = list(itertools.islice(
+                    ie_entries, playliststart, playlistend))
+                n_entries = len(entries)
+                self.to_screen(
+                    "[%s] playlist %s: Downloading %d videos" %
+                    (ie_result['extractor'], playlist, n_entries))
  
              for i, entry in enumerate(entries, 1):
                  self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py

index 5b5e33cea8da39ebed4ea56df1969ad7d5e5ab81..cb6081dd08ef13ce073f4af030e8c3cc40dd7f8d 100644 (file)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -158,8 +158,8 @@ class InfoExtractor(object):
  
  
      _type "playlist" indicates multiple videos.
-    There must be a key "entries", which is a list or a PagedList object, each
-    element of which is a valid dictionary under this specfication.
+    There must be a key "entries", which is a list, an iterable, or a PagedList
+    object, each element of which is a valid dictionary by this specification.
  
      Additionally, playlists can have "title" and "id" attributes with the same
      semantics as videos (see above).
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py

index addef9594c09a9aafba4ac970711b7b2a9d3dbc0..8b6e591a4379115b47a4d36cc2c5d5495e5fcd03 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1262,8 +1262,12 @@ class YoutubeChannelIE(InfoExtractor):
              # The videos are contained in a single page
              # the ajax pages can't be used, they are empty
              video_ids = self.extract_videos_from_page(channel_page)
-        else:
-            # Download all channel pages using the json-based channel_ajax query
+            entries = [
+                self.url_result(video_id, 'Youtube', video_id=video_id)
+                for video_id in video_ids]
+            return self.playlist_result(entries, channel_id)
+
+        def _entries():
              for pagenum in itertools.count(1):
                  url = self._MORE_PAGES_URL % (pagenum, channel_id)
                  page = self._download_json(
@@ -1271,16 +1275,14 @@ class YoutubeChannelIE(InfoExtractor):
                      transform_source=uppercase_escape)
  
                  ids_in_page = self.extract_videos_from_page(page['content_html'])
-                video_ids.extend(ids_in_page)
+                for video_id in ids_in_page:
+                    yield self.url_result(
+                        video_id, 'Youtube', video_id=video_id)
  
                  if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
                      break
  
-        self._downloader.to_screen('[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
-
-        url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)
-                       for video_id in video_ids]
-        return self.playlist_result(url_entries, channel_id)
+        return self.playlist_result(_entries(), channel_id)
  
  
  class YoutubeUserIE(InfoExtractor):
author	Philipp Hagemeister <phihag@phihag.de>
	Sat, 6 Dec 2014 13:02:19 +0000 (14:02 +0100)
committer	Philipp Hagemeister <phihag@phihag.de>
	Sat, 6 Dec 2014 13:02:19 +0000 (14:02 +0100)
youtube_dl/YoutubeDL.py		patch | blob | history
youtube_dl/extractor/common.py		patch | blob | history
youtube_dl/extractor/youtube.py		patch | blob | history