projects
/
youtube-dl
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
[youtube:feed] Implement lazy playlist extraction (closes #10184)
[youtube-dl]
/
youtube_dl
/
extractor
/
youtube.py
diff --git
a/youtube_dl/extractor/youtube.py
b/youtube_dl/extractor/youtube.py
index 617be8e96b6e305988b3ae4d0c5d3790074868e4..e9965509ce404b71bb44843882d79f9884a97a25 100644
(file)
--- a/
youtube_dl/extractor/youtube.py
+++ b/
youtube_dl/extractor/youtube.py
@@
-2699,10
+2699,7
@@
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
def _real_initialize(self):
self._login()
def _real_initialize(self):
self._login()
- def _real_extract(self, url):
- page = self._download_webpage(
- 'https://www.youtube.com/feed/%s' % self._FEED_NAME, self._PLAYLIST_TITLE)
-
+ def _entries(self, page):
# The extraction process is the same as for playlists, but the regex
# for the video ids doesn't contain an index
ids = []
# The extraction process is the same as for playlists, but the regex
# for the video ids doesn't contain an index
ids = []
@@
-2713,12
+2710,15
@@
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
# 'recommended' feed has infinite 'load more' and each new portion spins
# the same videos in (sometimes) slightly different order, so we'll check
# for unicity and break when portion has no new videos
# 'recommended' feed has infinite 'load more' and each new portion spins
# the same videos in (sometimes) slightly different order, so we'll check
# for unicity and break when portion has no new videos
- new_ids =
filter(lambda video_id: video_id not in ids, orderedSet(matches
))
+ new_ids =
list(filter(lambda video_id: video_id not in ids, orderedSet(matches)
))
if not new_ids:
break
ids.extend(new_ids)
if not new_ids:
break
ids.extend(new_ids)
+ for entry in self._ids_to_results(new_ids):
+ yield entry
+
mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
if not mobj:
break
mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
if not mobj:
break
@@
-2730,8
+2730,12
@@
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
content_html = more['content_html']
more_widget_html = more['load_more_widget_html']
content_html = more['content_html']
more_widget_html = more['load_more_widget_html']
+ def _real_extract(self, url):
+ page = self._download_webpage(
+ 'https://www.youtube.com/feed/%s' % self._FEED_NAME,
+ self._PLAYLIST_TITLE)
return self.playlist_result(
return self.playlist_result(
- self._
ids_to_results(ids
), playlist_title=self._PLAYLIST_TITLE)
+ self._
entries(page
), playlist_title=self._PLAYLIST_TITLE)
class YoutubeWatchLaterIE(YoutubePlaylistIE):
class YoutubeWatchLaterIE(YoutubePlaylistIE):