[youtube] Filter duplicates in playlists base extractor
author Sergey M․ <dstftw@gmail.com>
Sun, 31 Jan 2016 11:52:02 +0000 (17:52 +0600)
committer Sergey M․ <dstftw@gmail.com>
Sun, 31 Jan 2016 11:52:02 +0000 (17:52 +0600)
youtube_dl/extractor/youtube.py

index 2941da4676dfba626438441f7f18d2dcdb8a161c..9a64c1d114455bdf8df048ba88e3a17dedf5370e 100644 (file)
@@ -233,7 +233,7 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
 
 class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
     def _process_page(self, content):
-        for playlist_id in re.findall(r'href="/?playlist\?list=(.+?)"', content):
+        for playlist_id in set(re.findall(r'href="/?playlist\?list=([0-9A-Za-z-_]{10,})"', content)):
             yield self.url_result(
                 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')