[generic] Restrict share-videos.se embeds regex to filter bogus URLs (#16115)
author Sergey M․ <dstftw@gmail.com>
Sun, 8 Apr 2018 17:25:44 +0000 (00:25 +0700)
committer Sergey M․ <dstftw@gmail.com>
Sun, 8 Apr 2018 17:25:44 +0000 (00:25 +0700)
youtube_dl/extractor/generic.py

index 4b210da72eb52e7e216fee67579bbf38b1d4e60f..8922d19142659d2b20df412e462d19715ff5d4d8 100644 (file)
@@ -1974,10 +1974,10 @@ class GenericIE(InfoExtractor):
             'info_dict': {
                 'id': '83645793',
                 'title': 'Lock up and get excited',
-                'thumbnail': r're:^https?://.*\.jpg(\?.*)?$',
                 'ext': 'mp4'
-            }
-        }
+            },
+            'skip': 'TODO: fix nested playlists processing in tests',
+        },
         # {
         #     # TODO: find another test
         #     # http://schema.org/VideoObject
@@ -2973,6 +2973,13 @@ class GenericIE(InfoExtractor):
             return self.playlist_from_matches(
                 xfileshare_urls, video_id, video_title, ie=XFileShareIE.ie_key())
 
+        sharevideos_urls = [mobj.group('url') for mobj in re.finditer(
+            r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1',
+            webpage)]
+        if sharevideos_urls:
+            return self.playlist_from_matches(
+                sharevideos_urls, video_id, video_title)
+
         def merge_dicts(dict1, dict2):
             merged = {}
             for k, v in dict1.items():
@@ -2988,14 +2995,6 @@ class GenericIE(InfoExtractor):
                     merged[k] = v
             return merged
 
-        # Look for Share-Videos.se embeds
-        sharevideosse_urls = [m.group('url') for m in re.finditer(
-            r'<iframe[^>]+?src\s*=\s*(["\'])(?P<url>https?://embed\.share-videos\.se/auto/embed/\d+.+?)\1',
-            webpage)]
-        if sharevideosse_urls:
-            return self.playlist_from_matches(
-                sharevideosse_urls, video_id, video_title)
-
         # Look for HTML5 media
         entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
         if entries: