Improve URL extraction
[youtube-dl] / youtube_dl / extractor / generic.py
index 6c0f772ac63142c83f010d07989e91e389c858cb..e5a8ffbe8d14897b410ea2b2078f4073aa5bed42 100644 (file)
@@ -32,6 +32,7 @@ from ..utils import (
     unified_strdate,
     unsmuggle_url,
     UnsupportedError,
+    url_or_none,
     xpath_text,
 )
 from .commonprotocols import RtmpIE
@@ -111,6 +112,7 @@ from .cloudflarestream import CloudflareStreamIE
 from .peertube import PeerTubeIE
 from .indavideo import IndavideoEmbedIE
 from .apa import APAIE
+from .foxnews import FoxNewsIE
 
 
 class GenericIE(InfoExtractor):
@@ -1394,17 +1396,6 @@ class GenericIE(InfoExtractor):
                 'skip_download': True,
             },
         },
-        # SVT embed
-        {
-            'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
-            'info_dict': {
-                'id': '2900353',
-                'ext': 'flv',
-                'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
-                'duration': 27,
-                'age_limit': 0,
-            },
-        },
         # Crooks and Liars embed
         {
             'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
@@ -3091,6 +3082,11 @@ class GenericIE(InfoExtractor):
             return self.playlist_from_matches(
                 apa_urls, video_id, video_title, ie=APAIE.ie_key())
 
+        foxnews_urls = FoxNewsIE._extract_urls(webpage)
+        if foxnews_urls:
+            return self.playlist_from_matches(
+                foxnews_urls, video_id, video_title, ie=FoxNewsIE.ie_key())
+
         sharevideos_urls = [mobj.group('url') for mobj in re.finditer(
             r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1',
             webpage)]
@@ -3135,8 +3131,8 @@ class GenericIE(InfoExtractor):
                 sources = [sources]
             formats = []
             for source in sources:
-                src = source.get('src')
-                if not src or not isinstance(src, compat_str):
+                src = url_or_none(source.get('src'))
+                if not src:
                     continue
                 src = compat_urlparse.urljoin(url, src)
                 src_type = source.get('type')