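Improve URL extraction in the generic extractor: filter source URLs through url_or_none, detect Fox News embeds, and drop the SVT embed test case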

[youtube-dl] / youtube_dl / extractor / generic.py
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py

index 6c0f772ac63142c83f010d07989e91e389c858cb..e5a8ffbe8d14897b410ea2b2078f4073aa5bed42 100644 (file)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -32,6 +32,7 @@ from ..utils import (
      unified_strdate,
      unsmuggle_url,
      UnsupportedError,
+    url_or_none,
      xpath_text,
  )
  from .commonprotocols import RtmpIE
@@ -111,6 +112,7 @@ from .cloudflarestream import CloudflareStreamIE
  from .peertube import PeerTubeIE
  from .indavideo import IndavideoEmbedIE
  from .apa import APAIE
+from .foxnews import FoxNewsIE
  
  
  class GenericIE(InfoExtractor):
@@ -1394,17 +1396,6 @@ class GenericIE(InfoExtractor):
                  'skip_download': True,
              },
          },
-        # SVT embed
-        {
-            'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
-            'info_dict': {
-                'id': '2900353',
-                'ext': 'flv',
-                'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
-                'duration': 27,
-                'age_limit': 0,
-            },
-        },
          # Crooks and Liars embed
          {
              'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
@@ -3091,6 +3082,11 @@ class GenericIE(InfoExtractor):
              return self.playlist_from_matches(
                  apa_urls, video_id, video_title, ie=APAIE.ie_key())
  
+        foxnews_urls = FoxNewsIE._extract_urls(webpage)
+        if foxnews_urls:
+            return self.playlist_from_matches(
+                foxnews_urls, video_id, video_title, ie=FoxNewsIE.ie_key())
+
          sharevideos_urls = [mobj.group('url') for mobj in re.finditer(
              r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1',
              webpage)]
@@ -3135,8 +3131,8 @@ class GenericIE(InfoExtractor):
                  sources = [sources]
              formats = []
              for source in sources:
-                src = source.get('src')
-                if not src or not isinstance(src, compat_str):
+                src = url_or_none(source.get('src'))
+                if not src:
                      continue
                  src = compat_urlparse.urljoin(url, src)
                  src_type = source.get('type')