[extractor/generic] Add test for svt embed

[youtube-dl] / youtube_dl / extractor / generic.py
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py

index 4946cc1325b0ffdecca0767e7532ddcb4741a028..7c38bce7c2d8c7be1547419027d229a2f9582a6d 100644 (file)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -37,6 +37,7 @@ from .condenast import CondeNastIE
  from .udn import UDNEmbedIE
  from .senateisvp import SenateISVPIE
  from .bliptv import BlipTVIE
+from .svt import SVTIE
  
  
  class GenericIE(InfoExtractor):
@@ -413,6 +414,19 @@ class GenericIE(InfoExtractor):
                  'thumbnail': 're:^https?://.*\.jpg$',
              },
          },
+        # MLB articles
+        {
+            'url': 'http://m.mlb.com/news/article/118550098/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer',
+            'md5': 'b190e70141fb9a1552a85426b4da1b5d',
+            'info_dict': {
+                'id': '75609783',
+                'ext': 'mp4',
+                'title': 'Must C: Pillar climbs for catch',
+                'description': '4/15/15: Blue Jays outfielder Kevin Pillar continues his defensive dominance by climbing the wall in left to rob Tim Beckham of a home run',
+                'timestamp': 1429124820,
+                'upload_date': '20150415',
+            }
+        },
          # Wistia embed
          {
              'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
@@ -645,6 +659,17 @@ class GenericIE(InfoExtractor):
                  'title': 'Facebook Creates "On This Day" | Crunch Report',
              },
          },
+        # SVT embed
+        {
+            'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
+            'info_dict': {
+                'id': '2900353',
+                'ext': 'flv',
+                'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)',
+                'duration': 27,
+                'age_limit': 0,
+            },
+        },
          # RSS feed with enclosure
          {
              'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
@@ -1078,6 +1103,11 @@ class GenericIE(InfoExtractor):
          if bliptv_url:
              return self.url_result(bliptv_url, 'BlipTV')
  
+        # Look for SVT player
+        svt_url = SVTIE._extract_url(webpage)
+        if svt_url:
+            return self.url_result(svt_url, 'SVT')
+
          # Look for embedded condenast player
          matches = re.findall(
              r'<iframe\s+(?:[a-zA-Z-]+="[^"]+"\s+)*?src="(https?://player\.cnevids\.com/embed/[^"]+")',
@@ -1289,6 +1319,10 @@ class GenericIE(InfoExtractor):
          mobj = re.search(
              r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
              webpage)
+        if not mobj:  # no embed iframe matched: fall back to the data-video-link attribute
+            mobj = re.search(
+                r'data-video-link=["\'](?P<url>http://m\.mlb\.com/video/[^"\']+)',
+                webpage)
          if mobj is not None:
              return self.url_result(mobj.group('url'), 'MLB')
  
@@ -1436,7 +1470,7 @@ class GenericIE(InfoExtractor):
                  if refresh_header:
                      found = re.search(REDIRECT_REGEX, refresh_header)
              if found:
-                new_url = found.group(1)
+                new_url = compat_urlparse.urljoin(url, found.group(1))
                  self.report_following_redirect(new_url)
                  return {
                      '_type': 'url',