[mlb] Add support for embedded videos (Closes #3653)
authorSergey M․ <dstftw@gmail.com>
Tue, 2 Sep 2014 13:19:28 +0000 (20:19 +0700)
committerSergey M․ <dstftw@gmail.com>
Tue, 2 Sep 2014 13:19:28 +0000 (20:19 +0700)
youtube_dl/extractor/generic.py
youtube_dl/extractor/mlb.py

index b4f748e87fbc08ab62d62dc1b17b5da817afa5b3..1b7697870bde93fd5bb4218d12eac8c166a306ba 100644 (file)
@@ -366,7 +366,22 @@ class GenericIE(InfoExtractor):
                 'extract_flat': False,
                 'skip_download': True,
             }
-        }
+        },
+        # MLB embed
+        {
+            'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
+            'md5': '96f09a37e44da40dd083e12d9a683327',
+            'info_dict': {
+                'id': '33322633',
+                'ext': 'mp4',
+                'title': 'Ump changes call to ball',
+                'description': 'md5:71c11215384298a172a6dcb4c2e20685',
+                'duration': 48,
+                'timestamp': 1401537900,
+                'upload_date': '20140531',
+                'thumbnail': 're:^https?://.*\.jpg$',
+            },
+        },
     ]
 
     def report_download_webpage(self, video_id):
@@ -809,6 +824,12 @@ class GenericIE(InfoExtractor):
         if mobj is not None:
             return self.url_result(mobj.group('url'), 'SBS')
 
+        mobj = re.search(
+            r'<iframe[^>]+?src=(["\'])(?P<url>https?://m\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
+            webpage)
+        if mobj is not None:
+            return self.url_result(mobj.group('url'), 'MLB')
+
         # Start with something easy: JW Player in SWFObject
         found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
         if not found:
index 37c72bc5357e3766819b0f86d5bc379fdf7406c4..bfdb462ebaf663df7d7c4f3c1618988f0aa1d1e3 100644 (file)
@@ -11,7 +11,7 @@ from ..utils import (
 
 
 class MLBIE(InfoExtractor):
-    _VALID_URL = r'https?://m\.mlb\.com/(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v(?P<id>n?\d+)'
+    _VALID_URL = r'https?://m\.mlb\.com/(?:(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v|shared/video/embed/embed\.html\?.*?\bcontent_id=)(?P<id>n?\d+)'
     _TESTS = [
         {
             'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea',
@@ -69,6 +69,10 @@ class MLBIE(InfoExtractor):
                 'thumbnail': 're:^https?://.*\.jpg$',
             },
         },
+        {
+            'url': 'http://m.mlb.com/shared/video/embed/embed.html?content_id=35692085&topic_id=6479266&width=400&height=224&property=mlb',
+            'only_matching': True,
+        },
     ]
 
     def _real_extract(self, url):