X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fmlb.py;h=b907f6b4926f9e13e58cb9aa61b1aebabfef1037;hb=6cd452acffe8d79c895a2ebd0346e2ba7f9e112f;hp=2b500bdffb2242aceb2e0f7c593c87b2087989d7;hpb=b1b01841afac9b65b706c3436a5717b603458491;p=youtube-dl diff --git a/youtube_dl/extractor/mlb.py b/youtube_dl/extractor/mlb.py index 2b500bdff..b907f6b49 100644 --- a/youtube_dl/extractor/mlb.py +++ b/youtube_dl/extractor/mlb.py @@ -1,67 +1,120 @@ from __future__ import unicode_literals -import re +from .nhl import NHLBaseIE -from .common import InfoExtractor - -class MlbIE(InfoExtractor): - _VALID_URL = r'http?://m\.mlb\.com/video/topic/[0-9]+/v(?Pn?\d+)/.*$' - _TEST = { - 'url': 'http://m.mlb.com/video/topic/81536970/v34496663/mianym-stanton-practices-for-the-home-run-derby', - 'md5': u'd9c022c10d21f849f49c05ae12a8a7e9', - 'info_dict': { - 'id': '34496663', - 'ext': 'mp4', - 'format': 'mp4', - 'description': "7/11/14: Giancarlo Stanton practices for the Home Run Derby prior to the game against the Mets", - 'title': "Stanton prepares for Derby", +class MLBIE(NHLBaseIE): + _VALID_URL = r'''(?x) + https?:// + (?:[\da-z_-]+\.)*(?Pmlb)\.com/ + (?: + (?: + (?:[^/]+/)*c-| + (?: + shared/video/embed/(?:embed|m-internal-embed)\.html| + (?:[^/]+/)+(?:play|index)\.jsp| + )\?.*?\bcontent_id= + ) + (?P\d+) + ) + ''' + _CONTENT_DOMAIN = 'content.mlb.com' + _TESTS = [ + { + 'url': 'https://www.mlb.com/mariners/video/ackleys-spectacular-catch/c-34698933', + 'md5': '632358dacfceec06bad823b83d21df2d', + 'info_dict': { + 'id': '34698933', + 'ext': 'mp4', + 'title': "Ackley's spectacular catch", + 'description': 'md5:7f5a981eb4f3cbc8daf2aeffa2215bf0', + 'duration': 66, + 'timestamp': 1405995000, + 'upload_date': '20140722', + 'thumbnail': r're:^https?://.*\.jpg$', + }, }, - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - - webpage = self._download_webpage(url, video_id) - - title = self._og_search_title(webpage, default=video_id) - description = self._html_search_regex(r'', webpage, 'description', fatal=False) - thumbnail = self._html_search_regex(r'', webpage, 'image', fatal=False) - - # use the thumbnail URL to find the folder that contains the videos - _image_url = r'http://mediadownloads.mlb.com/mlbam/(?P<_date>n?.+)/images/.*$' - bobj = re.match(_image_url, thumbnail) - datestr = bobj.group('_date') - base_url = 'http://mediadownloads.mlb.com/mlbam/' + datestr - filespage = self._download_webpage(base_url, video_id) - - # Try 1800K, 1500K, 1200K, 600K, then 300K videos - video = self._html_search_regex(r'