From 172240c0a40f44d2aa384c512cc65c7e4c9e3660 Mon Sep 17 00:00:00 2001 From: Charles Chen Date: Tue, 15 Jul 2014 13:55:23 -0700 Subject: [PATCH] Switched to use media detail XML to extract video URL --- youtube_dl/extractor/mlb.py | 57 +++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/mlb.py b/youtube_dl/extractor/mlb.py index 2b500bdff..61ba58843 100644 --- a/youtube_dl/extractor/mlb.py +++ b/youtube_dl/extractor/mlb.py @@ -28,37 +28,44 @@ class MlbIE(InfoExtractor): title = self._og_search_title(webpage, default=video_id) description = self._html_search_regex(r'', webpage, 'description', fatal=False) thumbnail = self._html_search_regex(r'', webpage, 'image', fatal=False) + + # use the video_id to find the Media detail XML + id_len = len(video_id) + _mediadetail_url = 'http://m.mlb.com/gen/multimedia/detail/'+video_id[id_len-3]+'/'+video_id[id_len-2]+'/'+video_id[id_len-1]+'/'+video_id+'.xml' - # use the thumbnail URL to find the folder that contains the videos - _image_url = r'http://mediadownloads.mlb.com/mlbam/(?P<_date>n?.+)/images/.*$' - bobj = re.match(_image_url, thumbnail) - datestr = bobj.group('_date') - base_url = 'http://mediadownloads.mlb.com/mlbam/' + datestr - filespage = self._download_webpage(base_url, video_id) - - # Try 1800K, 1500K, 1200K, 600K, then 300K videos - video = self._html_search_regex(r'