X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fmit.py;h=29ca45778a17654c4d2125ceda177b71cffca8a8;hb=974c1b2d4292308a26a47136e7fcf9b61f8b285a;hp=f50e36cb4fcc82a45d3a5f379beadb2764e2faf9;hpb=5b2478e2ba88db686752ed32c722b20be26fafa0;p=youtube-dl diff --git a/youtube_dl/extractor/mit.py b/youtube_dl/extractor/mit.py index f50e36cb4..29ca45778 100644 --- a/youtube_dl/extractor/mit.py +++ b/youtube_dl/extractor/mit.py @@ -4,37 +4,37 @@ import re import json from .common import InfoExtractor +from .youtube import YoutubeIE from ..utils import ( - compat_urlparse, clean_html, + ExtractorError, get_element_by_id, ) class TechTVMITIE(InfoExtractor): IE_NAME = 'techtv.mit.edu' - _VALID_URL = r'https?://techtv\.mit\.edu/(videos|embeds)/(?P<id>\d+)' + _VALID_URL = r'https?://techtv\.mit\.edu/(?:videos|embeds)/(?P<id>\d+)' _TEST = { 'url': 'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set', - 'md5': '1f8cb3e170d41fd74add04d3c9330e5f', + 'md5': '00a3a27ee20d44bcaa0933ccec4a2cf7', 'info_dict': { 'id': '25418', 'ext': 'mp4', - 'title': 'MIT DNA Learning Center Set', - 'description': 'md5:82313335e8a8a3f243351ba55bc1b474', + 'title': 'MIT DNA and Protein Sets', + 'description': 'md5:46f5c69ce434f0a97e7c628cc142802d', }, } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) raw_page = self._download_webpage( 'http://techtv.mit.edu/videos/%s' % video_id, video_id) clean_page = re.compile(r'<!--.*?-->', re.S).sub('', raw_page) - base_url = self._search_regex( - r'ipadUrl: \'(.+?cloudfront.net/)', raw_page, 'base url') + base_url = self._proto_relative_url(self._search_regex( + r'ipadUrl: \'(.+?cloudfront.net/)', raw_page, 'base url'), 'http:') formats_json = self._search_regex( r'bitrates: (\[.+?\])', raw_page, 'video formats') formats_mit = json.loads(formats_json) @@ -71,7 +71,6 @@ class MITIE(TechTVMITIE): _TEST = { 'url': 
'http://video.mit.edu/watch/the-government-is-profiling-you-13222/', - 'file': '.mp4', 'md5': '7db01d5ccc1895fc5010e9c9e13648da', 'info_dict': { 'id': '21783', @@ -87,4 +86,71 @@ class MITIE(TechTVMITIE): webpage = self._download_webpage(url, page_title) embed_url = self._search_regex( r'