X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fmit.py;h=1aea78d118a84a135494214da54c3c2c21465bc9;hb=4c76aa06665621c7689938afd7bbdbc797b5c7ea;hp=270fdf04b53fa3b7d2d5ecec28103ff133f6b0cc;hpb=a83a3139d1b42b5ca0c947c8e083ec1877622d16;p=youtube-dl diff --git a/youtube_dl/extractor/mit.py b/youtube_dl/extractor/mit.py index 270fdf04b..1aea78d11 100644 --- a/youtube_dl/extractor/mit.py +++ b/youtube_dl/extractor/mit.py @@ -6,7 +6,6 @@ import json from .common import InfoExtractor from .youtube import YoutubeIE from ..utils import ( - compat_urlparse, clean_html, ExtractorError, get_element_by_id, @@ -15,28 +14,27 @@ from ..utils import ( class TechTVMITIE(InfoExtractor): IE_NAME = 'techtv.mit.edu' - _VALID_URL = r'https?://techtv\.mit\.edu/(videos|embeds)/(?P<id>\d+)' + _VALID_URL = r'https?://techtv\.mit\.edu/(?:videos|embeds)/(?P<id>\d+)' _TEST = { 'url': 'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set', - 'md5': '1f8cb3e170d41fd74add04d3c9330e5f', + 'md5': '00a3a27ee20d44bcaa0933ccec4a2cf7', 'info_dict': { 'id': '25418', 'ext': 'mp4', - 'title': 'MIT DNA Learning Center Set', - 'description': 'md5:82313335e8a8a3f243351ba55bc1b474', + 'title': 'MIT DNA and Protein Sets', + 'description': 'md5:46f5c69ce434f0a97e7c628cc142802d', }, } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) raw_page = self._download_webpage( 'http://techtv.mit.edu/videos/%s' % video_id, video_id) clean_page = re.compile(r'', re.S).sub('', raw_page) - base_url = self._search_regex( - r'ipadUrl: \'(.+?cloudfront.net/)', raw_page, 'base url') + base_url = self._proto_relative_url(self._search_regex( + r'ipadUrl: \'(.+?cloudfront.net/)', raw_page, 'base url'), 'http:') formats_json = self._search_regex( r'bitrates: (\[.+?\])', raw_page, 'video formats') formats_mit = json.loads(formats_json) @@ -73,7 +71,6 @@ class MITIE(TechTVMITIE): _TEST = { 'url': 
'http://video.mit.edu/watch/the-government-is-profiling-you-13222/', - 'file': '.mp4', 'md5': '7db01d5ccc1895fc5010e9c9e13648da', 'info_dict': { 'id': '21783', @@ -89,12 +86,12 @@ class MITIE(TechTVMITIE): webpage = self._download_webpage(url, page_title) embed_url = self._search_regex( r'