From: Philipp Hagemeister Date: Mon, 18 Feb 2013 17:45:09 +0000 (+0100) Subject: Add tests to MySpass X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=58994225bcd6626c2ed2bbf441577b0beaa4bf3c;p=youtube-dl Add tests to MySpass --- 58994225bcd6626c2ed2bbf441577b0beaa4bf3c diff --cc test/tests.json index a6782ed4c,5c46af2c8..a3c31ae51 --- a/test/tests.json +++ b/test/tests.json @@@ -286,14 -286,5 +286,23 @@@ "title": "test chars: \"'/\\ä<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de ." } + }, + { + "name": "TED", + "url": "http://www.ted.com/talks/dan_dennett_on_our_consciousness.html", + "file": "102.mp4", + "md5": "7bc087e71d16f18f9b8ab9fa62a8a031", + "info_dict": { + "title": "Dan Dennett: The illusion of consciousness" + } ++ }, ++ { ++ "name": "MySpass", ++ "url": "http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/", ++ "file": "11741.mp4", ++ "md5": "0b49f4844a068f8b33f4b7c88405862b", ++ "info_dict": { ++ "title": "Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2" ++ } } ] diff --cc youtube_dl/InfoExtractors.py index 086aa5da3,57d5e9d36..fe9bd97d0 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@@ -3967,31 -3967,64 +3967,87 @@@ class KeekIE(InfoExtractor) 'uploader': uploader } return [info] - + -class MyspassIE(InfoExtractor): +class TEDIE(InfoExtractor): + _VALID_URL=r'http://www.ted.com/talks/(?P\w+)' + def _real_extract(self, url): + m=re.match(self._VALID_URL, url) + videoName=m.group('videoName') + webpage=self._download_webpage(url, 0, 'Downloading \"%s\" page' % videoName) + #If the url includes the language we get the title translated + title_RE=r'

(?P[\s\w:/\.\?=\+-\\\'])</span></h1>' + title=re.search(title_RE, webpage).group('title') + info_RE=r'''<script\ type="text/javascript">var\ talkDetails\ =(.?) + "id":(?P<videoID>[\d]+).? + "mediaSlug":"(?P<mediaSlug>[\w\d]+?)"''' + info_match=re.search(info_RE,webpage,re.VERBOSE) + video_id=info_match.group('videoID') + mediaSlug=info_match.group('mediaSlug') + video_url='http://download.ted.com/talks/%s.mp4' % mediaSlug + info = { + 'id':video_id, + 'url':video_url, + 'ext': 'mp4', + 'title': title + } + return [info] + ++class MySpassIE(InfoExtractor): + _VALID_URL = r'http://www.myspass.de/.' - IE_NAME = u'myspass' + + def _real_extract(self, url): + META_DATA_URL_TEMPLATE = 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=%s' - ++ + # video id is the last path element of the URL + # usually there is a trailing slash, so also try the second but last + url_path = compat_urllib_parse_urlparse(url).path + url_parent_path, video_id = os.path.split(url_path) + if not video_id: + _, video_id = os.path.split(url_parent_path) + + # get metadata + metadata_url = META_DATA_URL_TEMPLATE % video_id + metadata_text = self._download_webpage(metadata_url, video_id) + metadata = xml.etree.ElementTree.fromstring(metadata_text.encode('utf-8')) + + # extract values from metadata + url_flv_el = metadata.find('url_flv') + if url_flv_el is None: + self._downloader.trouble(u'ERROR: unable to extract download url') + return + video_url = url_flv_el.text + extension = os.path.splitext(video_url)[1][1:] + title_el = metadata.find('title') + if title_el is None: + self._downloader.trouble(u'ERROR: unable to extract title') + return + title = title_el.text + format_id_el = metadata.find('format_id') + if format_id_el is None: + # no format_id in the metadata: fall back to the file extension + format = extension + else: + format = format_id_el.text + description_el = metadata.find('description') + if description_el is not None: + description = description_el.text + else: + description = None + imagePreview_el = 
metadata.find('imagePreview') + if imagePreview_el is not None: + thumbnail = imagePreview_el.text + else: + thumbnail = None + info = { + 'id': video_id, + 'url': video_url, + 'title': title, + 'ext': extension, + 'format': format, + 'thumbnail': thumbnail, + 'description': description + } + return [info] + def gen_extractors(): """ Return a list of an instance of every supported extractor. The order does matter; the first extractor matched is the one handling the URL. @@@ -4039,7 -4072,7 +4095,8 @@@ RBMARadioIE(), EightTracksIE(), KeekIE(), - MyspassIE(), + TEDIE(), ++ MySpassIE(), GenericIE() ]