X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fted.py;h=f8a87afdaf4d27c59b4b29491569b243331b2322;hb=9e1a5b845586a0a5431fb72467142046d8571e6f;hp=df569a8769923b60cfea00f45c6f226e377dcee6;hpb=2d4c98dbd17676978114b70d59ea15628f886c24;p=youtube-dl diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index df569a876..f8a87afda 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -27,17 +27,18 @@ class TEDIE(SubtitlesInfoExtractor): ''' _TESTS = [{ 'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html', - 'md5': '4ea1dada91e4174b53dac2bb8ace429d', + 'md5': 'fc94ac279feebbce69f21c0c6ee82810', 'info_dict': { 'id': '102', 'ext': 'mp4', 'title': 'The illusion of consciousness', 'description': ('Philosopher Dan Dennett makes a compelling ' - 'argument that not only don\'t we understand our own ' - 'consciousness, but that half the time our brains are ' - 'actively fooling us.'), + 'argument that not only don\'t we understand our own ' + 'consciousness, but that half the time our brains are ' + 'actively fooling us.'), 'uploader': 'Dan Dennett', 'width': 854, + 'duration': 1308, } }, { 'url': 'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms', @@ -53,13 +54,33 @@ class TEDIE(SubtitlesInfoExtractor): 'url': 'http://www.ted.com/talks/gabby_giffords_and_mark_kelly_be_passionate_be_courageous_be_your_best', 'info_dict': { 'id': '1972', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Be passionate. Be courageous. Be your best.', 'uploader': 'Gabby Giffords and Mark Kelly', - 'description': 'md5:d89e1d8ebafdac8e55df4c219ecdbfe9', + 'description': 'md5:5174aed4d0f16021b704120360f72b92', + 'duration': 1128, + }, + }, { + 'url': 'http://www.ted.com/playlists/who_are_the_hackers', + 'info_dict': { + 'id': '10', + 'title': 'Who are the hackers?', + }, + 'playlist_mincount': 6, + }, { + # contains a youtube video + 'url': 'https://www.ted.com/talks/douglas_adams_parrots_the_universe_and_everything', + 'add_ie': ['Youtube'], + 'info_dict': { + 'id': '_ZG8HBuDjgc', + 'ext': 'mp4', + 'title': 'Douglas Adams: Parrots the Universe and Everything', + 'description': 'md5:01ad1e199c49ac640cb1196c0e9016af', + 'uploader': 'University of California Television (UCTV)', + 'uploader_id': 'UCtelevision', + 'upload_date': '20080522', }, 'params': { - # rtmp download 'skip_download': True, }, }] @@ -72,7 +93,7 @@ class TEDIE(SubtitlesInfoExtractor): def _extract_info(self, webpage): info_json = self._search_regex(r'q\("\w+.init",({.+})\)', - webpage, 'info json') + webpage, 'info json') return json.loads(info_json) def _real_extract(self, url): @@ -92,12 +113,12 @@ class TEDIE(SubtitlesInfoExtractor): '''Returns the videos of the playlist''' webpage = self._download_webpage(url, name, - 'Downloading playlist webpage') + 'Downloading playlist webpage') info = self._extract_info(webpage) playlist_info = info['playlist'] playlist_entries = [ - self.url_result(u'http://www.ted.com/talks/' + talk['slug'], self.ie_key()) + self.url_result('http://www.ted.com/talks/' + talk['slug'], self.ie_key()) for talk in info['talks'] ] return self.playlist_result( @@ -111,6 +132,13 @@ class TEDIE(SubtitlesInfoExtractor): talk_info = self._extract_info(webpage)['talks'][0] + if talk_info.get('external') is not None: + self.to_screen('Found video from %s' % talk_info['external']['service']) + return { + '_type': 'url', + 'url': talk_info['external']['uri'], + } + formats = [{ 'url': format_url, 'format_id': format_id, @@ -146,12 +174,13 @@ class TEDIE(SubtitlesInfoExtractor): thumbnail = 'http://' + thumbnail return { 'id': video_id, - 'title': talk_info['title'], + 'title': talk_info['title'].strip(), 'uploader': talk_info['speaker'], 'thumbnail': thumbnail, 'description': self._og_search_description(webpage), 'subtitles': video_subtitles, 'formats': formats, + 'duration': talk_info.get('duration'), } def _get_available_subtitles(self, video_id, talk_info): @@ -163,7 +192,7 @@ class TEDIE(SubtitlesInfoExtractor): sub_lang_list[l] = url return sub_lang_list else: - self._downloader.report_warning(u'video doesn\'t have subtitles') + self._downloader.report_warning('video doesn\'t have subtitles') return {} def _watch_info(self, url, name): @@ -178,7 +207,10 @@ class TEDIE(SubtitlesInfoExtractor): title = self._html_search_regex( r"(?s)(.+?)", webpage, 'title') description = self._html_search_regex( - r'(?s)

.*?

(.*?)', + [ + r'(?s)

.*?

(.*?)', + r'(?s)

About this talk:\s+(.*?)

', + ], webpage, 'description', fatal=False) return {