X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fted.py;h=ad1a46c3385713056b94d2f00e38558e8ea69b1c;hb=43b81eb98afb8958b9a31220bf710a97751df5be;hp=b9e65447f1f69c63fc61eab1469644070e5dc5da;hpb=bacac173a9ab310f48555bcb5796e11701cff899;p=youtube-dl diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index b9e65447f..ad1a46c33 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -7,12 +7,13 @@ from .subtitles import SubtitlesInfoExtractor from ..utils import ( compat_str, - RegexNotFoundError, ) class TEDIE(SubtitlesInfoExtractor): - _VALID_URL = r'''(?x)http://www\.ted\.com/ + _VALID_URL = r'''(?x) + (?Phttps?://) + (?Pwww|embed)(?P\.ted\.com/ ( (?Pplaylists(?:/\d+)?) # We have a playlist | @@ -20,12 +21,14 @@ class TEDIE(SubtitlesInfoExtractor): ) (/lang/(.*?))? # The url may contain the language /(?P\w+) # Here goes the name and then ".html" + .*)$ ''' _TEST = { 'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html', - 'file': '102.mp4', 'md5': '4ea1dada91e4174b53dac2bb8ace429d', 'info_dict': { + 'id': '102', + 'ext': 'mp4', 'title': 'The illusion of consciousness', 'description': ('Philosopher Dan Dennett makes a compelling ' 'argument that not only don\'t we understand our own ' @@ -48,6 +51,9 @@ class TEDIE(SubtitlesInfoExtractor): def _real_extract(self, url): m = re.match(self._VALID_URL, url, re.VERBOSE) + if m.group('type') == 'embed': + desktop_url = m.group('proto') + 'www' + m.group('urlmain') + return self.url_result(desktop_url, 'TED') name = m.group('name') if m.group('type_talk'): return self._talk_info(url, name) @@ -86,18 +92,21 @@ class TEDIE(SubtitlesInfoExtractor): } for (format_id, format_url) in talk_info['nativeDownloads'].items()] self._sort_formats(formats) - video_id = talk_info['id'] + video_id = compat_str(talk_info['id']) # subtitles video_subtitles = self.extract_subtitles(video_id, talk_info) if self._downloader.params.get('listsubtitles', False): self._list_available_subtitles(video_id, talk_info) return + thumbnail = talk_info['thumb'] + if not thumbnail.startswith('http'): + thumbnail = 'http://' + thumbnail return { 'id': video_id, 'title': talk_info['title'], 'uploader': talk_info['speaker'], - 'thumbnail': talk_info['thumb'], + 'thumbnail': thumbnail, 'description': self._og_search_description(webpage), 'subtitles': video_subtitles, 'formats': formats,