Merge branch 'compat-getenv-and-expanduser' of https://github.com/dstftw/youtube...
[youtube-dl] / youtube_dl / extractor / ted.py
index df569a8769923b60cfea00f45c6f226e377dcee6..cd4af96fdb02b2f4ea392fe19482371b6a02acad 100644 (file)
@@ -27,7 +27,7 @@ class TEDIE(SubtitlesInfoExtractor):
         '''
     _TESTS = [{
         'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html',
-        'md5': '4ea1dada91e4174b53dac2bb8ace429d',
+        'md5': 'fc94ac279feebbce69f21c0c6ee82810',
         'info_dict': {
             'id': '102',
             'ext': 'mp4',
@@ -53,13 +53,32 @@ class TEDIE(SubtitlesInfoExtractor):
         'url': 'http://www.ted.com/talks/gabby_giffords_and_mark_kelly_be_passionate_be_courageous_be_your_best',
         'info_dict': {
             'id': '1972',
-            'ext': 'flv',
+            'ext': 'mp4',
             'title': 'Be passionate. Be courageous. Be your best.',
             'uploader': 'Gabby Giffords and Mark Kelly',
-            'description': 'md5:d89e1d8ebafdac8e55df4c219ecdbfe9',
+            'description': 'md5:5174aed4d0f16021b704120360f72b92',
+        },
+    }, {
+        'url': 'http://www.ted.com/playlists/who_are_the_hackers',
+        'info_dict': {
+            'id': '10',
+            'title': 'Who are the hackers?',
+        },
+        'playlist_mincount': 6,
+    }, {
+        # contains a youtube video
+        'url': 'https://www.ted.com/talks/douglas_adams_parrots_the_universe_and_everything',
+        'add_ie': ['Youtube'],
+        'info_dict': {
+            'id': '_ZG8HBuDjgc',
+            'ext': 'mp4',
+            'title': 'Douglas Adams: Parrots the Universe and Everything',
+            'description': 'md5:01ad1e199c49ac640cb1196c0e9016af',
+            'uploader': 'University of California Television (UCTV)',
+            'uploader_id': 'UCtelevision',
+            'upload_date': '20080522',
         },
         'params': {
-            # rtmp download
             'skip_download': True,
         },
     }]
@@ -97,7 +116,7 @@ class TEDIE(SubtitlesInfoExtractor):
         playlist_info = info['playlist']
 
         playlist_entries = [
-            self.url_result(u'http://www.ted.com/talks/' + talk['slug'], self.ie_key())
+            self.url_result('http://www.ted.com/talks/' + talk['slug'], self.ie_key())
             for talk in info['talks']
         ]
         return self.playlist_result(
@@ -111,6 +130,13 @@ class TEDIE(SubtitlesInfoExtractor):
 
         talk_info = self._extract_info(webpage)['talks'][0]
 
+        if talk_info.get('external') is not None:
+            self.to_screen('Found video from %s' % talk_info['external']['service'])
+            return {
+                '_type': 'url',
+                'url': talk_info['external']['uri'],
+            }
+
         formats = [{
             'url': format_url,
             'format_id': format_id,
@@ -146,7 +172,7 @@ class TEDIE(SubtitlesInfoExtractor):
             thumbnail = 'http://' + thumbnail
         return {
             'id': video_id,
-            'title': talk_info['title'],
+            'title': talk_info['title'].strip(),
             'uploader': talk_info['speaker'],
             'thumbnail': thumbnail,
             'description': self._og_search_description(webpage),
@@ -163,7 +189,7 @@ class TEDIE(SubtitlesInfoExtractor):
                 sub_lang_list[l] = url
             return sub_lang_list
         else:
-            self._downloader.report_warning(u'video doesn\'t have subtitles')
+            self._downloader.report_warning('video doesn\'t have subtitles')
             return {}
 
     def _watch_info(self, url, name):
@@ -178,7 +204,10 @@ class TEDIE(SubtitlesInfoExtractor):
         title = self._html_search_regex(
             r"(?s)<h1(?:\s+class='[^']+')?>(.+?)</h1>", webpage, 'title')
         description = self._html_search_regex(
-            r'(?s)<h4 class="[^"]+" id="h3--about-this-talk">.*?</h4>(.*?)</div>',
+            [
+                r'(?s)<h4 class="[^"]+" id="h3--about-this-talk">.*?</h4>(.*?)</div>',
+                r'(?s)<p><strong>About this talk:</strong>\s+(.*?)</p>',
+            ],
             webpage, 'description', fatal=False)
 
         return {