[cbsnews] Extract subtitles
author John Assael <iassael@gmail.com>
Thu, 21 Jan 2016 19:51:48 +0000 (19:51 +0000)
committer Sergey M․ <dstftw@gmail.com>
Fri, 22 Jan 2016 16:15:51 +0000 (22:15 +0600)
added test function for CBS News subtitles

youtube_dl/extractor/cbsnews.py

index d211ec23befd0eaa037efedb9024cd7c0b32864a..7b9dd67fd6f57275e68dddf29a35cad5b98d553b 100644 (file)
@@ -41,6 +41,21 @@ class CBSNewsIE(InfoExtractor):
                 'skip_download': True,
             },
         },
+        {
+            'url': 'http://www.cbsnews.com/videos/mountain-lions-of-l-a/',
+            'info_dict': {
+                'id': 'Mountain Lions of L.A.',
+                'ext': 'flv',
+                'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack',
+                'thumbnail': 're:^http?://.*\.jpg$',
+                'subtitles': 're:^http?://.*\.xml$',
+                'duration': 787,
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+        },
     ]
 
     def _real_extract(self, url):
@@ -85,10 +100,21 @@ class CBSNewsIE(InfoExtractor):
                 fmt['ext'] = 'mp4'
             formats.append(fmt)
 
+        if 'mpxRefId' in video_info:
+            cap_url = 'http://www.cbsnews.com/videos/captions/%s.adb_xml' % video_info['mpxRefId']
+            subtitles = {
+                'en': [{
+                    'url': cap_url,
+                    'ext': 'xml'
+                }], }
+        else:
+            subtitles = {}
+
         return {
             'id': video_id,
             'title': title,
             'thumbnail': thumbnail,
             'duration': duration,
             'formats': formats,
+            'subtitles': subtitles,
         }