From: John Assael
Date: Thu, 21 Jan 2016 19:51:48 +0000 (+0000)
Subject: [cbsnews] Extract subtitles
X-Git-Url: http://git.bitcoin.ninja/?a=commitdiff_plain;h=4118cc02c1694a7fd355b75c806a69d75b3850bb;p=youtube-dl

[cbsnews] Extract subtitles

added test function for CBS News subtitles
---

diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py
index d211ec23b..7b9dd67fd 100644
--- a/youtube_dl/extractor/cbsnews.py
+++ b/youtube_dl/extractor/cbsnews.py
@@ -41,6 +41,21 @@ class CBSNewsIE(InfoExtractor):
                 'skip_download': True,
             },
         },
+        {
+            'url': 'http://www.cbsnews.com/videos/mountain-lions-of-l-a/',
+            'info_dict': {
+                'id': 'Mountain Lions of L.A.',
+                'ext': 'flv',
+                'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack',
+                'thumbnail': 're:^http?://.*\.jpg$',
+                'subtitles': 're:^http?://.*\.xml$',
+                'duration': 787,
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+        },
     ]
 
     def _real_extract(self, url):
@@ -85,10 +100,21 @@ class CBSNewsIE(InfoExtractor):
                 fmt['ext'] = 'mp4'
             formats.append(fmt)
 
+        if 'mpxRefId' in video_info:
+            cap_url = 'http://www.cbsnews.com/videos/captions/%s.adb_xml' % video_info['mpxRefId']
+            subtitles = {
+                'en': [{
+                    'url': cap_url,
+                    'ext': 'xml'
+                }], }
+        else:
+            subtitles = {}
+
         return {
             'id': video_id,
             'title': title,
             'thumbnail': thumbnail,
             'duration': duration,
             'formats': formats,
+            'subtitles': subtitles,
         }