X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcbsnews.py;h=7319ee1b734e6fc30e4cb681f66eb082ffb9622a;hb=5633b4d39d178402c6d89146c8c9c34e3bf58619;hp=cabf7e73b981103d074486dd35c5b422d3d770da;hpb=ed7cd1e859cf97e975a28a5e8c58a1d1aca819fe;p=youtube-dl diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py index cabf7e73b..7319ee1b7 100644 --- a/youtube_dl/extractor/cbsnews.py +++ b/youtube_dl/extractor/cbsnews.py @@ -1,15 +1,17 @@ # encoding: utf-8 from __future__ import unicode_literals -import re -import json - +from .common import InfoExtractor from .theplatform import ThePlatformIE +from ..utils import ( + parse_duration, + find_xpath_attr, +) class CBSNewsIE(ThePlatformIE): IE_DESC = 'CBS News' - _VALID_URL = r'http://(?:www\.)?cbsnews\.com/(?:[^/]+/)+(?P[\da-z_-]+)' + _VALID_URL = r'http://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P[\da-z_-]+)' _TESTS = [ { @@ -47,15 +49,23 @@ class CBSNewsIE(ThePlatformIE): }, ] + def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'): + closed_caption_e = find_xpath_attr(smil, self._xpath_ns('.//param', namespace), 'name', 'ClosedCaptionURL') + return { + 'en': [{ + 'ext': 'ttml', + 'url': closed_caption_e.attrib['value'], + }] + } if closed_caption_e is not None and closed_caption_e.attrib.get('value') else [] + def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - video_info = json.loads(self._html_search_regex( + video_info = self._parse_json(self._html_search_regex( r'(?: