X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcbsnews.py;h=7319ee1b734e6fc30e4cb681f66eb082ffb9622a;hb=d1e440a4a18522207a1a3e624bf801c8338f9146;hp=cabf7e73b981103d074486dd35c5b422d3d770da;hpb=874e05975b9dde433dc633f3060c3b9013775343;p=youtube-dl diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py index cabf7e73b..7319ee1b7 100644 --- a/youtube_dl/extractor/cbsnews.py +++ b/youtube_dl/extractor/cbsnews.py @@ -1,15 +1,17 @@ # encoding: utf-8 from __future__ import unicode_literals -import re -import json - +from .common import InfoExtractor from .theplatform import ThePlatformIE +from ..utils import ( + parse_duration, + find_xpath_attr, +) class CBSNewsIE(ThePlatformIE): IE_DESC = 'CBS News' - _VALID_URL = r'http://(?:www\.)?cbsnews\.com/(?:[^/]+/)+(?P[\da-z_-]+)' + _VALID_URL = r'http://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P[\da-z_-]+)' _TESTS = [ { @@ -47,15 +49,23 @@ class CBSNewsIE(ThePlatformIE): }, ] + def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'): + closed_caption_e = find_xpath_attr(smil, self._xpath_ns('.//param', namespace), 'name', 'ClosedCaptionURL') + return { + 'en': [{ + 'ext': 'ttml', + 'url': closed_caption_e.attrib['value'], + }] + } if closed_caption_e is not None and closed_caption_e.attrib.get('value') else [] + def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - video_info = json.loads(self._html_search_regex( + video_info = self._parse_json(self._html_search_regex( r'(?: