X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcbs.py;h=ac2c7dced6f3561bb90f679947cfe35d6a31e2b1;hb=0f47cc2e925014afef4339a8213d52797a710eb3;hp=62f52ec8edcd7d98993d1cb9f02769f3606539b5;hpb=f74197a0746ac24fc6003166fbc7b36362ce8ba8;p=youtube-dl diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py index 62f52ec8e..ac2c7dced 100644 --- a/youtube_dl/extractor/cbs.py +++ b/youtube_dl/extractor/cbs.py @@ -1,11 +1,12 @@ from __future__ import unicode_literals +import re + from .theplatform import ThePlatformIE from ..utils import ( xpath_text, xpath_element, int_or_none, - ExtractorError, find_xpath_attr, ) @@ -22,7 +23,7 @@ class CBSBaseIE(ThePlatformIE): class CBSIE(CBSBaseIE): - _VALID_URL = r'https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<id>[^/]+)' + _VALID_URL = r'(?:cbs:(?P<content_id>\w+)|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<display_id>[^/]+))' _TESTS = [{ 'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/', @@ -67,11 +68,12 @@ class CBSIE(CBSBaseIE): TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true' def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - content_id = self._search_regex( - [r"video\.settings\.content_id\s*=\s*'([^']+)';", r"cbsplayer\.contentId\s*=\s*'([^']+)';"], - webpage, 'content id') + content_id, display_id = re.match(self._VALID_URL, url).groups() + if not content_id: + webpage = self._download_webpage(url, display_id) + content_id = self._search_regex( + [r"video\.settings\.content_id\s*=\s*'([^']+)';", r"cbsplayer\.contentId\s*=\s*'([^']+)';"], + webpage, 'content id') items_data = self._download_xml( 'http://can.cbs.com/thunder/player/videoPlayerService.php', content_id, query={'partner': 'cbs', 'contentId': 
content_id}) @@ -84,14 +86,11 @@ class CBSIE(CBSBaseIE): pid = xpath_text(item, 'pid') if not pid: continue - try: - tp_release_url = self.TP_RELEASE_URL_TEMPLATE % pid - if '.m3u8' in xpath_text(item, 'contentUrl', default=''): - tp_release_url += '&manifest=m3u' - tp_formats, tp_subtitles = self._extract_theplatform_smil( - tp_release_url, content_id, 'Downloading %s SMIL data' % pid) - except ExtractorError: - continue + tp_release_url = self.TP_RELEASE_URL_TEMPLATE % pid + if '.m3u8' in xpath_text(item, 'contentUrl', default=''): + tp_release_url += '&manifest=m3u' + tp_formats, tp_subtitles = self._extract_theplatform_smil( + tp_release_url, content_id, 'Downloading %s SMIL data' % pid) formats.extend(tp_formats) subtitles = self._merge_subtitles(subtitles, tp_subtitles) self._sort_formats(formats)