Merge remote-tracking branch 'Boris-de/wdrmaus_fix#8562'
[youtube-dl] / youtube_dl / extractor / cbs.py
index c621a08d54a2dc405e1863345941485439691fe3..ac2c7dced6f3561bb90f679947cfe35d6a31e2b1 100644 (file)
@@ -1,11 +1,12 @@
 from __future__ import unicode_literals
 
+import re
+
 from .theplatform import ThePlatformIE
 from ..utils import (
     xpath_text,
     xpath_element,
     int_or_none,
-    ExtractorError,
     find_xpath_attr,
 )
 
@@ -22,7 +23,7 @@ class CBSBaseIE(ThePlatformIE):
 
 
 class CBSIE(CBSBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<id>[^/]+)'
+    _VALID_URL = r'(?:cbs:(?P<content_id>\w+)|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<display_id>[^/]+))'
 
     _TESTS = [{
         'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
@@ -64,14 +65,15 @@ class CBSIE(CBSBaseIE):
         'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',
         'only_matching': True,
     }]
-    TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?manifest=m3u&mbr=true'
+    TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true'
 
     def _real_extract(self, url):
-        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
-        content_id = self._search_regex(
-            [r"video\.settings\.content_id\s*=\s*'([^']+)';", r"cbsplayer\.contentId\s*=\s*'([^']+)';"],
-            webpage, 'content id')
+        content_id, display_id = re.match(self._VALID_URL, url).groups()
+        if not content_id:
+            webpage = self._download_webpage(url, display_id)
+            content_id = self._search_regex(
+                [r"video\.settings\.content_id\s*=\s*'([^']+)';", r"cbsplayer\.contentId\s*=\s*'([^']+)';"],
+                webpage, 'content id')
         items_data = self._download_xml(
             'http://can.cbs.com/thunder/player/videoPlayerService.php',
             content_id, query={'partner': 'cbs', 'contentId': content_id})
@@ -84,11 +86,11 @@ class CBSIE(CBSBaseIE):
             pid = xpath_text(item, 'pid')
             if not pid:
                 continue
-            try:
-                tp_formats, tp_subtitles = self._extract_theplatform_smil(
-                    self.TP_RELEASE_URL_TEMPLATE % pid, content_id, 'Downloading %s SMIL data' % pid)
-            except ExtractorError:
-                continue
+            tp_release_url = self.TP_RELEASE_URL_TEMPLATE % pid
+            if '.m3u8' in xpath_text(item, 'contentUrl', default=''):
+                tp_release_url += '&manifest=m3u'
+            tp_formats, tp_subtitles = self._extract_theplatform_smil(
+                tp_release_url, content_id, 'Downloading %s SMIL data' % pid)
             formats.extend(tp_formats)
             subtitles = self._merge_subtitles(subtitles, tp_subtitles)
         self._sort_formats(formats)