Add support for HTTPS for all extractors as a preventive and future-proof measure
[youtube-dl] / youtube_dl / extractor / cbsnews.py
index 8f864699f93675afe2ce00db21a13c4110bff46b..f23bac9a1ff279ed28b43fa52f080bc2c4d651d7 100644 (file)
@@ -3,12 +3,15 @@ from __future__ import unicode_literals
 
 from .common import InfoExtractor
 from .theplatform import ThePlatformIE
-from ..utils import parse_duration
+from ..utils import (
+    parse_duration,
+    find_xpath_attr,
+)
 
 
 class CBSNewsIE(ThePlatformIE):
     IE_DESC = 'CBS News'
-    _VALID_URL = r'http://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)'
+    _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)'
 
     _TESTS = [
         {
@@ -46,6 +49,15 @@ class CBSNewsIE(ThePlatformIE):
         },
     ]
 
+    def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
+        closed_caption_e = find_xpath_attr(smil, self._xpath_ns('.//param', namespace), 'name', 'ClosedCaptionURL')
+        return {  # captions come from the SMIL's ClosedCaptionURL <param>; exposed as a single 'en' TTML track
+            'en': [{
+                'ext': 'ttml',
+                'url': closed_caption_e.attrib['value'],
+            }]
+        } if closed_caption_e is not None and closed_caption_e.attrib.get('value') else {}  # no usable caption URL: empty dict, same type as the populated result
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
@@ -61,18 +73,12 @@ class CBSNewsIE(ThePlatformIE):
         thumbnail = item.get('mediaImage') or item.get('thumbnail')
 
         subtitles = {}
-        if 'mpxRefId' in video_info:
-            subtitles['en'] = [{
-                'ext': 'ttml',
-                'url': 'http://www.cbsnews.com/videos/captions/%s.adb_xml' % video_info['mpxRefId'],
-            }]
-
         formats = []
         for format_id in ['RtmpMobileLow', 'RtmpMobileHigh', 'Hls', 'RtmpDesktop']:
             pid = item.get('media' + format_id)
             if not pid:
                 continue
-            release_url = 'http://link.theplatform.com/s/dJ5BDC/%s?format=SMIL&mbr=true' % pid
+            release_url = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true' % pid
             tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % pid)
             formats.extend(tp_formats)
             subtitles = self._merge_subtitles(subtitles, tp_subtitles)
@@ -90,7 +96,7 @@ class CBSNewsIE(ThePlatformIE):
 
 class CBSNewsLiveVideoIE(InfoExtractor):
     IE_DESC = 'CBS News Live Videos'
-    _VALID_URL = r'http://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)'
+    _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)'
 
     _TEST = {
         'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/',