[cbsnews] Relax video info regex (fixes #13284)
[youtube-dl] / youtube_dl / extractor / cbsnews.py
index 91b0f5fa94c7ba919e01fd097cbdfc71fe6992b4..ef5911bd6afa4aaa5309c5c1f7ffe6371261db41 100644 (file)
@@ -39,7 +39,7 @@ class CBSNewsIE(CBSIE):
                 'upload_date': '20140404',
                 'timestamp': 1396650660,
                 'uploader': 'CBSI-NEW',
-                'thumbnail': 're:^https?://.*\.jpg$',
+                'thumbnail': r're:^https?://.*\.jpg$',
                 'duration': 205,
                 'subtitles': {
                     'en': [{
@@ -52,6 +52,21 @@ class CBSNewsIE(CBSIE):
                 'skip_download': True,
             },
         },
+        {
+            'url': 'http://www.cbsnews.com/news/maria-ridulph-murder-will-the-nations-oldest-cold-case-to-go-to-trial-ever-get-solved/',
+            'info_dict': {
+                'id': 'QpM5BJjBVEAUFi7ydR9LusS69DPLqPJ1',
+                'ext': 'mp4',
+                'title': 'Cold as Ice',
+                'description': 'Can a childhood memory of a friend\'s murder solve a 1957 cold case? "48 Hours" correspondent Erin Moriarty has the latest.',
+                'upload_date': '20170604',
+                'timestamp': 1496538000,
+                'uploader': 'CBSI-NEW',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
     ]
 
     def _real_extract(self, url):
@@ -60,12 +75,18 @@ class CBSNewsIE(CBSIE):
         webpage = self._download_webpage(url, video_id)
 
         video_info = self._parse_json(self._html_search_regex(
-            r'(?:<ul class="media-list items" id="media-related-items"><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'',
-            webpage, 'video JSON info'), video_id)
+            r'(?:<ul class="media-list items" id="media-related-items"[^>]*><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'',
+            webpage, 'video JSON info', default='{}'), video_id, fatal=False)
+
+        if video_info:
+            item = video_info['item'] if 'item' in video_info else video_info
+        else:
+            state = self._parse_json(self._search_regex(
+                r'data-cbsvideoui-options=(["\'])(?P<json>{.+?})\1', webpage,
+                'playlist JSON info', group='json'), video_id)['state']
+            item = state['playlist'][state['pid']]
 
-        item = video_info['item'] if 'item' in video_info else video_info
-        guid = item['mpxRefId']
-        return self._extract_video_info(guid)
+        return self._extract_video_info(item['mpxRefId'], 'cbsnews')
 
 
 class CBSNewsLiveVideoIE(InfoExtractor):