[watchbox] Fix extraction (closes #17107)
author Sergey M․ <dstftw@gmail.com>
Mon, 30 Jul 2018 16:28:44 +0000 (23:28 +0700)
committer Sergey M․ <dstftw@gmail.com>
Mon, 30 Jul 2018 16:28:44 +0000 (23:28 +0700)
youtube_dl/extractor/watchbox.py

index d993130804b79a6ec0ab20e367431cc9068e2ea9..5a4e46e73a28e71a6ee20f30be9e3af6336c8d29 100644 (file)
@@ -10,6 +10,7 @@ from ..utils import (
     js_to_json,
     strip_or_none,
     try_get,
+    unescapeHTML,
     unified_timestamp,
 )
 
@@ -67,12 +68,20 @@ class WatchBoxIE(InfoExtractor):
 
         webpage = self._download_webpage(url, video_id)
 
-        source = (self._parse_json(
+        player_config = self._parse_json(
             self._search_regex(
-                r'playerConf\s*=\s*({.+?})\s*;', webpage, 'player config',
-                default='{}'),
-            video_id, transform_source=js_to_json,
-            fatal=False) or {}).get('source') or {}
+                r'data-player-conf=(["\'])(?P<data>{.+?})\1', webpage,
+                'player config', default='{}', group='data'),
+            video_id, transform_source=unescapeHTML, fatal=False)
+
+        if not player_config:
+            player_config = self._parse_json(
+                self._search_regex(
+                    r'playerConf\s*=\s*({.+?})\s*;', webpage, 'player config',
+                    default='{}'),
+                video_id, transform_source=js_to_json, fatal=False) or {}
+
+        source = player_config.get('source') or {}
 
         video_id = compat_str(source.get('videoId') or video_id)