[jwplatform] Use js_to_json to detect more JWPlayers
[youtube-dl] / youtube_dl / extractor / jwplatform.py
index ce3126943939063bc65c4c710e10ea61153ac036..38199fcd0b5c46ba4e47b8fc8e448b3471dd17ea 100644 (file)
@@ -9,6 +9,7 @@ from ..utils import (
     determine_ext,
     float_or_none,
     int_or_none,
+    js_to_json,
     mimetype2ext,
 )
 
@@ -19,14 +20,15 @@ class JWPlatformBaseIE(InfoExtractor):
         # TODO: Merge this with JWPlayer-related codes in generic.py
 
         mobj = re.search(
-            'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\((?P<options>[^)]+)\)',
+            r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)',
             webpage)
         if mobj:
             return mobj.group('options')
 
     def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
         jwplayer_data = self._parse_json(
-            self._find_jwplayer_data(webpage), video_id)
+            self._find_jwplayer_data(webpage), video_id,
+            transform_source=js_to_json)
         return self._parse_jwplayer_data(
             jwplayer_data, video_id, *args, **kwargs)
 
@@ -63,10 +65,17 @@ class JWPlatformBaseIE(InfoExtractor):
                         'ext': ext,
                     })
                 else:
+                    height = int_or_none(source.get('height'))
+                    if height is None:
+                        # Often no height is provided, but there is a label in
+                        # a format like 1080p.
+                        height = int_or_none(self._search_regex(
+                            r'^(\d{3,})[pP]$', source.get('label') or '',
+                            'height', default=None))
                     a_format = {
                         'url': source_url,
                         'width': int_or_none(source.get('width')),
-                        'height': int_or_none(source.get('height')),
+                        'height': height,
                         'ext': ext,
                     }
                     if source_url.startswith('rtmp'):