Merge branch 'fstirlitz-filmon'
[youtube-dl] / youtube_dl / extractor / jwplatform.py
index 2a499bb771602c381c27f1cdbce5701b432b80c6..aff7ab49a9500c8bdabe78fac393eb30ef827db5 100644 (file)
@@ -9,7 +9,9 @@ from ..utils import (
     determine_ext,
     float_or_none,
     int_or_none,
+    js_to_json,
     mimetype2ext,
+    urljoin,
 )
 
 
@@ -19,30 +21,40 @@ class JWPlatformBaseIE(InfoExtractor):
         # TODO: Merge this with JWPlayer-related codes in generic.py
 
         mobj = re.search(
-            'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\((?P<options>[^)]+)\)',
+            r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)',
             webpage)
         if mobj:
             return mobj.group('options')
 
     def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
         jwplayer_data = self._parse_json(
-            self._find_jwplayer_data(webpage), video_id)
+            self._find_jwplayer_data(webpage), video_id,
+            transform_source=js_to_json)
         return self._parse_jwplayer_data(
             jwplayer_data, video_id, *args, **kwargs)
 
-    def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True, m3u8_id=None, rtmp_params=None, base_url=None):
+    def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
+                             m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
         # JWPlayer backward compatibility: flattened playlists
         # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
         if 'playlist' not in jwplayer_data:
             jwplayer_data = {'playlist': [jwplayer_data]}
 
         entries = []
+
+        # JWPlayer backward compatibility: single playlist item
+        # https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10
+        if not isinstance(jwplayer_data['playlist'], list):
+            jwplayer_data['playlist'] = [jwplayer_data['playlist']]
+
         for video_data in jwplayer_data['playlist']:
             # JWPlayer backward compatibility: flattened sources
             # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35
             if 'sources' not in video_data:
                 video_data['sources'] = [video_data]
 
+            this_video_id = video_id or video_data['mediaid']
+
             formats = []
             for source in video_data['sources']:
                 source_url = self._proto_relative_url(source['file'])
@@ -52,7 +64,10 @@ class JWPlatformBaseIE(InfoExtractor):
                 ext = mimetype2ext(source_type) or determine_ext(source_url)
                 if source_type == 'hls' or ext == 'm3u8':
                     formats.extend(self._extract_m3u8_formats(
-                        source_url, video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False))
+                        source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False))
+                elif ext == 'mpd':
+                    formats.extend(self._extract_mpd_formats(
+                        source_url, this_video_id, mpd_id=mpd_id, fatal=False))
                 # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
                 elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
                     formats.append({
@@ -61,14 +76,21 @@ class JWPlatformBaseIE(InfoExtractor):
                         'ext': ext,
                     })
                 else:
+                    height = int_or_none(source.get('height'))
+                    if height is None:
+                        # Often no height is provided, but there is a label
+                        # in a format like 1080p.
+                        height = int_or_none(self._search_regex(
+                            r'^(\d{3,})[pP]$', source.get('label') or '',
+                            'height', default=None))
                     a_format = {
                         'url': source_url,
                         'width': int_or_none(source.get('width')),
-                        'height': int_or_none(source.get('height')),
+                        'height': height,
                         'ext': ext,
                     }
                     if source_url.startswith('rtmp'):
-                        a_format['ext'] = 'flv',
+                        a_format['ext'] = 'flv'
 
                         # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
                         # of jwplayer.flash.swf
@@ -89,18 +111,22 @@ class JWPlatformBaseIE(InfoExtractor):
             tracks = video_data.get('tracks')
             if tracks and isinstance(tracks, list):
                 for track in tracks:
-                    if track.get('file') and track.get('kind') == 'captions':
-                        subtitles.setdefault(track.get('label') or 'en', []).append({
-                            'url': self._proto_relative_url(track['file'])
-                        })
+                    if track.get('kind') != 'captions':
+                        continue
+                    track_url = urljoin(base_url, track.get('file'))
+                    if not track_url:
+                        continue
+                    subtitles.setdefault(track.get('label') or 'en', []).append({
+                        'url': self._proto_relative_url(track_url)
+                    })
 
             entries.append({
-                'id': video_id,
+                'id': this_video_id,
                 'title': video_data['title'] if require_title else video_data.get('title'),
                 'description': video_data.get('description'),
                 'thumbnail': self._proto_relative_url(video_data.get('image')),
                 'timestamp': int_or_none(video_data.get('pubdate')),
-                'duration': float_or_none(jwplayer_data.get('duration')),
+                'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
                 'subtitles': subtitles,
                 'formats': formats,
             })