[extractor/common] improve jwplayer relative url handling (closes #18892)
[youtube-dl] / youtube_dl / extractor / common.py
index af621b74b5eb7c0f2a0fe89be3ee8ecf42a1859b..95456b291b116e8a14b0d5d945a5bd33fd4a566c 100644 (file)
@@ -1668,7 +1668,7 @@ class InfoExtractor(object):
             rendition = stream_group[0]
             return rendition.get('NAME') or stream_group_id
 
-        # parse EXT-X-MEDIA tags before EXT-X-STREAM-INF inorder to have the
+        # parse EXT-X-MEDIA tags before EXT-X-STREAM-INF in order to have the
         # chance to detect video only formats when EXT-X-STREAM-INF tags
         # precede EXT-X-MEDIA tags in HLS manifest such as [3].
         for line in m3u8_doc.splitlines():
@@ -2630,7 +2630,7 @@ class InfoExtractor(object):
                 'id': this_video_id,
                 'title': unescapeHTML(video_data['title'] if require_title else video_data.get('title')),
                 'description': video_data.get('description'),
-                'thumbnail': self._proto_relative_url(video_data.get('image')),
+                'thumbnail': urljoin(base_url, self._proto_relative_url(video_data.get('image'))),
                 'timestamp': int_or_none(video_data.get('pubdate')),
                 'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
                 'subtitles': subtitles,
@@ -2657,12 +2657,9 @@ class InfoExtractor(object):
         for source in jwplayer_sources_data:
             if not isinstance(source, dict):
                 continue
-            source_url = self._proto_relative_url(source.get('file'))
-            if not source_url:
-                continue
-            if base_url:
-                source_url = compat_urlparse.urljoin(base_url, source_url)
-            if source_url in urls:
+            source_url = urljoin(
+                base_url, self._proto_relative_url(source.get('file')))
+            if not source_url or source_url in urls:
                 continue
             urls.append(source_url)
             source_type = source.get('type') or ''