[theplatform] fix pid extraction in the platform feed
[youtube-dl] / youtube_dl / extractor / theplatform.py
index d52f9ae4bbf86833524bfc68751f77e9357716e5..f2e975ab0d3e3c4672e16cb0d35214161e097614 100644 (file)
@@ -16,11 +16,12 @@ from ..compat import (
 from ..utils import (
     determine_ext,
     ExtractorError,
-    xpath_with_ns,
-    unsmuggle_url,
+    float_or_none,
     int_or_none,
+    sanitized_Request,
+    unsmuggle_url,
     url_basename,
-    float_or_none,
+    xpath_with_ns,
 )
 
 default_ns = 'http://www.w3.org/2005/SMIL21/Language'
@@ -84,7 +85,7 @@ class ThePlatformBaseIE(InfoExtractor):
 class ThePlatformIE(ThePlatformBaseIE):
     _VALID_URL = r'''(?x)
         (?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
-           (?:(?P<media>(?:[^/]+/)+select/media/)|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
+           (?:(?P<media>(?:(?:[^/]+/)+select/)?media/)|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
          |theplatform:)(?P<id>[^/\?&]+)'''
 
     _TESTS = [{
@@ -188,8 +189,11 @@ class ThePlatformIE(ThePlatformBaseIE):
             # I try one by one
             for script in reversed(scripts):
                 feed_script = self._download_webpage(
-                    self._proto_relative_url(script, 'http:'), video_id, 'Downloading feed script')
-                feed_id = self._search_regex(r'defaultFeedId\s*:\s*"([^"]+)"', feed_script, 'default feed id', default=None)
+                    self._proto_relative_url(script, 'http:'),
+                    video_id, 'Downloading feed script')
+                feed_id = self._search_regex(
+                    r'defaultFeedId\s*:\s*"([^"]+)"', feed_script,
+                    'default feed id', default=None)
                 if feed_id is not None:
                     break
             if feed_id is None:
@@ -201,7 +205,12 @@ class ThePlatformIE(ThePlatformBaseIE):
             smil_url = url
         # Explicitly specified SMIL (see https://github.com/rg3/youtube-dl/issues/7385)
         elif '/guid/' in url:
-            webpage = self._download_webpage(url, video_id)
+            headers = {}
+            source_url = smuggled_data.get('source_url')
+            if source_url:
+                headers['Referer'] = source_url
+            request = sanitized_Request(url, headers=headers)
+            webpage = self._download_webpage(request, video_id)
             smil_url = self._search_regex(
                 r'<link[^>]+href=(["\'])(?P<url>.+?)\1[^>]+type=["\']application/smil\+xml',
                 webpage, 'smil url', group='url')
@@ -275,7 +284,7 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
         duration = None
         for item in entry['media$content']:
             smil_url = item['plfile$url'] + '&format=SMIL&Tracking=true&Embedded=true&formats=MPEG4,F4M'
-            cur_video_id = url_basename(smil_url)
+            cur_video_id = ThePlatformIE._match_id(smil_url)
             if first_video_id is None:
                 first_video_id = cur_video_id
                 duration = float_or_none(item.get('plfile$duration'))