[theplatform] Recognize URLs with whitespaces (closes #12044)
author Yen Chi Hsuan <yan12125@gmail.com>
Fri, 17 Feb 2017 15:13:51 +0000 (23:13 +0800)
committer Yen Chi Hsuan <yan12125@gmail.com>
Fri, 17 Feb 2017 15:13:51 +0000 (23:13 +0800)
ChangeLog
youtube_dl/extractor/generic.py
youtube_dl/extractor/theplatform.py

index 4e69b03d0e940abc32fac2ad27ef279316c88d46..d5fe3dd5b89e4af209d9a9585fcebf6d10e390c9 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,7 @@
 version <unreleased>
 
 Extractors
++ [theplatform] Recognize URLs with whitespaces (#12044)
 + [generic] Support complex JWPlayer embedded videos (#12030)
 
 
index 3db31debea88aee29718a56cdc38109487dd9c8e..9868ca6d0b80397e6490315e8d4b3dab0b3c1bf2 100644 (file)
@@ -1501,7 +1501,12 @@ class GenericIE(InfoExtractor):
                 'skip_download': True,
             },
             'add_ie': [VideoPressIE.ie_key()],
-        }
+        },
+        {
+            # ThePlatform embedded with whitespaces in URLs
+            'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm',
+            'only_matching': True,
+        },
         # {
         #     # TODO: find another test
         #     # http://schema.org/VideoObject
index 5c5987c6a95ab04bd9f43540dc63abc7b13e2932..9a424b1c6aeb089af8050d7eee6b29591968c3aa 100644 (file)
@@ -179,10 +179,12 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
         if m:
             return [m.group('url')]
 
+        # Are whitespaces ignored in URLs?
+        # https://github.com/rg3/youtube-dl/issues/12044
         matches = re.findall(
-            r'<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)
+            r'(?s)<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)
         if matches:
-            return list(zip(*matches))[1]
+            return [re.sub(r'\s', '', list(zip(*matches))[1][0])]
 
     @staticmethod
     def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False):