[youtube] Fix extraction for domainless player URLs
authorSergey M․ <dstftw@gmail.com>
Tue, 31 Jan 2017 15:19:29 +0000 (22:19 +0700)
committerSergey M․ <dstftw@gmail.com>
Tue, 31 Jan 2017 15:22:37 +0000 (22:22 +0700)
Closes #11890
Closes #11891
Closes #11892
Closes #11894
Closes #11895
Closes #11897
Closes #11900
Closes #11903
Closes #11904
Closes #11906
Closes #11907
Closes #11909
Closes #11913
Closes #11914
Closes #11915
Closes #11916
Closes #11917
Closes #11918
Closes #11919

youtube_dl/extractor/youtube.py

index 630586796694f963d0c19473ccd97c23b8c2ccf0..ea398bcc84598ad27c00bcd93a5fcd44a4019910 100644 (file)
@@ -1028,8 +1028,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
     def _parse_sig_js(self, jscode):
         funcname = self._search_regex(
-            r'\.sig\|\|([a-zA-Z0-9$]+)\(', jscode,
-            'Initial JS player signature function name')
+            (r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\('),
+            jscode, 'Initial JS player signature function name', group='sig')
 
         jsi = JSInterpreter(jscode)
         initial_function = jsi.extract_function(funcname)
@@ -1050,6 +1051,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
         if player_url.startswith('//'):
             player_url = 'https:' + player_url
+        elif not re.match(r'https?://', player_url):
+            player_url = compat_urlparse.urljoin(
+                'https://www.youtube.com', player_url)
         try:
             player_id = (player_url, self._signature_cache_id(s))
             if player_id not in self._player_cache: