Allow ampersand right after ? in youtube URLs (Closes #602)
author Philipp Hagemeister <phihag@phihag.de>
Thu, 27 Dec 2012 04:31:36 +0000 (05:31 +0100)
committer Philipp Hagemeister <phihag@phihag.de>
Thu, 27 Dec 2012 04:31:36 +0000 (05:31 +0100)
test/test_all_urls.py [new file with mode: 0644]
test/test_youtube_playlist_ids.py [deleted file]
youtube_dl/InfoExtractors.py

diff --git a/test/test_all_urls.py b/test/test_all_urls.py
new file mode 100644 (file)
index 0000000..06de8e7
--- /dev/null
+++ b/test/test_all_urls.py
@@ -0,0 +1,27 @@
+#!/usr/bin/env python
+
+import sys
+import unittest
+
+# Allow direct execution
+import os
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl.InfoExtractors import YoutubeIE, YoutubePlaylistIE
+
+class TestAllURLsMatching(unittest.TestCase):
+    def test_youtube_playlist_matching(self):
+        self.assertTrue(YoutubePlaylistIE().suitable(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8'))
+        self.assertTrue(YoutubePlaylistIE().suitable(u'PL63F0C78739B09958'))
+        self.assertFalse(YoutubePlaylistIE().suitable(u'PLtS2H6bU1M'))
+
+    def test_youtube_matching(self):
+        self.assertTrue(YoutubeIE().suitable(u'PLtS2H6bU1M'))
+
+    def test_youtube_extract(self):
+        self.assertEqual(YoutubeIE()._extract_id('http://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
+        self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
+        self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc'), 'BaW_jenozKc')
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/test_youtube_playlist_ids.py b/test/test_youtube_playlist_ids.py
deleted file mode 100644 (file)
index 2eeb321..0000000
--- a/test/test_youtube_playlist_ids.py
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-import unittest
-
-# Allow direct execution
-import os
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-from youtube_dl.InfoExtractors import YoutubeIE, YoutubePlaylistIE
-
-class TestYoutubePlaylistMatching(unittest.TestCase):
-    def test_playlist_matching(self):
-        self.assertTrue(YoutubePlaylistIE().suitable(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8'))
-        self.assertTrue(YoutubePlaylistIE().suitable(u'PL63F0C78739B09958'))
-        self.assertFalse(YoutubePlaylistIE().suitable(u'PLtS2H6bU1M'))
-
-    def test_youtube_matching(self):
-        self.assertTrue(YoutubeIE().suitable(u'PLtS2H6bU1M'))
-
-if __name__ == '__main__':
-    unittest.main()
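diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py
index 890ff73783f14195aa9bba37c6e7b0b23961bff5..e3cf7e11598448aeb13028dff31a3018032d7906 100755 (executable)
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py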
@@ -120,7 +120,7 @@ class YoutubeIE(InfoExtractor):
                              |(?:                                             # or the v= param in all its forms
                                  (?:watch(?:_popup)?(?:\.php)?)?              # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
-                                 (?:.+&)?                                     # any other preceding param (like /?s=tuff&v=xxxx)
+                                 (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                                  v=
                              )
                          )?                                                   # optional -> youtube.com/xxxx is OK
@@ -325,22 +325,25 @@ class YoutubeIE(InfoExtractor):
             self._downloader.trouble(u'ERROR: unable to confirm age: %s' % compat_str(err))
             return
 
-    def _real_extract(self, url):
-        # Extract original video URL from URL with redirection, like age verification, using next_url parameter
-        mobj = re.search(self._NEXT_URL_RE, url)
-        if mobj:
-            url = 'http://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
-
-        # Extract video id from URL
+    def _extract_id(self, url):
         mobj = re.match(self._VALID_URL, url, re.VERBOSE)
         if mobj is None:
             self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
             return
         video_id = mobj.group(2)
+        return video_id
+
+    def _real_extract(self, url):
+        # Extract original video URL from URL with redirection, like age verification, using next_url parameter
+        mobj = re.search(self._NEXT_URL_RE, url)
+        if mobj:
+            url = 'http://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
+        video_id = self._extract_id(url)
 
         # Get video webpage
         self.report_video_webpage_download(video_id)
-        request = compat_urllib_request.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id)
+        url = 'http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
+        request = compat_urllib_request.Request(url)
         try:
             video_webpage_bytes = compat_urllib_request.urlopen(request).read()
         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: