[youtube] Modify the regex to match ids of length 11 (fixes #1396)
author: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Mon, 9 Sep 2013 08:33:12 +0000 (10:33 +0200)
committer: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Mon, 9 Sep 2013 08:33:12 +0000 (10:33 +0200)
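In URLs like http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930 the video ID cannot be isolated by splitting the query string, because nothing separates it from the text that follows. Since YouTube video IDs are always 11 characters long, the regex now matches exactly 11 ID characters instead of one or more.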

test/test_all_urls.py
youtube_dl/extractor/youtube.py

index 5d8d93e0e9db73063e097812f0403b02309527ed..99fc7bd28c46393513795de17ac7eee76cb4b9ba 100644 (file)
@@ -72,10 +72,13 @@ class TestAllURLsMatching(unittest.TestCase):
         self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361"))
 
     def test_youtube_extract(self):
-        self.assertEqual(YoutubeIE()._extract_id('http://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
-        self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
-        self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc'), 'BaW_jenozKc')
-        self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch_popup?v=BaW_jenozKc'), 'BaW_jenozKc')
+        assertExtractId = lambda url, id: self.assertEqual(YoutubeIE()._extract_id(url), id)
+        assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
+        assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
+        assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc')
+        assertExtractId('https://www.youtube.com/watch_popup?v=BaW_jenozKc', 'BaW_jenozKc')
+        assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc')
+        assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
 
     def test_no_duplicates(self):
         ies = gen_extractors()
index bad15cb44a68d9ec1914304241fb7c1c2106bfc5..6a835129310327942ceffd7967dd880ff425f661 100644 (file)
@@ -150,7 +150,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                          |youtu\.be/                                          # just youtu.be/xxxx
                          )
                      )?                                                       # all until now is optional -> you can pass the naked ID
-                     ([0-9A-Za-z_-]+)                                         # here is it! the YouTube video ID
+                     ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
                      (?(1).+)?                                                # if we found the ID, everything can follow
                      $"""
     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'