From: Philipp Hagemeister Date: Thu, 27 Dec 2012 04:31:36 +0000 (+0100) Subject: Allow ampersand right after ? in youtube URLs (Closes #602) X-Git-Url: http://git.bitcoin.ninja/?a=commitdiff_plain;h=3bb6165927c277c3af73d5ef1ffb6ce9ea663d10;p=youtube-dl Allow ampersand right after ? in youtube URLs (Closes #602) --- diff --git a/test/test_all_urls.py b/test/test_all_urls.py new file mode 100644 index 000000000..06de8e7b8 --- /dev/null +++ b/test/test_all_urls.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python + +import sys +import unittest + +# Allow direct execution +import os +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from youtube_dl.InfoExtractors import YoutubeIE, YoutubePlaylistIE + +class TestAllURLsMatching(unittest.TestCase): + def test_youtube_playlist_matching(self): + self.assertTrue(YoutubePlaylistIE().suitable(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')) + self.assertTrue(YoutubePlaylistIE().suitable(u'PL63F0C78739B09958')) + self.assertFalse(YoutubePlaylistIE().suitable(u'PLtS2H6bU1M')) + + def test_youtube_matching(self): + self.assertTrue(YoutubeIE().suitable(u'PLtS2H6bU1M')) + + def test_youtube_extract(self): + self.assertEqual(YoutubeIE()._extract_id('http://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc') + self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc') + self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc'), 'BaW_jenozKc') + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_youtube_playlist_ids.py b/test/test_youtube_playlist_ids.py deleted file mode 100644 index 2eeb3216c..000000000 --- a/test/test_youtube_playlist_ids.py +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env python - -import sys -import unittest - -# Allow direct execution -import os -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - -from youtube_dl.InfoExtractors import YoutubeIE, YoutubePlaylistIE - -class TestYoutubePlaylistMatching(unittest.TestCase): - def test_playlist_matching(self): - self.assertTrue(YoutubePlaylistIE().suitable(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')) - self.assertTrue(YoutubePlaylistIE().suitable(u'PL63F0C78739B09958')) - self.assertFalse(YoutubePlaylistIE().suitable(u'PLtS2H6bU1M')) - - def test_youtube_matching(self): - self.assertTrue(YoutubeIE().suitable(u'PLtS2H6bU1M')) - -if __name__ == '__main__': - unittest.main() diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 890ff7378..e3cf7e115 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -120,7 +120,7 @@ class YoutubeIE(InfoExtractor): |(?: # or the v= param in all its forms (?:watch(?:_popup)?(?:\.php)?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx) (?:\?|\#!?) # the params delimiter ? or # or #! - (?:.+&)? # any other preceding param (like /?s=tuff&v=xxxx) + (?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx) v= ) )? # optional -> youtube.com/xxxx is OK @@ -325,22 +325,25 @@ class YoutubeIE(InfoExtractor): self._downloader.trouble(u'ERROR: unable to confirm age: %s' % compat_str(err)) return - def _real_extract(self, url): - # Extract original video URL from URL with redirection, like age verification, using next_url parameter - mobj = re.search(self._NEXT_URL_RE, url) - if mobj: - url = 'http://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/') - - # Extract video id from URL + def _extract_id(self, url): mobj = re.match(self._VALID_URL, url, re.VERBOSE) if mobj is None: self._downloader.trouble(u'ERROR: invalid URL: %s' % url) return video_id = mobj.group(2) + return video_id + + def _real_extract(self, url): + # Extract original video URL from URL with redirection, like age verification, using next_url parameter + mobj = re.search(self._NEXT_URL_RE, url) + if mobj: + url = 'http://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/') + video_id = self._extract_id(url) # Get video webpage self.report_video_webpage_download(video_id) - request = compat_urllib_request.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id) + url = 'http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id + request = compat_urllib_request.Request(url) try: video_webpage_bytes = compat_urllib_request.urlopen(request).read() except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: