Merge branch 'master' of github.com:rg3/youtube-dl
authorPhilipp Hagemeister <phihag@phihag.de>
Sat, 7 Sep 2013 20:28:54 +0000 (22:28 +0200)
committerPhilipp Hagemeister <phihag@phihag.de>
Sat, 7 Sep 2013 20:28:54 +0000 (22:28 +0200)
test/test_all_urls.py
youtube_dl/extractor/__init__.py
youtube_dl/extractor/dailymotion.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/metacritic.py [new file with mode: 0644]
youtube_dl/extractor/youtube.py

index fe4090d18747d3b9da35d1e12c634e1c00e18c5c..5d8d93e0e9db73063e097812f0403b02309527ed 100644 (file)
@@ -21,14 +21,15 @@ class TestAllURLsMatching(unittest.TestCase):
         self.assertEqual(self.matching_ies(url), ie_list)
 
     def test_youtube_playlist_matching(self):
-        self.assertTrue(YoutubePlaylistIE.suitable(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8'))
-        self.assertTrue(YoutubePlaylistIE.suitable(u'UUBABnxM4Ar9ten8Mdjj1j0Q')) #585
-        self.assertTrue(YoutubePlaylistIE.suitable(u'PL63F0C78739B09958'))
-        self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q'))
-        self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8'))
-        self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC'))
-        self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
-        self.assertFalse(YoutubePlaylistIE.suitable(u'PLtS2H6bU1M'))
+        assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist'])
+        assertPlaylist(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
+        assertPlaylist(u'UUBABnxM4Ar9ten8Mdjj1j0Q') #585
+        assertPlaylist(u'PL63F0C78739B09958')
+        assertPlaylist(u'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
+        assertPlaylist(u'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
+        assertPlaylist(u'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
+        assertPlaylist(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') #668
+        self.assertFalse('youtube:playlist' in self.matching_ies(u'PLtS2H6bU1M'))
 
     def test_youtube_matching(self):
         self.assertTrue(YoutubeIE.suitable(u'PLtS2H6bU1M'))
@@ -37,13 +38,23 @@ class TestAllURLsMatching(unittest.TestCase):
         self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
 
     def test_youtube_channel_matching(self):
-        self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM'))
-        self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec'))
-        self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM/videos'))
+        assertChannel = lambda url: self.assertMatch(url, ['youtube:channel'])
+        assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM')
+        assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec')
+        assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
 
     def test_youtube_user_matching(self):
         self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
 
+    def test_youtube_feeds(self):
+        self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watch_later'])
+        self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions'])
+        self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended'])
+        self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites'])
+
+    def test_youtube_show_matching(self):
+        self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show'])
+
     def test_justin_tv_channelid_matching(self):
         self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv"))
         self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv"))
index 70ebd29e21c35026bb998f4d5cbbbf5037fbeee1..fbe0b8cb7ad1693230fa87ccb5121e1047bf0652 100644 (file)
@@ -52,6 +52,7 @@ from .keek import KeekIE
 from .liveleak import LiveLeakIE
 from .livestream import LivestreamIE
 from .metacafe import MetacafeIE
+from .metacritic import MetacriticIE
 from .mit import TechTVMITIE, MITIE
 from .mixcloud import MixcloudIE
 from .mtv import MTVIE
index 439033d2310d1be9034c225c9ed7eee39b9c56bd..3c616e0896cecc33077c9bd7ae0c9b15c5d4ddbd 100644 (file)
@@ -14,7 +14,7 @@ from ..utils import (
 class DailymotionIE(InfoExtractor):
     """Information Extractor for Dailymotion"""
 
-    _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)'
+    _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)'
     IE_NAME = u'dailymotion'
     _TEST = {
         u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
@@ -33,6 +33,7 @@ class DailymotionIE(InfoExtractor):
         video_id = mobj.group(1).split('_')[0].split('?')[0]
 
         video_extension = 'mp4'
+        url = 'http://www.dailymotion.com/video/%s' % video_id
 
         # Retrieve video webpage to extract further information
         request = compat_urllib_request.Request(url)
index de7379a921bbe2ade08bdc27250c597cd6c1e532..f92e61fea5db38a5c0df130b512a7920def14af5 100644 (file)
@@ -109,6 +109,11 @@ class GenericIE(InfoExtractor):
         return new_url
 
     def _real_extract(self, url):
+        parsed_url = compat_urlparse.urlparse(url)
+        if not parsed_url.scheme:
+            self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
+            return self.url_result('http://' + url)
+
         try:
             new_url = self._test_redirect(url)
             if new_url:
diff --git a/youtube_dl/extractor/metacritic.py b/youtube_dl/extractor/metacritic.py
new file mode 100644 (file)
index 0000000..449138b
--- /dev/null
@@ -0,0 +1,55 @@
+import re
+import xml.etree.ElementTree
+import operator
+
+from .common import InfoExtractor
+
+
+class MetacriticIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.metacritic\.com/.+?/trailers/(?P<id>\d+)'
+
+    _TEST = {
+        u'url': u'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222',
+        u'file': u'3698222.mp4',
+        u'info_dict': {
+            u'title': u'inFamous: Second Son - inSide Sucker Punch: Smoke & Mirrors',
+            u'description': u'Take a peak behind-the-scenes to see how Sucker Punch brings smoke into the universe of inFAMOUS Second Son on the PS4.',
+            u'duration': 221,
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id)
+        # The xml is not well formatted, there are raw '&'
+        info_xml = self._download_webpage('http://www.metacritic.com/video_data?video=' + video_id,
+            video_id, u'Downloading info xml').replace('&', '&amp;')
+        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
+
+        clip = next(c for c in info.findall('playList/clip') if c.find('id').text == video_id)
+        formats = []
+        for videoFile in clip.findall('httpURI/videoFile'):
+            rate_str = videoFile.find('rate').text
+            video_url = videoFile.find('filePath').text
+            formats.append({
+                'url': video_url,
+                'ext': 'mp4',
+                'format_id': rate_str,
+                'rate': int(rate_str),
+            })
+        formats.sort(key=operator.itemgetter('rate'))
+
+        description = self._html_search_regex(r'<b>Description:</b>(.*?)</p>',
+            webpage, u'description', flags=re.DOTALL)
+
+        info = {
+            'id': video_id,
+            'title': clip.find('title').text,
+            'formats': formats,
+            'description': description,
+            'duration': int(clip.find('duration').text),
+        }
+        # TODO: Remove when #980 has been merged
+        info.update(formats[-1])
+        return info
index 98a44f333180f003c3f9528597b1a67da1a1525d..423a5e973ce258e1248c62e7ed444ef943fc797e 100644 (file)
@@ -386,7 +386,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
     @classmethod
     def suitable(cls, url):
         """Receives a URL and returns True if suitable for this IE."""
-        if YoutubePlaylistIE.suitable(url) or YoutubeSubscriptionsIE.suitable(url): return False
+        if YoutubePlaylistIE.suitable(url): return False
         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
 
     def report_video_webpage_download(self, video_id):
@@ -1015,14 +1015,18 @@ class YoutubeChannelIE(InfoExtractor):
 
 class YoutubeUserIE(InfoExtractor):
     IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
-    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)([A-Za-z0-9_-]+)'
+    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
     _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
     _GDATA_PAGE_SIZE = 50
     _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
     IE_NAME = u'youtube:user'
 
+    @classmethod
     def suitable(cls, url):
-        if YoutubeIE.suitable(url): return False
+        # Don't return True if the url can be extracted with other youtube
+        # extractor, the regex would is too permissive and it would match.
+        other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
+        if any(ie.suitable(url) for ie in other_ies): return False
         else: return super(YoutubeUserIE, cls).suitable(url)
 
     def _real_extract(self, url):