From: Philipp Hagemeister Date: Mon, 21 Apr 2014 00:48:04 +0000 (+0200) Subject: Merge remote-tracking branch 'kwbr/master' X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=117a7d194432788435d9a2e697378c8bfc9a640d;hp=7560096db5ce0002b3cf3f3bcbbb53ef05e6fb13;p=youtube-dl Merge remote-tracking branch 'kwbr/master' --- diff --git a/test/helper.py b/test/helper.py index 8739f816c..09873aea3 100644 --- a/test/helper.py +++ b/test/helper.py @@ -74,13 +74,19 @@ class FakeYDL(YoutubeDL): old_report_warning(message) self.report_warning = types.MethodType(report_warning, self) -def gettestcases(): + +def gettestcases(include_onlymatching=False): for ie in youtube_dl.extractor.gen_extractors(): t = getattr(ie, '_TEST', None) if t: - t['name'] = type(ie).__name__[:-len('IE')] - yield t - for t in getattr(ie, '_TESTS', []): + assert not hasattr(ie, '_TESTS'), \ + '%s has _TEST and _TESTS' % type(ie).__name__ + tests = [t] + else: + tests = getattr(ie, '_TESTS', []) + for t in tests: + if not include_onlymatching and getattr(t, 'only_matching', False): + continue t['name'] = type(ie).__name__[:-len('IE')] yield t diff --git a/test/test_all_urls.py b/test/test_all_urls.py index 7f7362a3a..4b56137ce 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -77,20 +77,20 @@ class TestAllURLsMatching(unittest.TestCase): self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url']) def test_justin_tv_channelid_matching(self): - self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv")) - self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv")) - self.assertTrue(JustinTVIE.suitable(u"www.justin.tv/vanillatv")) - self.assertTrue(JustinTVIE.suitable(u"www.twitch.tv/vanillatv")) - self.assertTrue(JustinTVIE.suitable(u"http://www.justin.tv/vanillatv")) - self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv")) - self.assertTrue(JustinTVIE.suitable(u"http://www.justin.tv/vanillatv/")) - self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv/")) + self.assertTrue(JustinTVIE.suitable('justin.tv/vanillatv')) + self.assertTrue(JustinTVIE.suitable('twitch.tv/vanillatv')) + self.assertTrue(JustinTVIE.suitable('www.justin.tv/vanillatv')) + self.assertTrue(JustinTVIE.suitable('www.twitch.tv/vanillatv')) + self.assertTrue(JustinTVIE.suitable('http://www.justin.tv/vanillatv')) + self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/vanillatv')) + self.assertTrue(JustinTVIE.suitable('http://www.justin.tv/vanillatv/')) + self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/vanillatv/')) def test_justintv_videoid_matching(self): - self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/vanillatv/b/328087483")) + self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/vanillatv/b/328087483')) def test_justin_tv_chapterid_matching(self): - self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361")) + self.assertTrue(JustinTVIE.suitable('http://www.twitch.tv/tsm_theoddone/c/2349361')) def test_youtube_extract(self): assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id) @@ -106,7 +106,7 @@ class TestAllURLsMatching(unittest.TestCase): def test_no_duplicates(self): ies = gen_extractors() - for tc in gettestcases(): + for tc in gettestcases(include_onlymatching=True): url = tc['url'] for ie in ies: if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'): @@ -176,5 +176,6 @@ class TestAllURLsMatching(unittest.TestCase): 'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html', ['Yahoo']) + if __name__ == '__main__': unittest.main() diff --git a/test/test_playlists.py b/test/test_playlists.py index 17f1e5fab..02fcde1f7 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -192,8 +192,8 @@ class TestPlaylists(unittest.TestCase): self.assertIsPlaylist(result) self.assertEqual(result['id'], 'dezhurnyi_angel') self.assertEqual(result['title'], 'Дежурный ангел (2010 - 2012)') - self.assertTrue(len(result['entries']) >= 36) - + self.assertTrue(len(result['entries']) >= 23) + def test_ivi_compilation_season(self): dl = FakeYDL() ie = IviCompilationIE(dl) @@ -201,7 +201,7 @@ class TestPlaylists(unittest.TestCase): self.assertIsPlaylist(result) self.assertEqual(result['id'], 'dezhurnyi_angel/season2') self.assertEqual(result['title'], 'Дежурный ангел (2010 - 2012) 2 сезон') - self.assertTrue(len(result['entries']) >= 20) + self.assertTrue(len(result['entries']) >= 7) def test_imdb_list(self): dl = FakeYDL() diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index b9c759165..e0ef1cd3e 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -181,6 +181,7 @@ from .nfb import NFBIE from .nhl import NHLIE, NHLVideocenterIE from .niconico import NiconicoIE from .ninegag import NineGagIE +from .noco import NocoIE from .normalboots import NormalbootsIE from .novamov import NovaMovIE from .nowness import NownessIE diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index bae1c7754..c759b9889 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -8,7 +8,6 @@ from .subtitles import SubtitlesInfoExtractor from ..utils import ( compat_urllib_request, compat_str, - get_element_by_id, orderedSet, str_to_int, int_or_none, diff --git a/youtube_dl/extractor/extremetube.py b/youtube_dl/extractor/extremetube.py index 1c20e4364..ff7c0cd3e 100644 --- a/youtube_dl/extractor/extremetube.py +++ b/youtube_dl/extractor/extremetube.py @@ -1,4 +1,5 @@ -import os +from __future__ import unicode_literals + import re from .common import InfoExtractor @@ -8,18 +9,23 @@ from ..utils import ( compat_urllib_parse, ) + class ExtremeTubeIE(InfoExtractor): - _VALID_URL = r'^(?:https?://)?(?:www\.)?(?Pextremetube\.com/video/.+?(?P[0-9]+))(?:[/?&]|$)' - _TEST = { - u'url': u'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431', - u'file': u'652431.mp4', - u'md5': u'1fb9228f5e3332ec8c057d6ac36f33e0', - u'info_dict': { - u"title": u"Music Video 14 british euro brit european cumshots swallow", - u"uploader": u"unknown", - u"age_limit": 18, + _VALID_URL = r'^(?:https?://)?(?:www\.)?(?Pextremetube\.com/.*?video/.+?(?P[0-9]+))(?:[/?&]|$)' + _TESTS = [{ + 'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431', + 'md5': '1fb9228f5e3332ec8c057d6ac36f33e0', + 'info_dict': { + 'id': '652431', + 'ext': 'mp4', + 'title': 'Music Video 14 british euro brit european cumshots swallow', + 'uploader': 'unknown', + 'age_limit': 18, } - } + }, { + 'url': 'http://www.extremetube.com/gay/video/abcde-1234', + 'only_matching': True, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -30,11 +36,14 @@ class ExtremeTubeIE(InfoExtractor): req.add_header('Cookie', 'age_verified=1') webpage = self._download_webpage(req, video_id) - video_title = self._html_search_regex(r'

]*?title="([^"]+)"[^>]*>\1<', webpage, u'title') - uploader = self._html_search_regex(r'>Posted by:(?=<)(?:\s|<[^>]*>)*(.+?)\|', webpage, u'uploader', fatal=False) - video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&', webpage, u'video_url')) + video_title = self._html_search_regex( + r'

]*?title="([^"]+)"[^>]*>\1<', webpage, 'title') + uploader = self._html_search_regex( + r'>Posted by:(?=<)(?:\s|<[^>]*>)*(.+?)\|', webpage, 'uploader', + fatal=False) + video_url = compat_urllib_parse.unquote(self._html_search_regex( + r'video_url=(.+?)&', webpage, 'video_url')) path = compat_urllib_parse_urlparse(video_url).path - extension = os.path.splitext(path)[1][1:] format = path.split('/')[5].split('_')[:2] format = "-".join(format) @@ -43,7 +52,6 @@ class ExtremeTubeIE(InfoExtractor): 'title': video_title, 'uploader': uploader, 'url': video_url, - 'ext': extension, 'format': format, 'format_id': format, 'age_limit': 18, diff --git a/youtube_dl/extractor/ign.py b/youtube_dl/extractor/ign.py index cfeaa4146..1f42c6d3a 100644 --- a/youtube_dl/extractor/ign.py +++ b/youtube_dl/extractor/ign.py @@ -106,7 +106,7 @@ class OneUPIE(IGNIE): _DESCRIPTION_RE = r'
(.+?)
' - _TEST = { + _TESTS = [{ 'url': 'http://gamevideos.1up.com/video/id/34976', 'md5': '68a54ce4ebc772e4b71e3123d413163d', 'info_dict': { @@ -115,10 +115,7 @@ class OneUPIE(IGNIE): 'title': 'Sniper Elite V2 - Trailer', 'description': 'md5:5d289b722f5a6d940ca3136e9dae89cf', } - } - - # Override IGN tests - _TESTS = [] + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py new file mode 100644 index 000000000..ec011eb49 --- /dev/null +++ b/youtube_dl/extractor/noco.py @@ -0,0 +1,105 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + unified_strdate, + compat_str, +) + + +class NocoIE(InfoExtractor): + _VALID_URL = r'http://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P\d+)' + + _TEST = { + 'url': 'http://noco.tv/emission/11538/nolife/ami-ami-idol-hello-france/', + 'md5': '0a993f0058ddbcd902630b2047ef710e', + 'info_dict': { + 'id': '11538', + 'ext': 'mp4', + 'title': 'Ami Ami Idol - Hello! France', + 'description': 'md5:4eaab46ab68fa4197a317a88a53d3b86', + 'upload_date': '20140412', + 'uploader': 'Nolife', + 'uploader_id': 'NOL', + 'duration': 2851.2, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + medias = self._download_json( + 'http://api.noco.tv/1.0/video/medias/%s' % video_id, video_id, 'Downloading video JSON') + + formats = [] + + for fmt in medias['fr']['video_list']['default']['quality_list']: + format_id = fmt['quality_key'] + + file = self._download_json( + 'http://api.noco.tv/1.0/video/file/%s/fr/%s' % (format_id.lower(), video_id), + video_id, 'Downloading %s video JSON' % format_id) + + file_url = file['file'] + if not file_url: + continue + + if file_url == 'forbidden': + raise ExtractorError( + '%s returned error: %s - %s' % ( + self.IE_NAME, file['popmessage']['title'], file['popmessage']['message']), + expected=True) + + formats.append({ + 'url': file_url, + 'format_id': format_id, + 'width': fmt['res_width'], + 'height': fmt['res_lines'], + 'abr': fmt['audiobitrate'], + 'vbr': fmt['videobitrate'], + 'filesize': fmt['filesize'], + 'format_note': fmt['quality_name'], + 'preference': fmt['priority'], + }) + + self._sort_formats(formats) + + show = self._download_json( + 'http://api.noco.tv/1.0/shows/show/%s' % video_id, video_id, 'Downloading show JSON')[0] + + upload_date = unified_strdate(show['indexed']) + uploader = show['partner_name'] + uploader_id = show['partner_key'] + duration = show['duration_ms'] / 1000.0 + thumbnail = show['screenshot'] + + episode = show.get('show_TT') or show.get('show_OT') + family = show.get('family_TT') or show.get('family_OT') + episode_number = show.get('episode_number') + + title = '' + if family: + title += family + if episode_number: + title += ' #' + compat_str(episode_number) + if episode: + title += ' - ' + episode + + description = show.get('show_resume') or show.get('family_resume') + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'upload_date': upload_date, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'duration': duration, + 'formats': formats, + } \ No newline at end of file diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index f1ce66433..4f7f8cb6d 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -43,13 +43,14 @@ class RutubeIE(InfoExtractor): 'http://rutube.ru/api/video/%s/?format=json' % video_id, video_id, 'Downloading video JSON') - trackinfo = self._download_json( - 'http://rutube.ru/api/play/trackinfo/%s/?format=json' % video_id, - video_id, 'Downloading trackinfo JSON') - # Some videos don't have the author field - author = trackinfo.get('author') or {} - m3u8_url = trackinfo['video_balancer'].get('m3u8') + author = video.get('author') or {} + + options = self._download_json( + 'http://rutube.ru/api/play/options/%s/?format=json' %video_id, + video_id, 'Downloading options JSON') + + m3u8_url = options['video_balancer'].get('m3u8') if m3u8_url is None: raise ExtractorError('Couldn\'t find m3u8 manifest url') diff --git a/youtube_dl/extractor/steam.py b/youtube_dl/extractor/steam.py index 91658f892..1d8d57224 100644 --- a/youtube_dl/extractor/steam.py +++ b/youtube_dl/extractor/steam.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import re from .common import InfoExtractor @@ -8,78 +10,114 @@ from ..utils import ( class SteamIE(InfoExtractor): - _VALID_URL = r"""http://store\.steampowered\.com/ - (agecheck/)? - (?Pvideo|app)/ #If the page is only for videos or for a game - (?P\d+)/? - (?P\d*)(?P\??) #For urltype == video we sometimes get the videoID - """ + _VALID_URL = r"""(?x) + https?://store\.steampowered\.com/ + (agecheck/)? + (?Pvideo|app)/ #If the page is only for videos or for a game + (?P\d+)/? + (?P\d*)(?P\??) # For urltype == video we sometimes get the videoID + | + https?://(?:www\.)?steamcommunity\.com/sharedfiles/filedetails/\?id=(?P[0-9]+) + """ _VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/' _AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970' - _TEST = { - u"url": u"http://store.steampowered.com/video/105600/", - u"playlist": [ + _TESTS = [{ + "url": "http://store.steampowered.com/video/105600/", + "playlist": [ { - u"file": u"81300.flv", - u"md5": u"f870007cee7065d7c76b88f0a45ecc07", - u"info_dict": { - u"title": u"Terraria 1.1 Trailer", - u'playlist_index': 1, + "md5": "f870007cee7065d7c76b88f0a45ecc07", + "info_dict": { + 'id': '81300', + 'ext': 'flv', + "title": "Terraria 1.1 Trailer", + 'playlist_index': 1, } }, { - u"file": u"80859.flv", - u"md5": u"61aaf31a5c5c3041afb58fb83cbb5751", - u"info_dict": { - u"title": u"Terraria Trailer", - u'playlist_index': 2, + "md5": "61aaf31a5c5c3041afb58fb83cbb5751", + "info_dict": { + 'id': '80859', + 'ext': 'flv', + "title": "Terraria Trailer", + 'playlist_index': 2, } } - ] - } - - - @classmethod - def suitable(cls, url): - """Receives a URL and returns True if suitable for this IE.""" - return re.match(cls._VALID_URL, url, re.VERBOSE) is not None + ], + 'params': { + 'playlistend': 2, + } + }, { + 'url': 'http://steamcommunity.com/sharedfiles/filedetails/?id=242472205', + 'info_dict': { + 'id': 'WB5DvDOOvAY', + 'ext': 'mp4', + 'upload_date': '20140329', + 'title': 'FRONTIERS - Final Greenlight Trailer', + 'description': "The final trailer for the Steam Greenlight launch. Hooray, progress! Here's the official Greenlight page: http://steamcommunity.com/sharedfiles/filedetails/?id=242472205", + 'uploader': 'AAD Productions', + 'uploader_id': 'AtomicAgeDogGames', + } + }] def _real_extract(self, url): - m = re.match(self._VALID_URL, url, re.VERBOSE) - gameID = m.group('gameID') - - videourl = self._VIDEO_PAGE_TEMPLATE % gameID - webpage = self._download_webpage(videourl, gameID) + m = re.match(self._VALID_URL, url) + fileID = m.group('fileID') + if fileID: + videourl = url + playlist_id = fileID + else: + gameID = m.group('gameID') + playlist_id = gameID + videourl = self._VIDEO_PAGE_TEMPLATE % playlist_id + webpage = self._download_webpage(videourl, playlist_id) if re.search('

Please enter your birth date to continue:

', webpage) is not None: - videourl = self._AGECHECK_TEMPLATE % gameID + videourl = self._AGECHECK_TEMPLATE % playlist_id self.report_age_confirmation() - webpage = self._download_webpage(videourl, gameID) + webpage = self._download_webpage(videourl, playlist_id) + + if fileID: + playlist_title = self._html_search_regex( + r'
(.+)
', webpage, 'title') + mweb = re.finditer(r'''(?x) + 'movie_(?P[0-9]+)':\s*\{\s* + YOUTUBE_VIDEO_ID:\s*"(?P[^"]+)", + ''', webpage) + videos = [{ + '_type': 'url', + 'url': vid.group('youtube_id'), + 'ie_key': 'Youtube', + } for vid in mweb] + else: + playlist_title = self._html_search_regex( + r'', webpage, 'game title') + + mweb = re.finditer(r'''(?x) + 'movie_(?P[0-9]+)':\s*\{\s* + FILENAME:\s*"(?P[\w:/\.\?=]+)" + (,\s*MOVIE_NAME:\s*\"(?P[\w:/\.\?=\+-]+)\")?\s*\}, + ''', webpage) + titles = re.finditer( + r'(?P.+?)', webpage) + thumbs = re.finditer( + r'', webpage) + videos = [] - self.report_extraction(gameID) - game_title = self._html_search_regex(r'', - webpage, 'game title') + for vid, vtitle, thumb in zip(mweb, titles, thumbs): + video_id = vid.group('videoID') + title = vtitle.group('videoName') + video_url = vid.group('videoURL') + video_thumb = thumb.group('thumbnail') + if not video_url: + raise ExtractorError('Cannot find video url for %s' % video_id) + videos.append({ + 'id': video_id, + 'url': video_url, + 'ext': 'flv', + 'title': unescapeHTML(title), + 'thumbnail': video_thumb + }) + if not videos: + raise ExtractorError('Could not find any videos') - urlRE = r"'movie_(?P\d+)': \{\s*FILENAME: \"(?P[\w:/\.\?=]+)\"(,\s*MOVIE_NAME: \"(?P[\w:/\.\?=\+-]+)\")?\s*\}," - mweb = re.finditer(urlRE, webpage) - namesRE = r'(?P.+?)' - titles = re.finditer(namesRE, webpage) - thumbsRE = r'' - thumbs = re.finditer(thumbsRE, webpage) - videos = [] - for vid,vtitle,thumb in zip(mweb,titles,thumbs): - video_id = vid.group('videoID') - title = vtitle.group('videoName') - video_url = vid.group('videoURL') - video_thumb = thumb.group('thumbnail') - if not video_url: - raise ExtractorError(u'Cannot find video url for %s' % video_id) - info = { - 'id':video_id, - 'url':video_url, - 'ext': 'flv', - 'title': unescapeHTML(title), - 'thumbnail': video_thumb - } - videos.append(info) - return [self.playlist_result(videos, gameID, game_title)] + return self.playlist_result(videos, playlist_id, playlist_title) diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index dcdadd120..f8dd7e955 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -3,9 +3,6 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( - ExtractorError, -) class TeamcocoIE(InfoExtractor): diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index e2cf1ae56..4671f49ed 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -104,7 +104,7 @@ class YahooNewsIE(YahooIE): IE_NAME = 'yahoo:news' _VALID_URL = r'http://news\.yahoo\.com/video/.*?-(?P\d*?)\.html' - _TEST = { + _TESTS = [{ 'url': 'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html', 'md5': '67010fdf3a08d290e060a4dd96baa07b', 'info_dict': { @@ -113,10 +113,7 @@ class YahooNewsIE(YahooIE): 'title': 'China Moses Is Crazy About the Blues', 'description': 'md5:9900ab8cd5808175c7b3fe55b979bed0', }, - } - - # Overwrite YahooIE properties we don't want - _TESTS = [] + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 55382f0b0..a1bbf4176 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.04.13' +__version__ = '2014.04.21'