From: Philipp Hagemeister Date: Wed, 30 Apr 2014 00:04:55 +0000 (+0200) Subject: Merge branch 'master' of github.com:rg3/youtube-dl X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=deab8c19609d522bb84ac563148038cb818b937e;hp=33ab8453c4a89564bf14fc800874f09d66cdcf4a;p=youtube-dl Merge branch 'master' of github.com:rg3/youtube-dl --- diff --git a/test/helper.py b/test/helper.py index da714078d..d5e0a603e 100644 --- a/test/helper.py +++ b/test/helper.py @@ -134,3 +134,17 @@ def expect_info_dict(self, expected_dict, got_dict): missing_keys, 'Missing keys in test definition: %s' % ( ', '.join(sorted(missing_keys)))) + + +def assertRegexpMatches(self, text, regexp, msg=None): + if hasattr(self, 'assertRegexpMatches'): + return self.assertRegexpMatches(text, regexp, msg) + else: + m = re.match(regexp, text) + if not m: + note = 'Regexp didn\'t match: %r not found in %r' % (regexp, text) + if msg is None: + msg = note + else: + msg = note + ', ' + msg + self.assertTrue(m, msg) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 2902dbec7..8735013f7 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -8,7 +8,7 @@ import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from test.helper import FakeYDL +from test.helper import FakeYDL, assertRegexpMatches from youtube_dl import YoutubeDL from youtube_dl.extractor import YoutubeIE @@ -274,6 +274,12 @@ class TestFormatSelection(unittest.TestCase): # Replace missing fields with 'NA' self.assertEqual(fname('%(uploader_date)s-%(id)s.%(ext)s'), 'NA-1234.mp4') + def test_format_note(self): + ydl = YoutubeDL() + self.assertEqual(ydl._format_note({}), '') + assertRegexpMatches(self, ydl._format_note({ + 'vbr': 10, + }), '^x\s*10k$') if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index d4dd05d8c..e9811bd05 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1139,57 +1139,57 @@ class YoutubeDL(object): res = default return res - def list_formats(self, info_dict): - def format_note(fdict): - res = '' - if fdict.get('ext') in ['f4f', 'f4m']: - res += '(unsupported) ' - if fdict.get('format_note') is not None: - res += fdict['format_note'] + ' ' - if fdict.get('tbr') is not None: - res += '%4dk ' % fdict['tbr'] - if fdict.get('container') is not None: - if res: - res += ', ' - res += '%s container' % fdict['container'] - if (fdict.get('vcodec') is not None and - fdict.get('vcodec') != 'none'): - if res: - res += ', ' - res += fdict['vcodec'] - if fdict.get('vbr') is not None: - res += '@' - elif fdict.get('vbr') is not None and fdict.get('abr') is not None: - res += 'video@' + def _format_note(self, fdict): + res = '' + if fdict.get('ext') in ['f4f', 'f4m']: + res += '(unsupported) ' + if fdict.get('format_note') is not None: + res += fdict['format_note'] + ' ' + if fdict.get('tbr') is not None: + res += '%4dk ' % fdict['tbr'] + if fdict.get('container') is not None: + if res: + res += ', ' + res += '%s container' % fdict['container'] + if (fdict.get('vcodec') is not None and + fdict.get('vcodec') != 'none'): + if res: + res += ', ' + res += fdict['vcodec'] if fdict.get('vbr') is not None: - res += '%4dk' % fdict['vbr'] - if fdict.get('acodec') is not None: - if res: - res += ', ' - if fdict['acodec'] == 'none': - res += 'video only' - else: - res += '%-5s' % fdict['acodec'] - elif fdict.get('abr') is not None: - if res: - res += ', ' - res += 'audio' - if fdict.get('abr') is not None: - res += '@%3dk' % fdict['abr'] - if fdict.get('asr') is not None: - res += ' (%5dHz)' % fdict['asr'] - if fdict.get('filesize') is not None: - if res: - res += ', ' - res += format_bytes(fdict['filesize']) - return res + res += '@' + elif fdict.get('vbr') is not None and fdict.get('abr') is not None: + res += 'video@' + if fdict.get('vbr') is not None: + res += '%4dk' % fdict['vbr'] + if fdict.get('acodec') is not None: + if res: + res += ', ' + if fdict['acodec'] == 'none': + res += 'video only' + else: + res += '%-5s' % fdict['acodec'] + elif fdict.get('abr') is not None: + if res: + res += ', ' + res += 'audio' + if fdict.get('abr') is not None: + res += '@%3dk' % fdict['abr'] + if fdict.get('asr') is not None: + res += ' (%5dHz)' % fdict['asr'] + if fdict.get('filesize') is not None: + if res: + res += ', ' + res += format_bytes(fdict['filesize']) + return res + def list_formats(self, info_dict): def line(format, idlen=20): return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % ( format['format_id'], format['ext'], self.format_resolution(format), - format_note(format), + self._format_note(format), )) formats = info_dict.get('formats', [info_dict]) @@ -1197,8 +1197,8 @@ class YoutubeDL(object): max(len(f['format_id']) for f in formats)) formats_s = [line(f, idlen) for f in formats] if len(formats) > 1: - formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)' - formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)' + formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)' + formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)' header_line = line({ 'format_id': 'format code', 'ext': 'extension', diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 4b53bef5c..e27cd2d2d 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -160,6 +160,7 @@ from .mofosex import MofosexIE from .mooshare import MooshareIE from .morningstar import MorningstarIE from .motorsport import MotorsportIE +from .moviezine import MoviezineIE from .movshare import MovShareIE from .mtv import ( MTVIE, diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 13064e2cd..cfb009d79 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -248,6 +248,18 @@ class GenericIE(InfoExtractor): 'ext': 'mp4', 'title': 'Honda’s New Asimo Robot Is More Human Than Ever', } + }, + # Dailymotion embed + { + 'url': 'http://www.spi0n.com/zap-spi0n-com-n216/', + 'md5': '441aeeb82eb72c422c7f14ec533999cd', + 'info_dict': { + 'id': 'k2mm4bCdJ6CQ2i7c8o2', + 'ext': 'mp4', + 'title': 'Le Zap de Spi0n n°216 - Zapping du Web', + 'uploader': 'Spi0n', + }, + 'add_ie': ['Dailymotion'], } ] @@ -333,6 +345,15 @@ class GenericIE(InfoExtractor): } def _real_extract(self, url): + if url.startswith('//'): + return { + '_type': 'url', + 'url': ( + 'http:' + if self._downloader.params.get('prefer_insecure', False) + else 'https:') + url, + } + parsed_url = compat_urlparse.urlparse(url) if not parsed_url.scheme: default_search = self._downloader.params.get('default_search') @@ -469,7 +490,7 @@ class GenericIE(InfoExtractor): matches = re.findall( r']+?src=(["\'])(?P(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage) if matches: - urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Dailymotion') + urlrs = [self.url_result(unescapeHTML(tuppl[1])) for tuppl in matches] return self.playlist_result( urlrs, playlist_id=video_id, playlist_title=video_title) diff --git a/youtube_dl/extractor/moviezine.py b/youtube_dl/extractor/moviezine.py new file mode 100644 index 000000000..43146180a --- /dev/null +++ b/youtube_dl/extractor/moviezine.py @@ -0,0 +1,45 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class MoviezineIE(InfoExtractor): + _VALID_URL = r'https?://www\.moviezine\.se/video/(?P[^?#]+)' + + _TEST = { + 'url': 'http://www.moviezine.se/video/205866', + 'info_dict': { + 'id': '205866', + 'ext': 'mp4', + 'title': 'Oculus - Trailer 1', + 'description': 'md5:40cc6790fc81d931850ca9249b40e8a4', + 'thumbnail': 're:http://.*\.jpg', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + jsplayer = self._download_webpage('http://www.moviezine.se/api/player.js?video=%s' % video_id, video_id, 'Downloading js api player') + + formats =[{ + 'format_id': 'sd', + 'url': self._html_search_regex(r'file: "(.+?)",', jsplayer, 'file'), + 'quality': 0, + 'ext': 'mp4', + }] + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': self._search_regex(r'title: "(.+?)",', jsplayer, 'title'), + 'thumbnail': self._search_regex(r'image: "(.+?)",', jsplayer, 'image'), + 'formats': formats, + 'description': self._og_search_description(webpage), + } diff --git a/youtube_dl/extractor/syfy.py b/youtube_dl/extractor/syfy.py index 8809a57fe..f76b6e2b2 100644 --- a/youtube_dl/extractor/syfy.py +++ b/youtube_dl/extractor/syfy.py @@ -6,9 +6,9 @@ from .common import InfoExtractor class SyfyIE(InfoExtractor): - _VALID_URL = r'https?://www\.syfy\.com/videos/.+?vid:(?P\d+)' + _VALID_URL = r'https?://www\.syfy\.com/(?:videos/.+?vid:(?P[0-9]+)|(?!videos)(?P[^/]+)(?:$|[?#]))' - _TEST = { + _TESTS = [{ 'url': 'http://www.syfy.com/videos/Robot%20Combat%20League/Behind%20the%20Scenes/vid:2631458', 'md5': 'e07de1d52c7278adbb9b9b1c93a66849', 'info_dict': { @@ -18,10 +18,30 @@ class SyfyIE(InfoExtractor): 'description': 'Listen to what insights George Lucas give his daughter Amanda.', }, 'add_ie': ['ThePlatform'], - } + }, { + 'url': 'http://www.syfy.com/wilwheaton', + 'md5': '94dfa54ee3ccb63295b276da08c415f6', + 'info_dict': { + 'id': '4yoffOOXC767', + 'ext': 'flv', + 'title': 'The Wil Wheaton Project - Premiering May 27th at 10/9c.', + 'description': 'The Wil Wheaton Project premieres May 27th at 10/9c. Don\'t miss it.', + }, + 'add_ie': ['ThePlatform'], + 'skip': 'Blocked outside the US', + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_name = mobj.group('video_name') + if video_name: + generic_webpage = self._download_webpage(url, video_name) + video_id = self._search_regex( + r'', + generic_webpage, 'video ID') + url = 'http://www.syfy.com/videos/%s/%s/vid:%s' % ( + video_name, video_name, video_id) + else: + video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) return self.url_result(self._og_search_video_url(webpage)) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 91f2453eb..f15780ef5 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -52,7 +52,7 @@ class ThePlatformIE(InfoExtractor): head = meta.find(_x('smil:head')) body = meta.find(_x('smil:body')) - f4m_node = body.find(_x('smil:seq/smil:video')) + f4m_node = body.find(_x('smil:seq//smil:video')) if f4m_node is not None: f4m_url = f4m_node.attrib['src'] if 'manifest.f4m?' not in f4m_url: diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index c008ed54a..c3ae33009 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -210,23 +210,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'vcodec': 'none', 'abr': 256, 'preference': -50}, # Dash webm - '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, - '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, - '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, - '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, - '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, - '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'acodec': 'none', 'preference': -40}, - '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH webm', 'preference': -40}, - '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH webm', 'preference': -40}, - '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH webm', 'preference': -40}, - '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH webm', 'preference': -40}, - '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH webm', 'preference': -40}, - '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH webm', 'preference': -40}, - '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH webm', 'preference': -40}, + '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40}, + '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40}, + '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40}, + '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40}, + '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40}, + '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40}, + '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, + '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, + '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, + '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, + '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, + '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, + '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, # Dash webm audio - '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH webm audio', 'abr': 48, 'preference': -50}, - '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH webm audio', 'abr': 256, 'preference': -50}, + '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 48, 'preference': -50}, + '172': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50}, # RTMP (unnamed) '_rtmp': {'protocol': 'rtmp'},