From: Sergey M․ Date: Wed, 20 May 2015 16:10:06 +0000 (+0600) Subject: Merge branch 'ping-viki-shows' X-Git-Url: http://git.bitcoin.ninja/index.cgi?p=youtube-dl;a=commitdiff_plain;h=ca57a598838f7d945a09a1df569e1c29fdd0732b;hp=b0d619fde2b187f2b36b077a1eb11d766429f88c Merge branch 'ping-viki-shows' --- diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 43fbe8b1d..a4879bd9a 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -142,6 +142,7 @@ - **Eporner** - **EroProfile** - **Escapist** + - **ESPN** (Currently broken) - **EveryonesMixtape** - **exfm**: ex.fm - **ExpoTV** @@ -338,6 +339,7 @@ - **OktoberfestTV** - **on.aol.com** - **Ooyala** + - **OoyalaExternal** - **OpenFilm** - **orf:fm4**: radio FM4 - **orf:iptv**: iptv.ORF.at @@ -451,6 +453,7 @@ - **Spike** - **Sport5** - **SportBox** + - **SportBoxEmbed** - **SportDeutschland** - **Srf** - **SRMediathek**: Saarländischer Rundfunk @@ -510,6 +513,8 @@ - **Turbo** - **Tutv** - **tv.dfb.de** + - **TV2** + - **TV2Article** - **TV4**: tv4.se and tv4play.se - **tvigle**: Интернет-телевидение Tvigle.ru - **tvp.pl** diff --git a/test/test_utils.py b/test/test_utils.py index b40107037..e13e11b59 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -621,6 +621,21 @@ Line ''' self.assertEqual(dfxp2srt(dfxp_data), srt_data) + dfxp_data_no_default_namespace = ''' + + +
+

The first line

+
+ +
''' + srt_data = '''1 +00:00:00,000 --> 00:00:01,000 +The first line + +''' + self.assertEqual(dfxp2srt(dfxp_data_no_default_namespace), srt_data) + if __name__ == '__main__': unittest.main() diff --git a/tox.ini b/tox.ini index 00c6e00e3..cd805fe8a 100644 --- a/tox.ini +++ b/tox.ini @@ -4,6 +4,8 @@ envlist = py26,py27,py33,py34 deps = nose coverage +# We need a valid $HOME for test_compat_expanduser +passenv = HOME defaultargs = test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 5df889945..58b34e087 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1368,7 +1368,7 @@ class YoutubeDL(object): postprocessors = [] self.report_warning('You have requested multiple ' 'formats but ffmpeg or avconv are not installed.' - ' The formats won\'t be merged') + ' The formats won\'t be merged.') else: postprocessors = [merger] @@ -1395,8 +1395,8 @@ class YoutubeDL(object): requested_formats = info_dict['requested_formats'] if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats): info_dict['ext'] = 'mkv' - self.report_warning('You have requested formats incompatible for merge. 
' - 'The formats will be merged into mkv') + self.report_warning( + 'Requested formats are incompatible for merge and will be merged into mkv.') # Ensure filename always has a correct extension for successful merge filename = '%s.%s' % (filename_wo_ext, info_dict['ext']) if os.path.exists(encodeFilename(filename)): diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 173e9a155..24efb7ce5 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -572,6 +572,10 @@ from .tumblr import TumblrIE from .tunein import TuneInIE from .turbo import TurboIE from .tutv import TutvIE +from .tv2 import ( + TV2IE, + TV2ArticleIE, +) from .tv4 import TV4IE from .tvigle import TvigleIE from .tvp import TvpIE, TvpSeriesIE diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 65bb77086..cecf917ff 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -786,8 +786,8 @@ class InfoExtractor(object): return True except ExtractorError as e: if isinstance(e.cause, compat_HTTPError): - self.report_warning( - '%s URL is invalid, skipping' % item, video_id) + self.to_screen( + '%s: %s URL is invalid, skipping' % (video_id, item)) return False raise diff --git a/youtube_dl/extractor/instagram.py b/youtube_dl/extractor/instagram.py index 65f6ca103..b10755788 100644 --- a/youtube_dl/extractor/instagram.py +++ b/youtube_dl/extractor/instagram.py @@ -7,9 +7,9 @@ from ..utils import int_or_none class InstagramIE(InfoExtractor): - _VALID_URL = r'https?://instagram\.com/p/(?P[\da-zA-Z]+)' + _VALID_URL = r'https://instagram\.com/p/(?P[\da-zA-Z]+)' _TEST = { - 'url': 'http://instagram.com/p/aye83DjauH/?foo=bar#abc', + 'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc', 'md5': '0d2da106a9d2631273e192b372806516', 'info_dict': { 'id': 'aye83DjauH', @@ -41,11 +41,11 @@ class InstagramIE(InfoExtractor): class InstagramUserIE(InfoExtractor): - _VALID_URL = 
r'http://instagram\.com/(?P[^/]{2,})/?(?:$|[?#])' + _VALID_URL = r'https://instagram\.com/(?P[^/]{2,})/?(?:$|[?#])' IE_DESC = 'Instagram user profile' IE_NAME = 'instagram:user' _TEST = { - 'url': 'http://instagram.com/porsche', + 'url': 'https://instagram.com/porsche', 'info_dict': { 'id': 'porsche', 'title': 'porsche', diff --git a/youtube_dl/extractor/letv.py b/youtube_dl/extractor/letv.py index 1484ac0d2..da896caf1 100644 --- a/youtube_dl/extractor/letv.py +++ b/youtube_dl/extractor/letv.py @@ -50,9 +50,7 @@ class LetvIE(InfoExtractor): 'title': '与龙共舞 完整版', 'description': 'md5:7506a5eeb1722bb9d4068f85024e3986', }, - 'params': { - 'cn_verification_proxy': 'http://proxy.uku.im:8888' - }, + 'skip': 'Only available in China', }] @staticmethod diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index 13113820b..b540033e2 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -26,7 +26,7 @@ class QQMusicIE(InfoExtractor): 'title': '可惜没如果', 'upload_date': '20141227', 'creator': '林俊杰', - 'description': 'md5:4348ff1dd24036906baa7b6f973f8d30', + 'description': 'md5:d327722d0361576fde558f1ac68a7065', } }] @@ -60,6 +60,8 @@ class QQMusicIE(InfoExtractor): lrc_content = self._html_search_regex( r'
<div class="content" id="lrc_content"[^<>]*>([^<>]+)</div>
', detail_info_page, 'LRC lyrics', default=None) + if lrc_content: + lrc_content = lrc_content.replace('\\n', '\n') guid = self.m_r_get_ruin() diff --git a/youtube_dl/extractor/sbs.py b/youtube_dl/extractor/sbs.py index 3073e5e86..d4bd1a0d7 100644 --- a/youtube_dl/extractor/sbs.py +++ b/youtube_dl/extractor/sbs.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals -import json import re from .common import InfoExtractor from ..utils import ( diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py index eab4adfca..29bd9ce6f 100644 --- a/youtube_dl/extractor/sohu.py +++ b/youtube_dl/extractor/sohu.py @@ -23,9 +23,7 @@ class SohuIE(InfoExtractor): 'ext': 'mp4', 'title': 'MV:Far East Movement《The Illest》', }, - 'params': { - 'cn_verification_proxy': 'proxy.uku.im:8888' - } + 'skip': 'On available in China', }, { 'url': 'http://tv.sohu.com/20150305/n409385080.shtml', 'md5': 'ac9a5d322b4bf9ae184d53e4711e4f1a', diff --git a/youtube_dl/extractor/tv2.py b/youtube_dl/extractor/tv2.py new file mode 100644 index 000000000..fa338b936 --- /dev/null +++ b/youtube_dl/extractor/tv2.py @@ -0,0 +1,126 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + int_or_none, + float_or_none, + parse_iso8601, + remove_end, +) + + +class TV2IE(InfoExtractor): + _VALID_URL = 'http://(?:www\.)?tv2\.no/v/(?P\d+)' + _TEST = { + 'url': 'http://www.tv2.no/v/916509/', + 'md5': '9cb9e3410b18b515d71892f27856e9b1', + 'info_dict': { + 'id': '916509', + 'ext': 'flv', + 'title': 'Se Gryttens hyllest av Steven Gerrard', + 'description': 'TV 2 Sportens huspoet tar avskjed med Liverpools kaptein Steven Gerrard.', + 'timestamp': 1431715610, + 'upload_date': '20150515', + 'duration': 156.967, + 'view_count': int, + 'categories': list, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + formats = [] + format_urls = [] + for protocol in 
('HDS', 'HLS'): + data = self._download_json( + 'http://sumo.tv2.no/api/web/asset/%s/play.json?protocol=%s&videoFormat=SMIL+ISMUSP' % (video_id, protocol), + video_id, 'Downloading play JSON')['playback'] + for item in data['items']['item']: + video_url = item.get('url') + if not video_url or video_url in format_urls: + continue + format_id = '%s-%s' % (protocol.lower(), item.get('mediaFormat')) + if not self._is_valid_url(video_url, video_id, format_id): + continue + format_urls.append(video_url) + ext = determine_ext(video_url) + if ext == 'f4m': + formats.extend(self._extract_f4m_formats( + video_url, video_id, f4m_id=format_id)) + elif ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', m3u8_id=format_id)) + elif ext == 'ism' or video_url.endswith('.ism/Manifest'): + pass + else: + formats.append({ + 'url': video_url, + 'format_id': format_id, + 'tbr': int_or_none(item.get('bitrate')), + 'filesize': int_or_none(item.get('fileSize')), + }) + self._sort_formats(formats) + + asset = self._download_json( + 'http://sumo.tv2.no/api/web/asset/%s.json' % video_id, + video_id, 'Downloading metadata JSON')['asset'] + + title = asset['title'] + description = asset.get('description') + timestamp = parse_iso8601(asset.get('createTime')) + duration = float_or_none(asset.get('accurateDuration') or asset.get('duration')) + view_count = int_or_none(asset.get('views')) + categories = asset.get('keywords', '').split(',') + + thumbnails = [{ + 'id': thumbnail.get('@type'), + 'url': thumbnail.get('url'), + } for _, thumbnail in asset.get('imageVersions', {}).items()] + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'description': description, + 'thumbnails': thumbnails, + 'timestamp': timestamp, + 'duration': duration, + 'view_count': view_count, + 'categories': categories, + 'formats': formats, + } + + +class TV2ArticleIE(InfoExtractor): + _VALID_URL = 'http://(?:www\.)?tv2\.no/(?:a|\d{4}/\d{2}/\d{2}(/[^/]+)+)/(?P\d+)' + 
_TESTS = [{ + 'url': 'http://www.tv2.no/2015/05/16/nyheter/alesund/krim/pingvin/6930542', + 'info_dict': { + 'id': '6930542', + 'title': 'Russen hetses etter pingvintyveri – innrømmer å ha åpnet luken på buret', + 'description': 'md5:339573779d3eea3542ffe12006190954', + }, + 'playlist_count': 2, + }, { + 'url': 'http://www.tv2.no/a/6930542', + 'only_matching': True, + }] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + entries = [ + self.url_result('http://www.tv2.no/v/%s' % video_id, 'TV2') + for video_id in re.findall(r'data-assetid="(\d+)"', webpage)] + + title = remove_end(self._og_search_title(webpage), ' - TV2.no') + description = remove_end(self._og_search_description(webpage), ' - TV2.no') + + return self.playlist_result(entries, playlist_id, title, description) diff --git a/youtube_dl/extractor/ultimedia.py b/youtube_dl/extractor/ultimedia.py index 96c809eaf..c4751050e 100644 --- a/youtube_dl/extractor/ultimedia.py +++ b/youtube_dl/extractor/ultimedia.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..compat import compat_urllib_parse_urlparse from ..utils import ( ExtractorError, qualities, @@ -44,9 +45,9 @@ class UltimediaIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - deliver_url = self._search_regex( - r']+src="(https?://(?:www\.)?ultimedia\.com/deliver/[^"]+)"', - webpage, 'deliver URL') + deliver_url = self._proto_relative_url(self._search_regex( + r']+src="((?:https?:)?//(?:www\.)?ultimedia\.com/deliver/[^"]+)"', + webpage, 'deliver URL'), compat_urllib_parse_urlparse(url).scheme + ':') deliver_page = self._download_webpage( deliver_url, video_id, 'Downloading iframe page') @@ -57,7 +58,8 @@ class UltimediaIE(InfoExtractor): player = self._parse_json( self._search_regex( - r"jwplayer\('player(?:_temp)?'\)\.setup\(({.+?})\)\.on", deliver_page, 'player'), + 
r"jwplayer\('player(?:_temp)?'\)\.setup\(({.+?})\)\.on", + deliver_page, 'player'), video_id) quality = qualities(['flash', 'html5']) diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py index 619039e51..15377097e 100644 --- a/youtube_dl/extractor/vier.py +++ b/youtube_dl/extractor/vier.py @@ -38,11 +38,14 @@ class VierIE(InfoExtractor): webpage = self._download_webpage(url, display_id) video_id = self._search_regex( - r'"nid"\s*:\s*"(\d+)"', webpage, 'video id') + [r'data-nid="(\d+)"', r'"nid"\s*:\s*"(\d+)"'], + webpage, 'video id') application = self._search_regex( - r'"application"\s*:\s*"([^"]+)"', webpage, 'application', default='vier_vod') + [r'data-application="([^"]+)"', r'"application"\s*:\s*"([^"]+)"'], + webpage, 'application', default='vier_vod') filename = self._search_regex( - r'"filename"\s*:\s*"([^"]+)"', webpage, 'filename') + [r'data-filename="([^"]+)"', r'"filename"\s*:\s*"([^"]+)"'], + webpage, 'filename') playlist_url = 'http://vod.streamcloud.be/%s/mp4:_definst_/%s.mp4/playlist.m3u8' % (application, filename) formats = self._extract_m3u8_formats(playlist_url, display_id, 'mp4') diff --git a/youtube_dl/extractor/vuclip.py b/youtube_dl/extractor/vuclip.py index c3fde53f5..a6d9b5fee 100644 --- a/youtube_dl/extractor/vuclip.py +++ b/youtube_dl/extractor/vuclip.py @@ -49,7 +49,7 @@ class VuClipIE(InfoExtractor): links_code = self._search_regex( r'''(?xs) (?: - | + | \s* ) (.*?) 
diff --git a/youtube_dl/extractor/vulture.py b/youtube_dl/extractor/vulture.py index 1eb24a3d6..faa167e65 100644 --- a/youtube_dl/extractor/vulture.py +++ b/youtube_dl/extractor/vulture.py @@ -44,7 +44,7 @@ class VultureIE(InfoExtractor): query_webpage = self._download_webpage( query_url, display_id, note='Downloading query page') params_json = self._search_regex( - r'(?sm)new MagnifyEmbeddablePlayer\({.*?contentItem:\s*(\{.*?\})\n,\n', + r'(?sm)new MagnifyEmbeddablePlayer\({.*?contentItem:\s*(\{.*?\})\n?,\n', query_webpage, 'player params') params = json.loads(params_json) diff --git a/youtube_dl/extractor/wimp.py b/youtube_dl/extractor/wimp.py index d6dec25ca..f69d46a28 100644 --- a/youtube_dl/extractor/wimp.py +++ b/youtube_dl/extractor/wimp.py @@ -37,7 +37,8 @@ class WimpIE(InfoExtractor): video_id = mobj.group(1) webpage = self._download_webpage(url, video_id) video_url = self._search_regex( - r"[\"']file[\"']\s*[:,]\s*[\"'](.+?)[\"']", webpage, 'video URL') + [r"[\"']file[\"']\s*[:,]\s*[\"'](.+?)[\"']", r"videoId\s*:\s*[\"']([^\"']+)[\"']"], + webpage, 'video URL') if YoutubeIE.suitable(video_url): self.to_screen('Found YouTube video') return { diff --git a/youtube_dl/extractor/xminus.py b/youtube_dl/extractor/xminus.py index 8c6241aed..7c9d8af6f 100644 --- a/youtube_dl/extractor/xminus.py +++ b/youtube_dl/extractor/xminus.py @@ -43,7 +43,7 @@ class XMinusIE(InfoExtractor): r'minus_track\.dur_sec=\'([0-9]*?)\'', webpage, 'duration', fatal=False)) filesize_approx = parse_filesize(self._html_search_regex( - r'
<div class="filesize[^"]*"></div>\s*([0-9.]+\s*[a-zA-Z][bB])', + r'
<div id="finfo"[^>]*>\s*↓\s*([0-9.]+\s*[a-zA-Z][bB])', webpage, 'approximate filesize', fatal=False)) tbr = int_or_none(self._html_search_regex( r'
\s*([0-9]+)\s*kbps', @@ -58,7 +58,7 @@ class XMinusIE(InfoExtractor): description = re.sub(' *\r *', '\n', description) enc_token = self._html_search_regex( - r'minus_track\.tkn="(.+?)"', webpage, 'enc_token') + r'minus_track\.s?tkn="(.+?)"', webpage, 'enc_token') token = ''.join( c if pos == 3 else compat_chr(compat_ord(c) - 1) for pos, c in enumerate(reversed(enc_token))) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index bf4e659ac..f9afbdbab 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -15,6 +15,7 @@ from ..utils import ( unescapeHTML, ExtractorError, int_or_none, + mimetype2ext, ) from .nbc import NBCSportsVPlayerIE @@ -236,6 +237,22 @@ class YahooIE(InfoExtractor): self._sort_formats(formats) + closed_captions = self._html_search_regex( + r'"closedcaptions":(\[[^\]]+\])', webpage, 'closed captions', + default='[]') + + cc_json = self._parse_json(closed_captions, video_id, fatal=False) + subtitles = {} + if cc_json: + for closed_caption in cc_json: + lang = closed_caption['lang'] + if lang not in subtitles: + subtitles[lang] = [] + subtitles[lang].append({ + 'url': closed_caption['url'], + 'ext': mimetype2ext(closed_caption['content_type']), + }) + return { 'id': video_id, 'display_id': display_id, @@ -244,6 +261,7 @@ class YahooIE(InfoExtractor): 'description': clean_html(meta['description']), 'thumbnail': meta['thumbnail'] if meta.get('thumbnail') else self._og_search_thumbnail(webpage), 'duration': int_or_none(meta.get('duration')), + 'subtitles': subtitles, } diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index ed9ed9ed6..52d198fa3 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1665,6 +1665,7 @@ def mimetype2ext(mt): return { 'x-ms-wmv': 'wmv', 'x-mp4-fragmented': 'mp4', + 'ttml+xml': 'ttml', }.get(res, res) @@ -1848,9 +1849,9 @@ def dfxp2srt(dfxp_data): out = str_or_empty(node.text) for child in node: - if child.tag == _x('ttml:br'): + if child.tag in (_x('ttml:br'), 
'br'): out += '\n' + str_or_empty(child.tail) - elif child.tag == _x('ttml:span'): + elif child.tag in (_x('ttml:span'), 'span'): out += str_or_empty(parse_node(child)) else: out += str_or_empty(xml.etree.ElementTree.tostring(child)) @@ -1859,7 +1860,10 @@ def dfxp2srt(dfxp_data): dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8')) out = [] - paras = dfxp.findall(_x('.//ttml:p')) + paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p') + + if not paras: + raise ValueError('Invalid dfxp/TTML subtitle') for para, index in zip(paras, itertools.count(1)): begin_time = parse_dfxp_time_expr(para.attrib['begin']) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 38f00bc9b..b33385153 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.05.15' +__version__ = '2015.05.20'