From: Sergey M Date: Thu, 14 Jan 2016 17:20:02 +0000 (+0500) Subject: Merge pull request #8228 from jaimeMF/disable-file-handler X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=345f12196c9ed37094aec8c029809be888052d3a;hp=30e2f2d76f6dd52803effce14fa14f3a8051c84a;p=youtube-dl Merge pull request #8228 from jaimeMF/disable-file-handler [YoutubeDL] urlopen: disable the 'file:' protocol (#8227) --- diff --git a/AUTHORS b/AUTHORS index 3d8bebbb0..f4238e1d3 100644 --- a/AUTHORS +++ b/AUTHORS @@ -152,3 +152,4 @@ Evan Lu flatgreen Brian Foley Vignesh Venkat +Tom Gijselinck diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 8d0c7b97a..eb160bd2f 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -65,6 +65,7 @@ - **Beeg** - **BehindKink** - **Bet** + - **Bigflix** - **Bild**: Bild.de - **BiliBili** - **BleacherReport** @@ -251,7 +252,7 @@ - **Instagram** - **instagram:user**: Instagram user profile - **InternetVideoArchive** - - **IPrima** + - **IPrima** (Currently broken) - **iqiyi**: 爱奇艺 - **Ir90Tv** - **ivi**: ivi.ru @@ -602,7 +603,9 @@ - **TruTube** - **Tube8** - **TubiTv** - - **Tudou** + - **tudou** + - **tudou:album** + - **tudou:playlist** - **Tumblr** - **tunein:clip** - **tunein:program** @@ -655,12 +658,12 @@ - **video.mit.edu** - **VideoDetective** - **videofy.me** - - **VideoMega** + - **VideoMega** (Currently broken) - **videomore** - **videomore:season** - **videomore:video** - **VideoPremium** - - **VideoTt**: video.tt - Your True Tube + - **VideoTt**: video.tt - Your True Tube (Currently broken) - **videoweed**: VideoWeed - **Vidme** - **Vidzi** diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index e4ae9332d..5621c9eb0 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -86,6 +86,7 @@ from .camdemy import ( ) from .canalplus import CanalplusIE from .canalc2 import Canalc2IE +from .canvas import CanvasIE from .cbs import CBSIE from .cbsnews import CBSNewsIE from .cbssports import CBSSportsIE diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py index c8d921daf..34c2a756f 100644 --- a/youtube_dl/extractor/beeg.py +++ b/youtube_dl/extractor/beeg.py @@ -34,7 +34,7 @@ class BeegIE(InfoExtractor): video_id = self._match_id(url) video = self._download_json( - 'http://beeg.com/api/v5/video/%s' % video_id, video_id) + 'https://api.beeg.com/api/v5/video/%s' % video_id, video_id) def split(o, e): def cut(s, x): @@ -60,7 +60,7 @@ class BeegIE(InfoExtractor): def decrypt_url(encrypted_url): encrypted_url = self._proto_relative_url( - encrypted_url.replace('{DATA_MARKERS}', ''), 'http:') + encrypted_url.replace('{DATA_MARKERS}', ''), 'https:') key = self._search_regex( r'/key=(.*?)%2Cend=', encrypted_url, 'key', default=None) if not key: diff --git a/youtube_dl/extractor/canvas.py b/youtube_dl/extractor/canvas.py new file mode 100644 index 000000000..ee19ff836 --- /dev/null +++ b/youtube_dl/extractor/canvas.py @@ -0,0 +1,65 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import float_or_none + + +class CanvasIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?canvas\.be/video/(?:[^/]+/)*(?P[^/?#&]+)' + _TEST = { + 'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week', + 'md5': 'ea838375a547ac787d4064d8c7860a6c', + 'info_dict': { + 'id': 'mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e', + 'display_id': 'de-afspraak-veilt-voor-de-warmste-week', + 'ext': 'mp4', + 'title': 'De afspraak veilt voor de Warmste Week', + 'description': 'md5:24cb860c320dc2be7358e0e5aa317ba6', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 49.02, + } + } + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + title = self._search_regex( + r']+class="video__body__header__title"[^>]*>(.+?)', + webpage, 'title', default=None) or self._og_search_title(webpage) + + video_id = self._html_search_regex( + r'data-video=(["\'])(?P.+?)\1', webpage, 'video id', group='id') + + data = self._download_json( + 'https://mediazone.vrt.be/api/v1/canvas/assets/%s' % video_id, display_id) + + formats = [] + for target in data['targetUrls']: + format_url, format_type = target.get('url'), target.get('type') + if not format_url or not format_type: + continue + if format_type == 'HLS': + formats.extend(self._extract_m3u8_formats( + format_url, display_id, entry_protocol='m3u8_native', + ext='mp4', preference=0, fatal=False, m3u8_id=format_type)) + elif format_type == 'HDS': + formats.extend(self._extract_f4m_formats( + format_url, display_id, f4m_id=format_type, fatal=False)) + else: + formats.append({ + 'format_id': format_type, + 'url': format_url, + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': self._og_search_description(webpage), + 'formats': formats, + 'duration': float_or_none(data.get('duration'), 1000), + 'thumbnail': data.get('posterImageUrl'), + } diff --git a/youtube_dl/extractor/ntvde.py b/youtube_dl/extractor/ntvde.py index d2cfe0961..a83e85cb8 100644 --- a/youtube_dl/extractor/ntvde.py +++ b/youtube_dl/extractor/ntvde.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..compat import compat_urlparse from ..utils import ( int_or_none, js_to_json, @@ -34,7 +35,7 @@ class NTVDeIE(InfoExtractor): webpage = self._download_webpage(url, video_id) info = self._parse_json(self._search_regex( - r'(?s)ntv.pageInfo.article =\s(\{.*?\});', webpage, 'info'), + r'(?s)ntv\.pageInfo\.article\s*=\s*(\{.*?\});', webpage, 'info'), video_id, transform_source=js_to_json) timestamp = int_or_none(info.get('publishedDateAsUnixTimeStamp')) vdata = self._parse_json(self._search_regex( @@ -42,18 +43,24 @@ class NTVDeIE(InfoExtractor): webpage, 'player data'), video_id, transform_source=js_to_json) duration = parse_duration(vdata.get('duration')) - formats = [{ - 'format_id': 'flash', - 'url': 'rtmp://fms.n-tv.de/' + vdata['video'], - }, { - 'format_id': 'mobile', - 'url': 'http://video.n-tv.de' + vdata['videoMp4'], - 'tbr': 400, # estimation - }] - m3u8_url = 'http://video.n-tv.de' + vdata['videoM3u8'] - formats.extend(self._extract_m3u8_formats( - m3u8_url, video_id, ext='mp4', - entry_protocol='m3u8_native', preference=0)) + + formats = [] + if vdata.get('video'): + formats.append({ + 'format_id': 'flash', + 'url': 'rtmp://fms.n-tv.de/%s' % vdata['video'], + }) + if vdata.get('videoMp4'): + formats.append({ + 'format_id': 'mobile', + 'url': compat_urlparse.urljoin('http://video.n-tv.de', vdata['videoMp4']), + 'tbr': 400, # estimation + }) + if vdata.get('videoM3u8'): + m3u8_url = compat_urlparse.urljoin('http://video.n-tv.de', vdata['videoM3u8']) + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native', + preference=0, m3u8_id='hls', fatal=False)) self._sort_formats(formats) return { diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index 2e6c9872b..c54775d54 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -170,7 +170,21 @@ class ORFOE1IE(InfoExtractor): class ORFFM4IE(InfoExtractor): IE_NAME = 'orf:fm4' IE_DESC = 'radio FM4' - _VALID_URL = r'http://fm4\.orf\.at/7tage/?#(?P[0-9]+)/(?P\w+)' + _VALID_URL = r'http://fm4\.orf\.at/(?:7tage/?#|player/)(?P[0-9]+)/(?P\w+)' + + _TEST = { + 'url': 'http://fm4.orf.at/player/20160110/IS/', + 'md5': '01e736e8f1cef7e13246e880a59ad298', + 'info_dict': { + 'id': '2016-01-10_2100_tl_54_7DaysSun13_11244', + 'ext': 'mp3', + 'title': 'Im Sumpf', + 'description': 'md5:384c543f866c4e422a55f66a62d669cd', + 'duration': 7173, + 'timestamp': 1452456073, + 'upload_date': '20160110', + }, + } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index baa54a3af..670e6950f 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -20,7 +20,7 @@ from ..utils import ( class ProSiebenSat1IE(InfoExtractor): IE_NAME = 'prosiebensat1' IE_DESC = 'ProSiebenSat.1 Digital' - _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany)\.(?:de|at|ch)|ran\.de|fem\.com)/(?P.+)' + _VALID_URL = r'https?://(?:www\.)?(?:(?:prosieben|prosiebenmaxx|sixx|sat1|kabeleins|the-voice-of-germany|7tv)\.(?:de|at|ch)|ran\.de|fem\.com)/(?P.+)' _TESTS = [ { @@ -32,7 +32,7 @@ class ProSiebenSat1IE(InfoExtractor): 'url': 'http://www.prosieben.de/tv/circus-halligalli/videos/218-staffel-2-episode-18-jahresrueckblick-ganze-folge', 'info_dict': { 'id': '2104602', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'Episode 18 - Staffel 2', 'description': 'md5:8733c81b702ea472e069bc48bb658fc1', 'upload_date': '20131231', @@ -138,14 +138,13 @@ class ProSiebenSat1IE(InfoExtractor): 'url': 'http://www.the-voice-of-germany.de/video/31-andreas-kuemmert-rocket-man-clip', 'info_dict': { 'id': '2572814', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'Andreas Kümmert: Rocket Man', 'description': 'md5:6ddb02b0781c6adf778afea606652e38', 'upload_date': '20131017', 'duration': 469.88, }, 'params': { - # rtmp download 'skip_download': True, }, }, @@ -153,13 +152,12 @@ class ProSiebenSat1IE(InfoExtractor): 'url': 'http://www.fem.com/wellness/videos/wellness-video-clip-kurztripps-zum-valentinstag.html', 'info_dict': { 'id': '2156342', - 'ext': 'mp4', + 'ext': 'flv', 'title': 'Kurztrips zum Valentinstag', - 'description': 'Romantischer Kurztrip zum Valentinstag? Wir verraten, was sich hier wirklich lohnt.', + 'description': 'Romantischer Kurztrip zum Valentinstag? Nina Heinemann verrät, was sich hier wirklich lohnt.', 'duration': 307.24, }, 'params': { - # rtmp download 'skip_download': True, }, }, @@ -172,12 +170,26 @@ class ProSiebenSat1IE(InfoExtractor): }, 'playlist_count': 2, }, + { + 'url': 'http://www.7tv.de/circus-halligalli/615-best-of-circus-halligalli-ganze-folge', + 'info_dict': { + 'id': '4187506', + 'ext': 'flv', + 'title': 'Best of Circus HalliGalli', + 'description': 'md5:8849752efd90b9772c9db6fdf87fb9e9', + 'upload_date': '20151229', + }, + 'params': { + 'skip_download': True, + }, + }, ] _CLIPID_REGEXES = [ r'"clip_id"\s*:\s+"(\d+)"', r'clipid: "(\d+)"', r'clip[iI]d=(\d+)', + r'clip[iI]d\s*=\s*["\'](\d+)', r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)", ] _TITLE_REGEXES = [ @@ -186,12 +198,16 @@ class ProSiebenSat1IE(InfoExtractor): r'\s*

(.+?)

', r'

\s*(.+?)

', r'
\s*

([^<]+)

\s*
', + r'

\s*(.+?)

', + r']+id="veeseoTitle"[^>]*>(.+?)', ] _DESCRIPTION_REGEXES = [ r'

\s*(.+?)

', r'
\s*

Beschreibung: (.+?)

', r'
\s*
\s*\s*(.+?)\s*