X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fvrt.py;h=bec7ab327008803f8609ea0e78e7d70577556940;hb=7f29cf545a45db2bd5c2681a2fac782ca460b153;hp=bbd3bbf7bad98c787c0840ed0f302198ebb7932a;hpb=cf372f0778e82cdc181a6173909589e640ac29fb;p=youtube-dl diff --git a/youtube_dl/extractor/vrt.py b/youtube_dl/extractor/vrt.py index bbd3bbf7b..bec7ab327 100644 --- a/youtube_dl/extractor/vrt.py +++ b/youtube_dl/extractor/vrt.py @@ -4,11 +4,14 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import float_or_none +from ..utils import ( + determine_ext, + float_or_none, +) class VRTIE(InfoExtractor): - _VALID_URL = r'https?://(?:deredactie|sporza|cobra)\.be/cm/(?:[^/]+/)+(?P[^/]+)/*' + _VALID_URL = r'https?://(?:deredactie|sporza|cobra(?:\.canvas)?)\.be/cm/(?:[^/]+/)+(?P[^/]+)/*' _TESTS = [ # deredactie.be { @@ -22,7 +25,8 @@ class VRTIE(InfoExtractor): 'timestamp': 1414271750.949, 'upload_date': '20141025', 'duration': 929, - } + }, + 'skip': 'HTTP Error 404: Not Found', }, # sporza.be { @@ -36,7 +40,8 @@ class VRTIE(InfoExtractor): 'timestamp': 1413835980.560, 'upload_date': '20141020', 'duration': 3238, - } + }, + 'skip': 'HTTP Error 404: Not Found', }, # cobra.be { @@ -50,8 +55,40 @@ class VRTIE(InfoExtractor): 'timestamp': 1413967500.494, 'upload_date': '20141022', 'duration': 661, - } + }, + 'skip': 'HTTP Error 404: Not Found', }, + { + # YouTube video + 'url': 'http://deredactie.be/cm/vrtnieuws/videozone/nieuws/cultuurenmedia/1.2622957', + 'md5': 'b8b93da1df1cea6c8556255a796b7d61', + 'info_dict': { + 'id': 'Wji-BZ0oCwg', + 'ext': 'mp4', + 'title': 'ROGUE ONE: A STAR WARS STORY Official Teaser Trailer', + 'description': 'md5:8e468944dce15567a786a67f74262583', + 'uploader': 'Star Wars', + 'uploader_id': 'starwars', + 'upload_date': '20160407', + }, + 'add_ie': ['Youtube'], + }, + { + 'url': 'http://cobra.canvas.be/cm/cobra/videozone/rubriek/film-videozone/1.2377055', + 'md5': '', + 'info_dict': { + 'id': '2377055', + 'ext': 'mp4', + 'title': 'Cafe Derby', + 'description': 'Lenny Van Wesemael debuteert met de langspeelfilm Café Derby. Een waar gebeurd maar ook verzonnen verhaal.', + 'upload_date': '20150626', + 'timestamp': 1435305240.769, + }, + 'params': { + # m3u8 download + 'skip_download': True, + } + } ] def _real_extract(self, url): @@ -62,18 +99,63 @@ class VRTIE(InfoExtractor): video_id = self._search_regex( r'data-video-id="([^"]+)_[^"]+"', webpage, 'video id', fatal=False) + src = self._search_regex( + r'data-video-src="([^"]+)"', webpage, 'video src', default=None) + + video_type = self._search_regex( + r'data-video-type="([^"]+)"', webpage, 'video type', default=None) + + if video_type == 'YouTubeVideo': + return self.url_result(src, 'Youtube') + formats = [] + mobj = re.search( r'data-video-iphone-server="(?P[^"]+)"\s+data-video-iphone-path="(?P[^"]+)"', webpage) if mobj: formats.extend(self._extract_m3u8_formats( '%s/%s' % (mobj.group('server'), mobj.group('path')), - video_id, 'mp4')) - mobj = re.search(r'data-video-src="(?P[^"]+)"', webpage) - if mobj: - formats.extend(self._extract_f4m_formats( - '%s/manifest.f4m' % mobj.group('src'), video_id)) + video_id, 'mp4', m3u8_id='hls', fatal=False)) + + if src: + if determine_ext(src) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + src, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + formats.extend(self._extract_f4m_formats( + src.replace('playlist.m3u8', 'manifest.f4m'), + video_id, f4m_id='hds', fatal=False)) + if 'data-video-geoblocking="true"' not in webpage: + rtmp_formats = self._extract_smil_formats( + src.replace('playlist.m3u8', 'jwplayer.smil'), + video_id, fatal=False) + formats.extend(rtmp_formats) + for rtmp_format in rtmp_formats: + rtmp_format_c = rtmp_format.copy() + rtmp_format_c['url'] = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path']) + del rtmp_format_c['play_path'] + del rtmp_format_c['ext'] + http_format = rtmp_format_c.copy() + http_format.update({ + 'url': rtmp_format_c['url'].replace('rtmp://', 'http://').replace('vod.', 'download.').replace('/_definst_/', '/').replace('mp4:', ''), + 'format_id': rtmp_format['format_id'].replace('rtmp', 'http'), + 'protocol': 'http', + }) + rtsp_format = rtmp_format_c.copy() + rtsp_format.update({ + 'url': rtsp_format['url'].replace('rtmp://', 'rtsp://'), + 'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'), + 'protocol': 'rtsp', + }) + formats.extend([http_format, rtsp_format]) + else: + formats.extend(self._extract_f4m_formats( + '%s/manifest.f4m' % src, video_id, f4m_id='hds', fatal=False)) + + if not formats and 'data-video-geoblocking="true"' in webpage: + self.raise_geo_restricted('This video is only available in Belgium') + self._sort_formats(formats) title = self._og_search_title(webpage)