X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Forf.py;h=6ae30679a0a226b0d242b2ef773fbab9a90920c5;hb=99ed78c79e94c14ce24bc5bdccaf9573d4f83552;hp=a6e722bf5cf972597abc650f2914147bf6741d4e;hpb=74f91c4af7640a62dfe610fae80e92b0a40593c2;p=youtube-dl diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index a6e722bf5..6ae30679a 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -11,15 +11,20 @@ from ..utils import ( HEADRequest, unified_strdate, ExtractorError, + strip_jsonp, + int_or_none, + float_or_none, + determine_ext, + remove_end, ) class ORFTVthekIE(InfoExtractor): IE_NAME = 'orf:tvthek' IE_DESC = 'ORF TVthek' - _VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics/.+?|program/[^/]+)/(?P<id>\d+)' + _VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics?/.+?|program/[^/]+)/(?P<id>\d+)' - _TEST = { + _TESTS = [{ 'url': 'http://tvthek.orf.at/program/Aufgetischt/2745173/Aufgetischt-Mit-der-Steirischen-Tafelrunde/8891389', 'playlist': [{ 'md5': '2942210346ed779588f428a92db88712', @@ -32,8 +37,21 @@ class ORFTVthekIE(InfoExtractor): 'upload_date': '20141208', }, }], - 'skip': 'Blocked outside of Austria', - } + 'skip': 'Blocked outside of Austria / Germany', + }, { + 'url': 'http://tvthek.orf.at/topic/Im-Wandel-der-Zeit/8002126/Best-of-Ingrid-Thurnher/7982256', + 'info_dict': { + 'id': '7982259', + 'ext': 'mp4', + 'title': 'Best of Ingrid Thurnher', + 'upload_date': '20140527', + 'description': 'Viele Jahre war Ingrid Thurnher das "Gesicht" der ZIB 2. 
Vor ihrem Wechsel zur ZIB 2 im Jahr 1995 moderierte sie unter anderem "Land und Leute", "Österreich-Bild" und "Niederösterreich heute".', + }, + 'params': { + 'skip_download': True, # rtsp downloads + }, + '_skip': 'Blocked outside of Austria / Germany', + }] def _real_extract(self, url): playlist_id = self._match_id(url) @@ -45,7 +63,9 @@ class ORFTVthekIE(InfoExtractor): def get_segments(all_data): for data in all_data: - if data['name'] == 'Tracker::EPISODE_DETAIL_PAGE_OVER_PROGRAM': + if data['name'] in ( + 'Tracker::EPISODE_DETAIL_PAGE_OVER_PROGRAM', + 'Tracker::EPISODE_DETAIL_PAGE_OVER_TOPIC'): return data['values']['segments'] sdata = get_segments(all_data) @@ -92,6 +112,7 @@ class ORFTVthekIE(InfoExtractor): % geo_str), fatal=False) + self._check_formats(formats, video_id) self._sort_formats(formats) upload_date = unified_strdate(sd['created_date']) @@ -113,13 +134,19 @@ class ORFTVthekIE(InfoExtractor): } -# Audios on ORF radio are only available for 7 days, so we can't add tests. - - class ORFOE1IE(InfoExtractor): IE_NAME = 'orf:oe1' IE_DESC = 'Radio Österreich 1' - _VALID_URL = r'http://oe1\.orf\.at/programm/(?P<id>[0-9]+)' + _VALID_URL = r'https?://oe1\.orf\.at/(?:programm/|konsole\?.*?\btrack_id=)(?P<id>[0-9]+)' + + # Audios on ORF radio are only available for 7 days, so we can't add tests. 
+ _TESTS = [{ + 'url': 'http://oe1.orf.at/konsole?show=on_demand#?track_id=394211', + 'only_matching': True, + }, { + 'url': 'http://oe1.orf.at/konsole?show=ondemand&track_id=443608&load_day=/programm/konsole/tag/20160726', + 'only_matching': True, + }] def _real_extract(self, url): show_id = self._match_id(url) @@ -145,9 +172,24 @@ class ORFOE1IE(InfoExtractor): class ORFFM4IE(InfoExtractor): - IE_DESC = 'orf:fm4' + IE_NAME = 'orf:fm4' IE_DESC = 'radio FM4' - _VALID_URL = r'http://fm4\.orf\.at/7tage/?#(?P<date>[0-9]+)/(?P<show>\w+)' + _VALID_URL = r'https?://fm4\.orf\.at/(?:7tage/?#|player/)(?P<date>[0-9]+)/(?P<show>\w+)' + + _TEST = { + 'url': 'http://fm4.orf.at/player/20160110/IS/', + 'md5': '01e736e8f1cef7e13246e880a59ad298', + 'info_dict': { + 'id': '2016-01-10_2100_tl_54_7DaysSun13_11244', + 'ext': 'mp3', + 'title': 'Im Sumpf', + 'description': 'md5:384c543f866c4e422a55f66a62d669cd', + 'duration': 7173, + 'timestamp': 1452456073, + 'upload_date': '20160110', + }, + 'skip': 'Live streams on FM4 got deleted soon', + } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -179,3 +221,92 @@ class ORFFM4IE(InfoExtractor): 'description': data['subtitle'], 'entries': entries } + + +class ORFIPTVIE(InfoExtractor): + IE_NAME = 'orf:iptv' + IE_DESC = 'iptv.ORF.at' + _VALID_URL = r'https?://iptv\.orf\.at/(?:#/)?stories/(?P<id>\d+)' + + _TEST = { + 'url': 'http://iptv.orf.at/stories/2275236/', + 'md5': 'c8b22af4718a4b4af58342529453e3e5', + 'info_dict': { + 'id': '350612', + 'ext': 'flv', + 'title': 'Weitere Evakuierungen um Vulkan Calbuco', + 'description': 'md5:d689c959bdbcf04efeddedbf2299d633', + 'duration': 68.197, + 'thumbnail': 're:^https?://.*\.jpg$', + 'upload_date': '20150425', + }, + } + + def _real_extract(self, url): + story_id = self._match_id(url) + + webpage = self._download_webpage( + 'http://iptv.orf.at/stories/%s' % story_id, story_id) + + video_id = self._search_regex( + r'data-video(?:id)?="(\d+)"', webpage, 'video id') + + data = self._download_json( + 
'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s' % video_id, + video_id)[0] + + duration = float_or_none(data['duration'], 1000) + + video = data['sources']['default'] + load_balancer_url = video['loadBalancerUrl'] + abr = int_or_none(video.get('audioBitrate')) + vbr = int_or_none(video.get('bitrate')) + fps = int_or_none(video.get('videoFps')) + width = int_or_none(video.get('videoWidth')) + height = int_or_none(video.get('videoHeight')) + thumbnail = video.get('preview') + + rendition = self._download_json( + load_balancer_url, video_id, transform_source=strip_jsonp) + + f = { + 'abr': abr, + 'vbr': vbr, + 'fps': fps, + 'width': width, + 'height': height, + } + + formats = [] + for format_id, format_url in rendition['redirect'].items(): + if format_id == 'rtmp': + ff = f.copy() + ff.update({ + 'url': format_url, + 'format_id': format_id, + }) + formats.append(ff) + elif determine_ext(format_url) == 'f4m': + formats.extend(self._extract_f4m_formats( + format_url, video_id, f4m_id=format_id)) + elif determine_ext(format_url) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', m3u8_id=format_id)) + else: + continue + self._sort_formats(formats) + + title = remove_end(self._og_search_title(webpage), ' - iptv.ORF.at') + description = self._og_search_description(webpage) + upload_date = unified_strdate(self._html_search_meta( + 'dc.date', webpage, 'upload date')) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'duration': duration, + 'thumbnail': thumbnail, + 'upload_date': upload_date, + 'formats': formats, + }