X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Forf.py;h=ca1a5bb3cd520fd9add9f195c89d8ac13619467b;hb=674fb0fcc54c72448f80a0573f7fd116f220827e;hp=88f03608ba36e52db309f5249d2bd7426faced86;hpb=5f263296eaa72ddca232d734a2625bcd85771908;p=youtube-dl diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index 88f03608b..ca1a5bb3c 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -3,39 +3,74 @@ from __future__ import unicode_literals import json import re +import calendar +import datetime from .common import InfoExtractor from ..utils import ( HEADRequest, unified_strdate, + ExtractorError, + strip_jsonp, + int_or_none, + float_or_none, + determine_ext, + remove_end, ) -class ORFIE(InfoExtractor): - _VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics/.+?|program/[^/]+)/(?P\d+)' +class ORFTVthekIE(InfoExtractor): + IE_NAME = 'orf:tvthek' + IE_DESC = 'ORF TVthek' + _VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics?/.+?|program/[^/]+)/(?P\d+)' - _TEST = { - 'url': 'http://tvthek.orf.at/program/matinee-Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7317210/Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7319746/Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7319747', - 'file': '7319747.mp4', - 'md5': 'bd803c5d8c32d3c64a0ea4b4eeddf375', - 'info_dict': { - 'title': 'Was Sie schon immer über Klassik wissen wollten', - 'description': 'md5:0ddf0d5f0060bd53f744edaa5c2e04a4', - 'duration': 3508, - 'upload_date': '20140105', - }, - 'skip': 'Blocked outside of Austria', - } + _TESTS = [{ + 'url': 'http://tvthek.orf.at/program/Aufgetischt/2745173/Aufgetischt-Mit-der-Steirischen-Tafelrunde/8891389', + 'playlist': [{ + 'md5': '2942210346ed779588f428a92db88712', + 'info_dict': { + 'id': '8896777', + 'ext': 'mp4', + 'title': 'Aufgetischt: Mit der Steirischen Tafelrunde', + 'description': 'md5:c1272f0245537812d4e36419c207b67d', + 'duration': 2668, + 'upload_date': '20141208', + }, + }], + 'skip': 'Blocked outside of Austria / Germany', + }, { + 'url': 'http://tvthek.orf.at/topic/Im-Wandel-der-Zeit/8002126/Best-of-Ingrid-Thurnher/7982256', + 'playlist': [{ + 'md5': '68f543909aea49d621dfc7703a11cfaf', + 'info_dict': { + 'id': '7982259', + 'ext': 'mp4', + 'title': 'Best of Ingrid Thurnher', + 'upload_date': '20140527', + 'description': 'Viele Jahre war Ingrid Thurnher das "Gesicht" der ZIB 2. Vor ihrem Wechsel zur ZIB 2 im jahr 1995 moderierte sie unter anderem "Land und Leute", "Österreich-Bild" und "Niederösterreich heute".', + } + }], + '_skip': 'Blocked outside of Austria / Germany', + }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - playlist_id = mobj.group('id') + playlist_id = self._match_id(url) webpage = self._download_webpage(url, playlist_id) data_json = self._search_regex( r'initializeAdworx\((.+?)\);\n', webpage, 'video info') all_data = json.loads(data_json) - sdata = all_data[0]['values']['segments'] + + def get_segments(all_data): + for data in all_data: + if data['name'] in ( + 'Tracker::EPISODE_DETAIL_PAGE_OVER_PROGRAM', + 'Tracker::EPISODE_DETAIL_PAGE_OVER_TOPIC'): + return data['values']['segments'] + + sdata = get_segments(all_data) + if not sdata: + raise ExtractorError('Unable to extract segments') def quality_to_int(s): m = re.search('([0-9]+)', s) @@ -68,7 +103,7 @@ class ORFIE(InfoExtractor): pass else: req = HEADRequest(http_url) - response = self._request_webpage( + self._request_webpage( req, video_id, note='Testing for geoblocking', errnote=(( @@ -96,3 +131,163 @@ class ORFIE(InfoExtractor): 'entries': entries, 'id': playlist_id, } + + +class ORFOE1IE(InfoExtractor): + IE_NAME = 'orf:oe1' + IE_DESC = 'Radio Österreich 1' + _VALID_URL = r'http://oe1\.orf\.at/(?:programm/|konsole.*?#\?track_id=)(?P[0-9]+)' + + # Audios on ORF radio are only available for 7 days, so we can't add tests. + _TEST = { + 'url': 'http://oe1.orf.at/konsole?show=on_demand#?track_id=394211', + 'only_matching': True, + } + + def _real_extract(self, url): + show_id = self._match_id(url) + data = self._download_json( + 'http://oe1.orf.at/programm/%s/konsole' % show_id, + show_id + ) + + timestamp = datetime.datetime.strptime('%s %s' % ( + data['item']['day_label'], + data['item']['time'] + ), '%d.%m.%Y %H:%M') + unix_timestamp = calendar.timegm(timestamp.utctimetuple()) + + return { + 'id': show_id, + 'title': data['item']['title'], + 'url': data['item']['url_stream'], + 'ext': 'mp3', + 'description': data['item'].get('info'), + 'timestamp': unix_timestamp + } + + +class ORFFM4IE(InfoExtractor): + IE_NAME = 'orf:fm4' + IE_DESC = 'radio FM4' + _VALID_URL = r'http://fm4\.orf\.at/7tage/?#(?P[0-9]+)/(?P\w+)' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + show_date = mobj.group('date') + show_id = mobj.group('show') + + data = self._download_json( + 'http://audioapi.orf.at/fm4/json/2.0/broadcasts/%s/4%s' % (show_date, show_id), + show_id + ) + + def extract_entry_dict(info, title, subtitle): + return { + 'id': info['loopStreamId'].replace('.mp3', ''), + 'url': 'http://loopstream01.apa.at/?channel=fm4&id=%s' % info['loopStreamId'], + 'title': title, + 'description': subtitle, + 'duration': (info['end'] - info['start']) / 1000, + 'timestamp': info['start'] / 1000, + 'ext': 'mp3' + } + + entries = [extract_entry_dict(t, data['title'], data['subtitle']) for t in data['streams']] + + return { + '_type': 'playlist', + 'id': show_id, + 'title': data['title'], + 'description': data['subtitle'], + 'entries': entries + } + + +class ORFIPTVIE(InfoExtractor): + IE_NAME = 'orf:iptv' + IE_DESC = 'iptv.ORF.at' + _VALID_URL = r'http://iptv\.orf\.at/(?:#/)?stories/(?P\d+)' + + _TEST = { + 'url': 'http://iptv.orf.at/stories/2267952', + 'md5': '26ffa4bab6dbce1eee78bbc7021016cd', + 'info_dict': { + 'id': '339775', + 'ext': 'flv', + 'title': 'Kreml-Kritiker Nawalny wieder frei', + 'description': 'md5:6f24e7f546d364dacd0e616a9e409236', + 'duration': 84.729, + 'thumbnail': 're:^https?://.*\.jpg$', + 'upload_date': '20150306', + }, + } + + def _real_extract(self, url): + story_id = self._match_id(url) + + webpage = self._download_webpage( + 'http://iptv.orf.at/stories/%s' % story_id, story_id) + + video_id = self._search_regex( + r'data-video(?:id)?="(\d+)"', webpage, 'video id') + + data = self._download_json( + 'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s' % video_id, + video_id)[0] + + duration = float_or_none(data['duration'], 1000) + + video = data['sources']['default'] + load_balancer_url = video['loadBalancerUrl'] + abr = int_or_none(video.get('audioBitrate')) + vbr = int_or_none(video.get('bitrate')) + fps = int_or_none(video.get('videoFps')) + width = int_or_none(video.get('videoWidth')) + height = int_or_none(video.get('videoHeight')) + thumbnail = video.get('preview') + + rendition = self._download_json( + load_balancer_url, video_id, transform_source=strip_jsonp) + + f = { + 'abr': abr, + 'vbr': vbr, + 'fps': fps, + 'width': width, + 'height': height, + } + + formats = [] + for format_id, format_url in rendition['redirect'].items(): + if format_id == 'rtmp': + ff = f.copy() + ff.update({ + 'url': format_url, + 'format_id': format_id, + }) + formats.append(ff) + elif determine_ext(format_url) == 'f4m': + formats.extend(self._extract_f4m_formats( + format_url, video_id, f4m_id=format_id)) + elif determine_ext(format_url) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', m3u8_id=format_id)) + else: + continue + self._sort_formats(formats) + + title = remove_end(self._og_search_title(webpage), ' - iptv.ORF.at') + description = self._og_search_description(webpage) + upload_date = unified_strdate(self._html_search_meta( + 'dc.date', webpage, 'upload date')) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'duration': duration, + 'thumbnail': thumbnail, + 'upload_date': upload_date, + 'formats': formats, + }