X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Forf.py;h=4e293392b3d39b46ad1612d884068a2dbfaeef23;hb=bc694039e47cc871c98abacdf1c0a2e5a257a8a4;hp=5f5694393765104b45b573c53155d447a45b1e50;hpb=4f81667d76dca6844b454dde61352f7d889237c0;p=youtube-dl diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index 5f5694393..4e293392b 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -3,39 +3,69 @@ from __future__ import unicode_literals import json import re +import calendar +import datetime from .common import InfoExtractor from ..utils import ( HEADRequest, unified_strdate, + ExtractorError, ) -class ORFIE(InfoExtractor): - _VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics/.+?|program/[^/]+)/(?P<id>\d+)' +class ORFTVthekIE(InfoExtractor): + IE_NAME = 'orf:tvthek' + IE_DESC = 'ORF TVthek' + _VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics?/.+?|program/[^/]+)/(?P<id>\d+)' - _TEST = { - 'url': 'http://tvthek.orf.at/program/matinee-Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7317210/Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7319746/Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7319747', - 'file': '7319747.mp4', - 'md5': 'bd803c5d8c32d3c64a0ea4b4eeddf375', - 'info_dict': { - 'title': 'Was Sie schon immer über Klassik wissen wollten', - 'description': 'md5:0ddf0d5f0060bd53f744edaa5c2e04a4', - 'duration': 3508, - 'upload_date': '20140105', - }, - 'skip': 'Blocked outside of Austria', - } + _TESTS = [{ + 'url': 'http://tvthek.orf.at/program/Aufgetischt/2745173/Aufgetischt-Mit-der-Steirischen-Tafelrunde/8891389', + 'playlist': [{ + 'md5': '2942210346ed779588f428a92db88712', + 'info_dict': { + 'id': '8896777', + 'ext': 'mp4', + 'title': 'Aufgetischt: Mit der Steirischen Tafelrunde', + 'description': 'md5:c1272f0245537812d4e36419c207b67d', + 'duration': 2668, + 'upload_date': '20141208', + }, + }], + 'skip': 'Blocked outside of Austria / Germany', 
+ }, { + 'url': 'http://tvthek.orf.at/topic/Im-Wandel-der-Zeit/8002126/Best-of-Ingrid-Thurnher/7982256', + 'playlist': [{ + 'md5': '68f543909aea49d621dfc7703a11cfaf', + 'info_dict': { + 'id': '7982259', + 'ext': 'mp4', + 'title': 'Best of Ingrid Thurnher', + 'upload_date': '20140527', + 'description': 'Viele Jahre war Ingrid Thurnher das "Gesicht" der ZIB 2. Vor ihrem Wechsel zur ZIB 2 im jahr 1995 moderierte sie unter anderem "Land und Leute", "Österreich-Bild" und "Niederösterreich heute".', + } + }], + '_skip': 'Blocked outside of Austria / Germany', + }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - playlist_id = mobj.group('id') + playlist_id = self._match_id(url) webpage = self._download_webpage(url, playlist_id) data_json = self._search_regex( r'initializeAdworx\((.+?)\);\n', webpage, 'video info') all_data = json.loads(data_json) - sdata = all_data[0]['values']['segments'] + + def get_segments(all_data): + for data in all_data: + if data['name'] in ( + 'Tracker::EPISODE_DETAIL_PAGE_OVER_PROGRAM', + 'Tracker::EPISODE_DETAIL_PAGE_OVER_TOPIC'): + return data['values']['segments'] + + sdata = get_segments(all_data) + if not sdata: + raise ExtractorError('Unable to extract segments') def quality_to_int(s): m = re.search('([0-9]+)', s) @@ -96,3 +126,74 @@ class ORFIE(InfoExtractor): 'entries': entries, 'id': playlist_id, } + + +class ORFOE1IE(InfoExtractor): + IE_NAME = 'orf:oe1' + IE_DESC = 'Radio Österreich 1' + _VALID_URL = r'http://oe1\.orf\.at/(?:programm/|konsole.*?#\?track_id=)(?P<id>[0-9]+)' + + # Audios on ORF radio are only available for 7 days, so we can't add tests. 
+ _TEST = { + 'url': 'http://oe1.orf.at/konsole?show=on_demand#?track_id=394211', + 'only_matching': True, + } + + def _real_extract(self, url): + show_id = self._match_id(url) + data = self._download_json( + 'http://oe1.orf.at/programm/%s/konsole' % show_id, + show_id + ) + + timestamp = datetime.datetime.strptime('%s %s' % ( + data['item']['day_label'], + data['item']['time'] + ), '%d.%m.%Y %H:%M') + unix_timestamp = calendar.timegm(timestamp.utctimetuple()) + + return { + 'id': show_id, + 'title': data['item']['title'], + 'url': data['item']['url_stream'], + 'ext': 'mp3', + 'description': data['item'].get('info'), + 'timestamp': unix_timestamp + } + + +class ORFFM4IE(InfoExtractor): + IE_NAME = 'orf:fm4' + IE_DESC = 'radio FM4' + _VALID_URL = r'http://fm4\.orf\.at/7tage/?#(?P<date>[0-9]+)/(?P<show>\w+)' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + show_date = mobj.group('date') + show_id = mobj.group('show') + + data = self._download_json( + 'http://audioapi.orf.at/fm4/json/2.0/broadcasts/%s/4%s' % (show_date, show_id), + show_id + ) + + def extract_entry_dict(info, title, subtitle): + return { + 'id': info['loopStreamId'].replace('.mp3', ''), + 'url': 'http://loopstream01.apa.at/?channel=fm4&id=%s' % info['loopStreamId'], + 'title': title, + 'description': subtitle, + 'duration': (info['end'] - info['start']) / 1000, + 'timestamp': info['start'] / 1000, + 'ext': 'mp3' + } + + entries = [extract_entry_dict(t, data['title'], data['subtitle']) for t in data['streams']] + + return { + '_type': 'playlist', + 'id': show_id, + 'title': data['title'], + 'description': data['subtitle'], + 'entries': entries + }