X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fdrtv.py;h=390e79f8cfae9cc0ec4b0f045ed8bac88bd6c523;hb=HEAD;hp=cdccfd376b80ee5ebc61c25ab4cd00e12dcfc458;hpb=3a0879c8c801d27087396613d80f83c112a328f9;p=youtube-dl diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index cdccfd376..390e79f8c 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -1,41 +1,168 @@ +# coding: utf-8 from __future__ import unicode_literals +import binascii +import hashlib import re -from .subtitles import SubtitlesInfoExtractor -from .common import ExtractorError -from ..utils import parse_iso8601 +from .common import InfoExtractor +from ..aes import aes_cbc_decrypt +from ..compat import compat_urllib_parse_unquote +from ..utils import ( + bytes_to_intlist, + ExtractorError, + int_or_none, + intlist_to_bytes, + float_or_none, + mimetype2ext, + str_or_none, + try_get, + unified_timestamp, + update_url_query, + url_or_none, +) -class DRTVIE(SubtitlesInfoExtractor): - _VALID_URL = r'http://(?:www\.)?dr\.dk/tv/se/[^/]+/(?P[\da-z-]+)' - _TEST = { - 'url': 'http://www.dr.dk/tv/se/partiets-mand/partiets-mand-7-8', - 'md5': '4a7e1dd65cdb2643500a3f753c942f25', +class DRTVIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?:// + (?: + (?:www\.)?dr\.dk/(?:tv/se|nyheder|radio(?:/ondemand)?)/(?:[^/]+/)*| + (?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/(?:se|episode)/ + ) + (?P[\da-z_-]+) + ''' + _GEO_BYPASS = False + _GEO_COUNTRIES = ['DK'] + IE_NAME = 'drtv' + _TESTS = [{ + 'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10', + 'md5': '25e659cccc9a2ed956110a299fdf5983', 'info_dict': { - 'id': 'partiets-mand-7-8', + 'id': 'klassen-darlig-taber-10', 'ext': 'mp4', - 'title': 'Partiets mand (7:8)', - 'description': 'md5:a684b90a8f9336cd4aab94b7647d7862', - 'timestamp': 1403047940, - 'upload_date': '20140617', - 'duration': 1299.040, + 'title': 'Klassen - Dårlig taber (10)', + 'description': 'md5:815fe1b7fa656ed80580f31e8b3c79aa', + 'timestamp': 1539085800, + 'upload_date': '20181009', + 'duration': 606.84, + 'series': 'Klassen', + 'season': 'Klassen I', + 'season_number': 1, + 'season_id': 'urn:dr:mu:bundle:57d7e8216187a4031cfd6f6b', + 'episode': 'Episode 10', + 'episode_number': 10, + 'release_year': 2016, }, - } + 'expected_warnings': ['Unable to download f4m manifest'], + }, { + # embed + 'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang', + 'info_dict': { + 'id': 'urn:dr:mu:programcard:57c926176187a50a9c6e83c6', + 'ext': 'mp4', + 'title': 'christiania pusher street ryddes drdkrjpo', + 'description': 'md5:2a71898b15057e9b97334f61d04e6eb5', + 'timestamp': 1472800279, + 'upload_date': '20160902', + 'duration': 131.4, + }, + 'params': { + 'skip_download': True, + }, + 'expected_warnings': ['Unable to download f4m manifest'], + }, { + # with SignLanguage formats + 'url': 'https://www.dr.dk/tv/se/historien-om-danmark/-/historien-om-danmark-stenalder', + 'info_dict': { + 'id': 'historien-om-danmark-stenalder', + 'ext': 'mp4', + 'title': 'Historien om Danmark: Stenalder', + 'description': 'md5:8c66dcbc1669bbc6f873879880f37f2a', + 'timestamp': 1546628400, + 'upload_date': '20190104', + 'duration': 3502.56, + 'formats': 'mincount:20', + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://www.dr.dk/radio/p4kbh/regionale-nyheder-kh4/p4-nyheder-2019-06-26-17-30-9', + 'only_matching': True, + }, { + 'url': 'https://www.dr.dk/drtv/se/bonderoeven_71769', + 'info_dict': { + 'id': '00951930010', + 'ext': 'mp4', + 'title': 'Bonderøven (1:8)', + 'description': 'md5:3cf18fc0d3b205745d4505f896af8121', + 'timestamp': 1546542000, + 'upload_date': '20190103', + 'duration': 2576.6, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://www.dr.dk/drtv/episode/bonderoeven_71769', + 'only_matching': True, + }, { + 'url': 'https://dr-massive.com/drtv/se/bonderoeven_71769', + 'only_matching': True, + }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + if '>Programmet er ikke længere tilgængeligt' in webpage: + raise ExtractorError( + 'Video %s is not available' % video_id, expected=True) - programcard = self._download_json( - 'http://www.dr.dk/mu/programcard/expanded/%s' % video_id, video_id, 'Downloading video JSON') + video_id = self._search_regex( + (r'data-(?:material-identifier|episode-slug)="([^"]+)"', + r'data-resource="[^>"]+mu/programcard/expanded/([^"]+)"'), + webpage, 'video id', default=None) - data = programcard['Data'][0] + if not video_id: + video_id = self._search_regex( + r'(urn(?:%3A|:)dr(?:%3A|:)mu(?:%3A|:)programcard(?:%3A|:)[\da-f]+)', + webpage, 'urn', default=None) + if video_id: + video_id = compat_urllib_parse_unquote(video_id) - title = data['Title'] - description = data['Description'] - timestamp = parse_iso8601(data['CreatedTime'][:-5]) + _PROGRAMCARD_BASE = 'https://www.dr.dk/mu-online/api/1.4/programcard' + query = {'expanded': 'true'} + + if video_id: + programcard_url = '%s/%s' % (_PROGRAMCARD_BASE, video_id) + else: + programcard_url = _PROGRAMCARD_BASE + page = self._parse_json( + self._search_regex( + r'data\s*=\s*({.+?})\s*(?:;|[\da-z-]+)' + _GEO_COUNTRIES = ['DK'] + _TEST = { + 'url': 'https://www.dr.dk/tv/live/dr1', + 'info_dict': { + 'id': 'dr1', + 'ext': 'mp4', + 'title': 're:^DR1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + } + + def _real_extract(self, url): + channel_id = self._match_id(url) + channel_data = self._download_json( + 'https://www.dr.dk/mu-online/api/1.0/channel/' + channel_id, + channel_id) + title = self._live_title(channel_data['Title']) + + formats = [] + for streaming_server in channel_data.get('StreamingServers', []): + server = streaming_server.get('Server') + if not server: + continue + link_type = streaming_server.get('LinkType') + for quality in streaming_server.get('Qualities', []): + for stream in quality.get('Streams', []): + stream_path = stream.get('Stream') + if not stream_path: + continue + stream_url = update_url_query( + '%s/%s' % (server, stream_path), {'b': ''}) + if link_type == 'HLS': + formats.extend(self._extract_m3u8_formats( + stream_url, channel_id, 'mp4', + m3u8_id=link_type, fatal=False, live=True)) + elif link_type == 'HDS': + formats.extend(self._extract_f4m_formats(update_url_query( + '%s/%s' % (server, stream_path), {'hdcore': '3.7.0'}), + channel_id, f4m_id=link_type, fatal=False)) + self._sort_formats(formats) + + return { + 'id': channel_id, + 'title': title, + 'thumbnail': channel_data.get('PrimaryImageUri'), + 'formats': formats, + 'is_live': True, }