X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fnova.py;h=aefd8eab150fe81b4a3febcdaf0c0c94d0529b23;hb=8cd809fb3dc446ec06afa2c1d398c9fe31a435cc;hp=3f9c776ef665ab47624eeab7ba60f5754dbf213e;hpb=c23d5ce9260caef2c499efe69e1ffed3f14b979d;p=youtube-dl diff --git a/youtube_dl/extractor/nova.py b/youtube_dl/extractor/nova.py index 3f9c776ef..aefd8eab1 100644 --- a/youtube_dl/extractor/nova.py +++ b/youtube_dl/extractor/nova.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re @@ -6,28 +6,97 @@ import re from .common import InfoExtractor from ..utils import ( clean_html, + int_or_none, + js_to_json, + qualities, unified_strdate, + url_or_none, ) -class NovaIE(InfoExtractor): - IE_DESC = 'TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz' - _VALID_URL = 'http://(?:[^.]+\.)?(?Ptv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P[^/]+?)(?:\.html|/|$)' - _TESTS = [{ - 'url': 'http://tvnoviny.nova.cz/clanek/novinky/co-na-sebe-sportaci-praskli-vime-jestli-pujde-hrdlicka-na-materskou.html?utm_source=tvnoviny&utm_medium=cpfooter&utm_campaign=novaplus', +class NovaEmbedIE(InfoExtractor): + _VALID_URL = r'https?://media\.cms\.nova\.cz/embed/(?P[^/?#&]+)' + _TEST = { + 'url': 'https://media.cms.nova.cz/embed/8o0n0r?autoplay=1', + 'md5': 'b3834f6de5401baabf31ed57456463f7', 'info_dict': { - 'id': '1608920', - 'display_id': 'co-na-sebe-sportaci-praskli-vime-jestli-pujde-hrdlicka-na-materskou', - 'ext': 'flv', - 'title': 'Duel: Michal Hrdlička a Petr Suchoň', - 'description': 'md5:d0cc509858eee1b1374111c588c6f5d5', - 'thumbnail': 're:^https?://.*\.(?:jpg)', + 'id': '8o0n0r', + 'ext': 'mp4', + 'title': '2180. díl', + 'thumbnail': r're:^https?://.*\.jpg', + 'duration': 2578, }, - 'params': { - # rtmp download - 'skip_download': True, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + bitrates = self._parse_json( + self._search_regex( + r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'), + video_id, transform_source=js_to_json) + + QUALITIES = ('lq', 'mq', 'hq', 'hd') + quality_key = qualities(QUALITIES) + + formats = [] + for format_id, format_list in bitrates.items(): + if format_id == 'hls': + m3u8_url = url_or_none(format_list) + if not m3u8_url: + continue + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, ext='mp4', m3u8_id='hls', fatal=False)) + + if not isinstance(format_list, list): + continue + for format_url in format_list: + format_url = url_or_none(format_url) + if not format_url: + continue + f = { + 'url': format_url, + } + f_id = format_id + for quality in QUALITIES: + if '%s.mp4' % quality in format_url: + f_id += '-%s' % quality + f.update({ + 'quality': quality_key(quality), + 'format_note': quality.upper(), + }) + break + f['format_id'] = f_id + formats.append(f) + self._sort_formats(formats) + + title = self._og_search_title( + webpage, default=None) or self._search_regex( + (r'(?P[^<]+)', + r'videoTitle\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage, + 'title', group='value') + thumbnail = self._og_search_thumbnail( + webpage, default=None) or self._search_regex( + r'poster\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, + 'thumbnail', fatal=False, group='value') + duration = int_or_none(self._search_regex( + r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False)) + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'duration': duration, + 'formats': formats, } - }, { + + +class NovaIE(InfoExtractor): + IE_DESC = 'TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz' + _VALID_URL = r'https?://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/|$)' + _TESTS = [{ 'url': 'http://tn.nova.cz/clanek/tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci.html#player_13260', 'md5': '1dd7b9d5ea27bc361f110cd855a19bd3', 'info_dict': { @@ -36,34 +105,7 @@ class NovaIE(InfoExtractor): 'ext': 'mp4', 'title': 'Podzemní nemocnice v pražské Krči', 'description': 'md5:f0a42dd239c26f61c28f19e62d20ef53', - 'thumbnail': 're:^https?://.*\.(?:jpg)', - } - }, { - 'url': 'http://novaplus.nova.cz/porad/policie-modrava/video/5591-policie-modrava-15-dil-blondynka-na-hrbitove', - 'info_dict': { - 'id': '1756825', - 'display_id': '5591-policie-modrava-15-dil-blondynka-na-hrbitove', - 'ext': 'flv', - 'title': 'Policie Modrava - 15. díl - Blondýnka na hřbitově', - 'description': 'md5:dc24e50be5908df83348e50d1431295e', # Make sure this description is clean of html tags - 'thumbnail': 're:^https?://.*\.(?:jpg)', - }, - 'params': { - # rtmp download - 'skip_download': True, - } - }, { - 'url': 'http://novaplus.nova.cz/porad/televizni-noviny/video/5585-televizni-noviny-30-5-2015/', - 'info_dict': { - 'id': '1756858', - 'ext': 'flv', - 'title': 'Televizní noviny - 30. 5. 2015', - 'thumbnail': 're:^https?://.*\.(?:jpg)', - 'upload_date': '20150530', - }, - 'params': { - # rtmp download - 'skip_download': True, + 'thumbnail': r're:^https?://.*\.(?:jpg)', } }, { 'url': 'http://fanda.nova.cz/clanek/fun-and-games/krvavy-epos-zaklinac-3-divoky-hon-vychazi-vyhrajte-ho-pro-sebe.html', @@ -72,13 +114,27 @@ class NovaIE(InfoExtractor): 'ext': 'mp4', 'title': 'Zaklínač 3: Divoký hon', 'description': 're:.*Pokud se stejně jako my nemůžete.*', - 'thumbnail': 're:https?://.*\.jpg(\?.*)?', + 'thumbnail': r're:https?://.*\.jpg(\?.*)?', 'upload_date': '20150521', }, 'params': { # rtmp download 'skip_download': True, } + }, { + # media.cms.nova.cz embed + 'url': 'https://novaplus.nova.cz/porad/ulice/epizoda/18760-2180-dil', + 'info_dict': { + 'id': '8o0n0r', + 'ext': 'mp4', + 'title': '2180. díl', + 'thumbnail': r're:^https?://.*\.jpg', + 'duration': 2578, + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': [NovaEmbedIE.ie_key()], }, { 'url': 'http://sport.tn.nova.cz/clanek/sport/hokej/nhl/zivot-jde-dal-hodnotil-po-vyrazeni-z-playoff-jiri-sekac.html', 'only_matching': True, @@ -103,6 +159,15 @@ class NovaIE(InfoExtractor): webpage = self._download_webpage(url, display_id) + # novaplus + embed_id = self._search_regex( + r'<iframe[^>]+\bsrc=["\'](?:https?:)?//media\.cms\.nova\.cz/embed/([^/?#&]+)', + webpage, 'embed url', default=None) + if embed_id: + return self.url_result( + 'https://media.cms.nova.cz/embed/%s' % embed_id, + ie=NovaEmbedIE.ie_key(), video_id=embed_id) + video_id = self._search_regex( [r"(?:media|video_id)\s*:\s*'(\d+)'", r'media=(\d+)', @@ -111,8 +176,21 @@ class NovaIE(InfoExtractor): webpage, 'video id') config_url = self._search_regex( - r'src="(http://tn\.nova\.cz/bin/player/videojs/config\.php\?[^"]+)"', + r'src="(https?://(?:tn|api)\.nova\.cz/bin/player/videojs/config\.php\?[^"]+)"', webpage, 'config url', default=None) + config_params = {} + + if not config_url: + player = self._parse_json( + self._search_regex( + r'(?s)Player\s*\(.+?\s*,\s*({.+?\bmedia\b["\']?\s*:\s*["\']?\d+.+?})\s*\)', webpage, + 'player', default='{}'), + video_id, transform_source=js_to_json, fatal=False) + if player: + config_url = url_or_none(player.get('configUrl')) + params = player.get('configParams') + if isinstance(params, dict): + config_params = params if not config_url: DEFAULT_SITE_ID = '23000' @@ -127,14 +205,20 @@ class NovaIE(InfoExtractor): } site_id = self._search_regex( - r'site=(\d+)', webpage, 'site id', default=None) or SITES.get(site, DEFAULT_SITE_ID) + r'site=(\d+)', webpage, 'site id', default=None) or SITES.get( + site, DEFAULT_SITE_ID) - config_url = ('http://tn.nova.cz/bin/player/videojs/config.php?site=%s&media=%s&jsVar=vjsconfig' - % (site_id, video_id)) + config_url = 'https://api.nova.cz/bin/player/videojs/config.php' + config_params = { + 'site': site_id, + 'media': video_id, + 'quality': 3, + 'version': 1, + } config = self._download_json( config_url, display_id, - 'Downloading config JSON', + 'Downloading config JSON', query=config_params, transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1]) mediafile = config['mediafile']