X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Ftvplay.py;h=8f1ff3b761d8bd6e21ee4f122e1ed62fbabd2d78;hb=2f483bc1c389709623117079439708783122b5ec;hp=c8ec2465c84eb8f57cb8c90fc9e58973c59029ae;hpb=b35b0d73d853c52ca96ccf4488a4f8960a12e2ae;p=youtube-dl diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index c8ec2465c..8f1ff3b76 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -15,7 +15,11 @@ from ..utils import ( int_or_none, parse_iso8601, qualities, + smuggle_url, + try_get, + unsmuggle_url, update_url_query, + url_or_none, ) @@ -28,12 +32,12 @@ class TVPlayIE(InfoExtractor): https?:// (?:www\.)? (?: - tvplay(?:\.skaties)?\.lv/parraides| - (?:tv3play|play\.tv3)\.lt/programos| + tvplay(?:\.skaties)?\.lv(?:/parraides)?| + (?:tv3play|play\.tv3)\.lt(?:/programos)?| tv3play(?:\.tv3)?\.ee/sisu| (?:tv(?:3|6|8|10)play|viafree)\.se/program| (?:(?:tv3play|viasat4play|tv6play|viafree)\.no|(?:tv3play|viafree)\.dk)/programmer| - play\.novatv\.bg/programi + play\.nova(?:tv)?\.bg/programi ) /(?:[^/]+/)+ ) @@ -199,10 +203,23 @@ class TVPlayIE(InfoExtractor): 'skip_download': True, }, }, + { + 'url': 'https://play.nova.bg/programi/zdravei-bulgariya/764300?autostart=true', + 'only_matching': True, + }, { 'url': 'http://tvplay.skaties.lv/parraides/vinas-melo-labak/418113?autostart=true', 'only_matching': True, }, + { + 'url': 'https://tvplay.skaties.lv/vinas-melo-labak/418113/?autostart=true', + 'only_matching': True, + }, + { + # views is null + 'url': 'http://tvplay.skaties.lv/parraides/tv3-zinas/760183', + 'only_matching': True, + }, { 'url': 'http://tv3play.tv3.ee/sisu/kodu-keset-linna/238551?autostart=true', 'only_matching': True, @@ -218,8 +235,17 @@ class TVPlayIE(InfoExtractor): ] def _real_extract(self, url): - video_id = self._match_id(url) + url, smuggled_data = unsmuggle_url(url, {}) + self._initialize_geo_bypass({ + 'countries': smuggled_data.get('geo_countries'), + }) + video_id = self._match_id(url) + geo_country = self._search_regex( + r'https?://[^/]+\.([a-z]{2})', url, + 'geo country', default=None) + if geo_country: + self._initialize_geo_bypass({'countries': [geo_country.upper()]}) video = self._download_json( 'http://playapi.mtgx.tv/v3/videos/%s' % video_id, video_id, 'Downloading video JSON') @@ -238,7 +264,8 @@ class TVPlayIE(InfoExtractor): quality = qualities(['hls', 'medium', 'high']) formats = [] for format_id, video_url in streams.get('streams', {}).items(): - if not video_url or not isinstance(video_url, compat_str): + video_url = url_or_none(video_url) + if not video_url: continue ext = determine_ext(video_url) if ext == 'f4m': @@ -258,6 +285,8 @@ class TVPlayIE(InfoExtractor): 'ext': ext, } if video_url.startswith('rtmp'): + if smuggled_data.get('skip_rtmp'): + continue m = re.search( r'^(?Prtmp://[^/]+/(?P[^/]+))/(?P.+)$', video_url) if not m: @@ -267,6 +296,7 @@ class TVPlayIE(InfoExtractor): 'url': m.group('url'), 'app': m.group('app'), 'play_path': m.group('playpath'), + 'preference': -1, }) else: fmt.update({ @@ -306,7 +336,7 @@ class TVPlayIE(InfoExtractor): 'season_number': season_number, 'duration': int_or_none(video.get('duration')), 'timestamp': parse_iso8601(video.get('created_at')), - 'view_count': int_or_none(video.get('views', {}).get('total')), + 'view_count': try_get(video, lambda x: x['views']['total'], int), 'age_limit': int_or_none(video.get('age_limit', 0)), 'formats': formats, 'subtitles': subtitles, @@ -342,6 +372,29 @@ class ViafreeIE(InfoExtractor): 'skip_download': True, }, 'add_ie': [TVPlayIE.ie_key()], + }, { + # with relatedClips + 'url': 'http://www.viafree.se/program/reality/sommaren-med-youtube-stjarnorna/sasong-1/avsnitt-1', + 'info_dict': { + 'id': '758770', + 'ext': 'mp4', + 'title': 'Sommaren med YouTube-stjärnorna S01E01', + 'description': 'md5:2bc69dce2c4bb48391e858539bbb0e3f', + 'series': 'Sommaren med YouTube-stjärnorna', + 'season': 'Säsong 1', + 'season_number': 1, + 'duration': 1326, + 'timestamp': 1470905572, + 'upload_date': '20160811', + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': [TVPlayIE.ie_key()], + }, { + # Different og:image URL schema + 'url': 'http://www.viafree.se/program/reality/sommaren-med-youtube-stjarnorna/sasong-1/avsnitt-2', + 'only_matching': True, }, { 'url': 'http://www.viafree.no/programmer/underholdning/det-beste-vorspielet/sesong-2/episode-1', 'only_matching': True, @@ -354,13 +407,151 @@ class ViafreeIE(InfoExtractor): def suitable(cls, url): return False if TVPlayIE.suitable(url) else super(ViafreeIE, cls).suitable(url) + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + data = self._parse_json( + self._search_regex( + r'(?s)window\.App\s*=\s*({.+?})\s*;\s*\d+)' + _TESTS = [{ + 'url': 'https://tvplay.tv3.lt/aferistai-n-7/aferistai-10047125/', + 'info_dict': { + 'id': '366367', + 'ext': 'mp4', + 'title': 'Aferistai', + 'description': 'Aferistai. Kalėdinė pasaka.', + 'series': 'Aferistai [N-7]', + 'season': '1 sezonas', + 'season_number': 1, + 'duration': 464, + 'timestamp': 1394209658, + 'upload_date': '20140307', + 'age_limit': 18, + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': [TVPlayIE.ie_key()], + }, { + 'url': 'https://tvplay.skaties.lv/vinas-melo-labak/vinas-melo-labak-10280317/', + 'only_matching': True, + }, { + 'url': 'https://tvplay.tv3.ee/cool-d-ga-mehhikosse/cool-d-ga-mehhikosse-10044354/', + 'only_matching': True, + }] + def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) video_id = self._search_regex( - r'currentVideo["\']\s*:\s*.+?["\']id["\']\s*:\s*["\'](?P\d{6,})', - webpage, 'video id') + r'data-asset-id\s*=\s*["\'](\d{5,7})\b', webpage, 'video id', + default=None) + + if video_id: + return self.url_result( + 'mtg:%s' % video_id, ie=TVPlayIE.ie_key(), video_id=video_id) - return self.url_result('mtg:%s' % video_id, TVPlayIE.ie_key()) + m3u8_url = self._search_regex( + r'data-file\s*=\s*(["\'])(?P(?:(?!\1).)+)\1', webpage, + 'm3u8 url', group='url') + + formats = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls') + self._sort_formats(formats) + + title = self._search_regex( + r'data-title\s*=\s*(["\'])(?P(?:(?!\1).)+)\1', webpage, + 'title', default=None, group='value') or self._html_search_meta( + 'title', webpage, default=None) or self._og_search_title( + webpage) + + description = self._html_search_meta( + 'description', webpage, + default=None) or self._og_search_description(webpage) + + thumbnail = self._search_regex( + r'data-image\s*=\s*(["\'])(?P(?:(?!\1).)+)\1', webpage, + 'thumbnail', default=None, group='url') or self._html_search_meta( + 'thumbnail', webpage, default=None) or self._og_search_thumbnail( + webpage) + + duration = int_or_none(self._search_regex( + r'data-duration\s*=\s*["\'](\d+)', webpage, 'duration', + fatal=False)) + + season = self._search_regex( + (r'data-series-title\s*=\s*(["\'])[^/]+/(?P(?:(?!\1).)+)\1', + r'\bseason\s*:\s*(["\'])(?P(?:(?!\1).)+)\1'), webpage, + 'season', default=None, group='value') + season_number = int_or_none(self._search_regex( + r'(\d+)(?:[.\s]+sezona|\s+HOOAEG)', season or '', 'season number', + default=None)) + episode = self._search_regex( + r'(["\'])(?P(?:(?!\1).)+)\1', webpage, 'episode', + default=None, group='value') + episode_number = int_or_none(self._search_regex( + r'(?:S[eē]rija|Osa)\s+(\d+)', episode or '', 'episode number', + default=None)) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + 'season': season, + 'season_number': season_number, + 'episode': episode, + 'episode_number': episode_number, + 'formats': formats, + }