X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fsportbox.py;h=e7bd5bf91921752757e1bc9beb390798b86a9c8a;hb=329e3dd5adf52520c87ba31395d090455114783b;hp=8686f9d11fa3178eefe3eb71fe5a1413e729beeb;hpb=ef28a6cb26630f8f198a72eee34a2b5c8bd2f802;p=youtube-dl diff --git a/youtube_dl/extractor/sportbox.py b/youtube_dl/extractor/sportbox.py index 8686f9d11..e7bd5bf91 100644 --- a/youtube_dl/extractor/sportbox.py +++ b/youtube_dl/extractor/sportbox.py @@ -4,72 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import compat_urlparse -from ..utils import ( - parse_duration, - parse_iso8601, -) - - -class SportBoxIE(InfoExtractor): - _VALID_URL = r'https?://news\.sportbox\.ru/(?:[^/]+/)+spbvideo_NI\d+_(?P.+)' - _TESTS = [{ - 'url': 'http://news.sportbox.ru/Vidy_sporta/Avtosport/Rossijskij/spbvideo_NI483529_Gonka-2-zaezd-Obyedinenniy-2000-klassi-Turing-i-S', - 'md5': 'ff56a598c2cf411a9a38a69709e97079', - 'info_dict': { - 'id': '80822', - 'ext': 'mp4', - 'title': 'Гонка 2 заезд ««Объединенный 2000»: классы Туринг и Супер-продакшн', - 'description': 'md5:81715fa9c4ea3d9e7915dc8180c778ed', - 'thumbnail': 're:^https?://.*\.jpg$', - 'timestamp': 1411896237, - 'upload_date': '20140928', - 'duration': 4846, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - 'url': 'http://news.sportbox.ru/Vidy_sporta/billiard/spbvideo_NI486287_CHempionat-mira-po-dinamichnoy-piramide-4', - 'only_matching': True, - }, { - 'url': 'http://news.sportbox.ru/video/no_ads/spbvideo_NI536574_V_Novorossijske_proshel_detskij_turnir_Pole_slavy_bojevoj?ci=211355', - 'only_matching': True, - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('display_id') - - webpage = self._download_webpage(url, display_id) - - player = self._search_regex( - r'src="/?(vdl/player/[^"]+)"', webpage, 'player') - - title = self._html_search_regex( - r'

([^<]+)

', webpage, 'title') - description = self._html_search_regex( - r'(?s)
(.+?)
', - webpage, 'description', fatal=False) - thumbnail = self._og_search_thumbnail(webpage) - timestamp = parse_iso8601(self._search_regex( - r'([^<]+)', - webpage, 'timestamp', fatal=False)) - duration = parse_duration(self._html_search_regex( - r'', - webpage, 'duration', fatal=False)) - - return { - '_type': 'url_transparent', - 'url': compat_urlparse.urljoin(url, '/%s' % player), - 'display_id': display_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'timestamp': timestamp, - 'duration': duration, - } +from ..utils import js_to_json class SportBoxEmbedIE(InfoExtractor): @@ -80,7 +15,7 @@ class SportBoxEmbedIE(InfoExtractor): 'id': '211355', 'ext': 'mp4', 'title': 'В Новороссийске прошел детский турнир «Поле славы боевой»', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, 'params': { # m3u8 download @@ -102,18 +37,32 @@ class SportBoxEmbedIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - hls = self._search_regex( - r"sportboxPlayer\.jwplayer_common_params\.file\s*=\s*['\"]([^'\"]+)['\"]", - webpage, 'hls file') + formats = [] + + def cleanup_js(code): + # desktop_advert_config contains complex Javascripts and we don't need it + return js_to_json(re.sub(r'desktop_advert_config.*', '', code)) + + jwplayer_data = self._parse_json(self._search_regex( + r'(?s)player\.setup\(({.+?})\);', webpage, 'jwplayer settings'), video_id, + transform_source=cleanup_js) + + hls_url = jwplayer_data.get('hls_url') + if hls_url: + formats.extend(self._extract_m3u8_formats( + hls_url, video_id, ext='mp4', m3u8_id='hls')) - formats = self._extract_m3u8_formats(hls, video_id, 'mp4') + rtsp_url = jwplayer_data.get('rtsp_url') + if rtsp_url: + formats.append({ + 'url': rtsp_url, + 'format_id': 'rtsp', + }) - title = self._search_regex( - r'sportboxPlayer\.node_title\s*=\s*"([^"]+)"', webpage, 'title') + self._sort_formats(formats) - thumbnail = self._search_regex( - r'sportboxPlayer\.jwplayer_common_params\.image\s*=\s*"([^"]+)"', - webpage, 'thumbnail', default=None) + title = jwplayer_data['node_title'] + thumbnail = jwplayer_data.get('image_url') return { 'id': video_id,