# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
    int_or_none,
    js_to_json,
    try_get,
)


class JojIE(InfoExtractor):
    """Extractor for ``media.joj.sk`` embed pages and ``joj:<id>`` URLs."""

    # Accepts either the internal "joj:<id>" form or a media.joj.sk embed
    # URL; the trailing path component is captured as the video id.
    _VALID_URL = r'''(?x)
                    (?:
                        joj:|
                        https?://media\.joj\.sk/embed/
                    )
                    (?P<id>[^/?#^]+)
                '''
    _TESTS = [{
        'url': 'https://media.joj.sk/embed/a388ec4c-6019-4a4a-9312-b1bee194e932',
        'info_dict': {
            'id': 'a388ec4c-6019-4a4a-9312-b1bee194e932',
            'ext': 'mp4',
            'title': 'NOVÉ BÝVANIE',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 3118,
        }
    }, {
        'url': 'https://media.joj.sk/embed/9i1cxv',
        'only_matching': True,
    }, {
        'url': 'joj:a388ec4c-6019-4a4a-9312-b1bee194e932',
        'only_matching': True,
    }, {
        'url': 'joj:9i1cxv',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the ``src`` of every media.joj.sk embed iframe in *webpage*.

        The returned list is empty when the page contains no such iframe.
        URLs are returned exactly as written in the page, so they may be
        protocol-relative (``//media.joj.sk/...``).
        """
        return [
            mobj.group('url')
            for mobj in re.finditer(
                r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//media\.joj\.sk/embed/(?:(?!\1).)+)\1',
                webpage)]

    def _real_extract(self, url):
        """Build the info dict for the video identified by *url*.

        Formats are read from the ``mp4`` list of the ``src``/``bitrates``
        JavaScript object embedded in the player page; when that yields
        nothing, the ``Video.php`` XML playlist is used instead.
        """
        video_id = self._match_id(url)

        # Always fetch the canonical embed page, so "joj:<id>" works too.
        webpage = self._download_webpage(
            'https://media.joj.sk/embed/%s' % video_id, video_id)

        # Prefer the player's videoTitle, then <title>, then og:title.
        title = self._search_regex(
            (r'videoTitle\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
             r'<title>(?P<title>[^<]+)'), webpage, 'title',
            default=None, group='title') or self._og_search_title(webpage)

        # Non-fatal: a missing or unparsable object just triggers the
        # XML fallback below.
        bitrates = self._parse_json(
            self._search_regex(
                r'(?s)(?:src|bitrates)\s*=\s*({.+?});', webpage, 'bitrates',
                default='{}'),
            video_id, transform_source=js_to_json, fatal=False)

        formats = []
        for format_url in try_get(bitrates, lambda x: x['mp4'], list) or []:
            if isinstance(format_url, compat_str):
                height = self._search_regex(
                    r'(\d+)[pP]\.', format_url, 'height', default=None)
                formats.append({
                    'url': format_url,
                    'format_id': '%sp' % height if height else None,
                    # height is None when the URL carries no "<N>p." token;
                    # int_or_none keeps that case from raising TypeError.
                    'height': int_or_none(height),
                })
        if not formats:
            playlist = self._download_xml(
                'https://media.joj.sk/services/Video.php?clip=%s' % video_id,
                video_id)
            for file_el in playlist.findall('./files/file'):
                path = file_el.get('path')
                if not path:
                    continue
                format_id = file_el.get('id') or file_el.get('label')
                formats.append({
                    # Only the first "dat/" in the path is dropped when
                    # building the storage URL.
                    'url': 'http://n16.joj.sk/storage/%s' % path.replace(
                        'dat/', '', 1),
                    'format_id': format_id,
                    'height': int_or_none(self._search_regex(
                        r'(\d+)[pP]', format_id or path, 'height',
                        default=None)),
                })
        self._sort_formats(formats)

        thumbnail = self._og_search_thumbnail(webpage)

        duration = int_or_none(self._search_regex(
            r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
        }