X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcanvas.py;h=c506bc5dd2402a95752bdf3223fe4a24cf9d06ae;hb=ca069f68816c5da790c5745713b38c70df6abf65;hp=c7e9b8ff9c4970724aa2a2496a29db892df76ed2;hpb=7913e0fca7df6840e8434449f534f9744c9394f2;p=youtube-dl diff --git a/youtube_dl/extractor/canvas.py b/youtube_dl/extractor/canvas.py index c7e9b8ff9..c506bc5dd 100644 --- a/youtube_dl/extractor/canvas.py +++ b/youtube_dl/extractor/canvas.py @@ -5,20 +5,19 @@ import json from .common import InfoExtractor from .gigya import GigyaBaseIE - - from ..compat import compat_HTTPError from ..utils import ( ExtractorError, strip_or_none, float_or_none, int_or_none, + merge_dicts, parse_iso8601, ) class CanvasIE(InfoExtractor): - _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?Pcanvas|een|ketnet|vrtvideo)/assets/(?P[^/?#&]+)' + _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?Pcanvas|een|ketnet|vrt(?:video|nieuws)|sporza)/assets/(?P[^/?#&]+)' _TESTS = [{ 'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475', 'md5': '90139b746a0a9bd7bb631283f6e2a64e', @@ -36,6 +35,10 @@ class CanvasIE(InfoExtractor): 'url': 'https://mediazone.vrt.be/api/v1/canvas/assets/mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e', 'only_matching': True, }] + _HLS_ENTRY_PROTOCOLS_MAP = { + 'HLS': 'm3u8_native', + 'HLS_AES': 'm3u8', + } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -53,9 +56,9 @@ class CanvasIE(InfoExtractor): format_url, format_type = target.get('url'), target.get('type') if not format_url or not format_type: continue - if format_type == 'HLS': + if format_type in self._HLS_ENTRY_PROTOCOLS_MAP: formats.extend(self._extract_m3u8_formats( - format_url, video_id, 'mp4', entry_protocol='m3u8_native', + format_url, video_id, 'mp4', self._HLS_ENTRY_PROTOCOLS_MAP[format_type], m3u8_id=format_type, fatal=False)) elif format_type == 'HDS': formats.extend(self._extract_f4m_formats( @@ -192,7 +195,7 @@ class VrtNUIE(GigyaBaseIE): 'season_number': 1, 'episode_number': 1, }, - # 'skip': 'This video is only available for registered users' + 'skip': 'This video is only available for registered users' }] _NETRC_MACHINE = 'vrtnu' _APIKEY = '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy' @@ -204,7 +207,7 @@ class VrtNUIE(GigyaBaseIE): def _login(self): username, password = self._get_login_info() if username is None: - self.raise_login_required() + return auth_data = { 'APIKey': self._APIKEY, @@ -248,11 +251,15 @@ class VrtNUIE(GigyaBaseIE): def _real_extract(self, url): display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) + webpage, urlh = self._download_webpage_handle(url, display_id) + + info = self._search_json_ld(webpage, display_id, default={}) - title = self._html_search_regex( + # title is optional here since it may be extracted by extractor + # that is delegated from here + title = strip_or_none(self._html_search_regex( r'(?ms)

(.+?)

', - webpage, 'title').strip() + webpage, 'title', default=None)) description = self._html_search_regex( r'(?ms)
(.+?)
', @@ -278,20 +285,26 @@ class VrtNUIE(GigyaBaseIE): webpage, 'release_date', default=None)) # If there's a ? or a # in the URL, remove them and everything after - clean_url = url.split('?')[0].split('#')[0].strip('/') + clean_url = urlh.geturl().split('?')[0].split('#')[0].strip('/') securevideo_url = clean_url + '.mssecurevideo.json' - json = self._download_json(securevideo_url, display_id) + try: + video = self._download_json(securevideo_url, display_id) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + self.raise_login_required() + raise + # We are dealing with a '../.relevant' URL - redirect_url = json.get('url') + redirect_url = video.get('url') if redirect_url: - return self.url_result('https:' + redirect_url) - else: - # There is only one entry, but with an unknown key, so just get - # the first one - video_id = list(json.values())[0].get('videoid') + return self.url_result(self._proto_relative_url(redirect_url, 'https:')) - return { + # There is only one entry, but with an unknown key, so just get + # the first one + video_id = list(video.values())[0].get('videoid') + + return merge_dicts(info, { '_type': 'url_transparent', 'url': 'https://mediazone.vrt.be/api/v1/vrtvideo/assets/%s' % video_id, 'ie_key': CanvasIE.ie_key(), @@ -303,4 +316,4 @@ class VrtNUIE(GigyaBaseIE): 'season_number': season_number, 'episode_number': episode_number, 'release_date': release_date, - } + })