From 81ce479f4dcc17b83672f5ca3ffcb2a2d1618c96 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 12 Oct 2017 16:04:41 +0000 Subject: [PATCH] [udn] fix extraction --- youtube_dl/extractor/udn.py | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/udn.py b/youtube_dl/extractor/udn.py index daf45d0b4..2c8e5c7b4 100644 --- a/youtube_dl/extractor/udn.py +++ b/youtube_dl/extractor/udn.py @@ -1,7 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import json import re from .common import InfoExtractor @@ -29,6 +28,7 @@ class UDNEmbedIE(InfoExtractor): # m3u8 download 'skip_download': True, }, + 'expected_warnings': ['Failed to parse JSON Expecting value'], }, { 'url': 'https://video.udn.com/embed/news/300040', 'only_matching': True, @@ -43,10 +43,21 @@ class UDNEmbedIE(InfoExtractor): page = self._download_webpage(url, video_id) - options = json.loads(js_to_json(self._html_search_regex( - r'var\s+options\s*=\s*([^;]+);', page, 'video urls dictionary'))) - - video_urls = options['video'] + options_str = self._html_search_regex( + r'var\s+options\s*=\s*([^;]+);', page, 'options') + trans_options_str = js_to_json(options_str) + options = self._parse_json(trans_options_str, 'options', fatal=False) or {} + if options: + video_urls = options['video'] + title = options['title'] + poster = options.get('poster') + else: + video_urls = self._parse_json(self._html_search_regex( + r'"video"\s*:\s*({.+?})\s*,', trans_options_str, 'video urls'), 'video urls') + title = self._html_search_regex( + r"title\s*:\s*'(.+?)'\s*,", options_str, 'title') + poster = self._html_search_regex( + r"poster\s*:\s*'(.+?)'\s*,", options_str, 'poster', default=None) if video_urls.get('youtube'): return self.url_result(video_urls.get('youtube'), 'Youtube') @@ -68,7 +79,7 @@ class UDNEmbedIE(InfoExtractor): formats.extend(self._extract_f4m_formats( video_url, video_id, f4m_id='hds')) else: - mobj = re.search(r'_(?P\d+)p_(?P\d+).mp4', video_url) + mobj = re.search(r'_(?P\d+)p_(?P\d+)\.mp4', video_url) a_format = { 'url': video_url, # video_type may be 'mp4', which confuses YoutubeDL @@ -83,14 +94,9 @@ class UDNEmbedIE(InfoExtractor): self._sort_formats(formats) - thumbnails = [{ - 'url': img_url, - 'id': img_type, - } for img_type, img_url in options.get('gallery', [{}])[0].items() if img_url] - return { 'id': video_id, 'formats': formats, - 'title': options['title'], - 'thumbnails': thumbnails, + 'title': title, + 'thumbnail': poster, } -- 2.39.5