X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fyahoo.py;h=f9afbdbab611e233c7f7014ae7d66e996f2b7c31;hb=ca57a598838f7d945a09a1df569e1c29fdd0732b;hp=b777159c5639304edf1433857f626c29299e4bcb;hpb=ed06e9949bd121cdcd5d66e14b615abb2dbbd52e;p=youtube-dl diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index b777159c5..f9afbdbab 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -15,6 +15,7 @@ from ..utils import ( unescapeHTML, ExtractorError, int_or_none, + mimetype2ext, ) from .nbc import NBCSportsVPlayerIE @@ -22,7 +23,7 @@ from .nbc import NBCSportsVPlayerIE class YahooIE(InfoExtractor): IE_DESC = 'Yahoo screen and movies' - _VALID_URL = r'(?P<url>(?P<host>https?://(?:[a-zA-Z]{2}\.)?[\da-zA-Z_-]+\.yahoo\.com)/(?:[^/]+/)*(?P<display_id>.+?)-(?P<id>[0-9]+)(?:-[a-z]+)?\.html)' + _VALID_URL = r'(?P<url>(?P<host>https?://(?:[a-zA-Z]{2}\.)?[\da-zA-Z_-]+\.yahoo\.com)/(?:[^/]+/)*(?P<display_id>.+)?-(?P<id>[0-9]+)(?:-[a-z]+)?\.html)' _TESTS = [ { 'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html', @@ -140,12 +141,15 @@ class YahooIE(InfoExtractor): 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d', 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson', } + }, { + 'url': 'https://tw.news.yahoo.com/-100120367.html', + 'only_matching': True, } ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('display_id') + display_id = mobj.group('display_id') or self._match_id(url) page_id = mobj.group('id') url = mobj.group('url') host = mobj.group('host') @@ -233,6 +237,22 @@ class YahooIE(InfoExtractor): self._sort_formats(formats) + closed_captions = self._html_search_regex( + r'"closedcaptions":(\[[^\]]+\])', webpage, 'closed captions', + default='[]') + + cc_json = self._parse_json(closed_captions, video_id, fatal=False) + subtitles = {} + if cc_json: + for closed_caption in cc_json: + lang = closed_caption['lang'] + if lang not in subtitles: + 
subtitles[lang] = [] + subtitles[lang].append({ + 'url': closed_caption['url'], + 'ext': mimetype2ext(closed_caption['content_type']), + }) + return { 'id': video_id, 'display_id': display_id, @@ -241,6 +261,7 @@ class YahooIE(InfoExtractor): 'description': clean_html(meta['description']), 'thumbnail': meta['thumbnail'] if meta.get('thumbnail') else self._og_search_thumbnail(webpage), 'duration': int_or_none(meta.get('duration')), + 'subtitles': subtitles, }