X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fyahoo.py;h=bf4e659ac6981c77f7e5f3c77578c4808634d766;hb=1ed34f3dd6f534a3a5a3d6808d8d3466a9e5dea2;hp=6e72f1e5552d38d4a2baf8d2b14f3ce01ae3342c;hpb=a28ccbabc60c81016c851ae46365be377ea83795;p=youtube-dl diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index 6e72f1e55..bf4e659ac 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -17,10 +17,12 @@ from ..utils import ( int_or_none, ) +from .nbc import NBCSportsVPlayerIE + class YahooIE(InfoExtractor): IE_DESC = 'Yahoo screen and movies' - _VALID_URL = r'(?P(?Phttps?://(?:[a-zA-Z]{2}\.)?[\da-zA-Z_-]+\.yahoo\.com)/(?:[^/]+/)*(?P.+?)-(?P[0-9]+)(?:-[a-z]+)?\.html)' + _VALID_URL = r'(?P(?Phttps?://(?:[a-zA-Z]{2}\.)?[\da-zA-Z_-]+\.yahoo\.com)/(?:[^/]+/)*(?P.+)?-(?P[0-9]+)(?:-[a-z]+)?\.html)' _TESTS = [ { 'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html', @@ -138,12 +140,15 @@ class YahooIE(InfoExtractor): 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d', 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson', } + }, { + 'url': 'https://tw.news.yahoo.com/-100120367.html', + 'only_matching': True, } ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - display_id = mobj.group('display_id') + display_id = mobj.group('display_id') or self._match_id(url) page_id = mobj.group('id') url = mobj.group('url') host = mobj.group('host') @@ -161,10 +166,9 @@ class YahooIE(InfoExtractor): video_id = items[0]['id'] return self._get_info(video_id, display_id, webpage) # Look for NBCSports iframes - iframe_m = re.search( - r']+src="(?Phttps?://vplayer\.nbcsports\.com/[^"]+)"', webpage) - if iframe_m: - return self.url_result(iframe_m.group('url'), 'NBCSports') + nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage) + if nbc_sports_url: + return self.url_result(nbc_sports_url, 'NBCSportsVPlayer') items_json = self._search_regex( r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,