X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fnbc.py;h=7b5449031ebd2b7245d452c04ae50dfaf970d6ca;hb=fbb21cf528fe5cc4ba72f97f691cdf7ec20ee0e6;hp=aa34665d1669f32ab31a02618c58ef9c4b130fe2;hpb=b1d65c33695feec76a801b7647fe8077869c25ad;p=youtube-dl diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index aa34665d1..7b5449031 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -4,7 +4,11 @@ import re import json from .common import InfoExtractor -from ..utils import find_xpath_attr, compat_str +from ..utils import ( + compat_str, + ExtractorError, + find_xpath_attr, +) class NBCIE(InfoExtractor): @@ -12,9 +16,9 @@ class NBCIE(InfoExtractor): _TEST = { 'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188', - 'md5': '54d0fbc33e0b853a65d7b4de5c06d64e', + # md5 checksum is not stable 'info_dict': { - 'id': 'u1RInQZRN7QJ', + 'id': 'bTmnLCvIbaaH', 'ext': 'flv', 'title': 'I Am a Firefighter', 'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.', @@ -22,8 +26,7 @@ class NBCIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) theplatform_url = self._search_regex('class="video-player video-player-full" data-mpx-url="(.*?)"', webpage, 'theplatform url') if theplatform_url.startswith('//'): @@ -53,7 +56,7 @@ class NBCNewsIE(InfoExtractor): 'md5': 'b2421750c9f260783721d898f4c42063', 'info_dict': { 'id': 'I1wpAI_zmhsQ', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'How Twitter Reacted To The Snowden Interview', 'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64', }, @@ -85,11 +88,27 @@ class NBCNewsIE(InfoExtractor): flags=re.MULTILINE) bootstrap = json.loads(bootstrap_json) info = bootstrap['results'][0]['video'] - playlist_url = info['fallbackPlaylistUrl'] + '?form=MPXNBCNewsAPI' mpxid = info['mpxId'] - all_videos = self._download_json(playlist_url, title)['videos'] - # The response contains additional videos - info = next(v for v in all_videos if v['mpxId'] == mpxid) + + base_urls = [ + info['fallbackPlaylistUrl'], + info['associatedPlaylistUrl'], + ] + + for base_url in base_urls: + if not base_url: + continue + playlist_url = base_url + '?form=MPXNBCNewsAPI' + all_videos = self._download_json(playlist_url, title)['videos'] + + try: + info = next(v for v in all_videos if v['mpxId'] == mpxid) + break + except StopIteration: + continue + + if info is None: + raise ExtractorError('Could not find video in playlists') return { '_type': 'url',