X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fnbc.py;h=3645d3033f74ae174e3eaa85ad55bbe677d9daba;hb=614a7e1e230e095d9a11b59b20f4ff7462be8b21;hp=3e3de9e2d8c7ffea00415bfd378df43ca1d33135;hpb=d9aa2b784d914ae99c7d9bbaf83d06f1b9dbd04e;p=youtube-dl diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 3e3de9e2d..3645d3033 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -1,7 +1,6 @@ from __future__ import unicode_literals import re -import json from .common import InfoExtractor from ..compat import ( @@ -19,13 +18,13 @@ class NBCIE(InfoExtractor): _TESTS = [ { - 'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188', + 'url': 'http://www.nbc.com/the-tonight-show/segments/112966', # md5 checksum is not stable 'info_dict': { - 'id': 'bTmnLCvIbaaH', + 'id': 'c9xnCo0YPOPH', 'ext': 'flv', - 'title': 'I Am a Firefighter', - 'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.', + 'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s', + 'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.', }, }, { @@ -52,9 +51,9 @@ class NBCIE(InfoExtractor): class NBCNewsIE(InfoExtractor): - _VALID_URL = r'''(?x)https?://www\.nbcnews\.com/ - ((video/.+?/(?P\d+))| - ((?Pfeature|nightly-news)/[^/]+/(?P.+))) + _VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/ + (?:video/.+?/(?P<id>\d+)| + (?:feature|nightly-news)/[^/]+/(?P<title>.+)) ''' _TESTS = [ @@ -120,17 +119,10 @@ class NBCNewsIE(InfoExtractor): # "feature" and "nightly-news" pages use theplatform.com title = mobj.group('title') webpage = self._download_webpage(url, title) - program = mobj.group('program') - if program == 'feature': - bootstrap_json = self._search_regex( - r'var bootstrapJson = ({.+})\s*$', webpage, 'bootstrap json', - flags=re.MULTILINE) - else: - # nightly-news - bootstrap_json = self._search_regex( - r'var playlistData = ({.+});\s*$', webpage, 'playlist data', - flags=re.MULTILINE) - bootstrap = json.loads(bootstrap_json) + bootstrap_json = self._search_regex( + r'var\s+(?:bootstrapJson|playlistData)\s*=\s*({.+});?\s*$', + webpage, 'bootstrap json', flags=re.MULTILINE) + bootstrap = self._parse_json(bootstrap_json, video_id) info = bootstrap['results'][0]['video'] mpxid = info['mpxId']