[nbc] Recognize https urls (fixes #5300)

[youtube-dl] / youtube_dl / extractor / nbc.py
diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py

index 3e3de9e2d8c7ffea00415bfd378df43ca1d33135..80a01c778b6b8ae3b1ce2b65451fc8fd51e9dfb7 100644 (file)
--- a/youtube_dl/extractor/nbc.py
+++ b/youtube_dl/extractor/nbc.py
@@ -1,7 +1,6 @@
  from __future__ import unicode_literals
  
  import re
-import json
  
  from .common import InfoExtractor
  from ..compat import (
@@ -15,17 +14,17 @@ from ..utils import (
  
  
  class NBCIE(InfoExtractor):
-    _VALID_URL = r'http://www\.nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)'
+    _VALID_URL = r'https?://www\.nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)'
  
      _TESTS = [
          {
-            'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188',
+            'url': 'http://www.nbc.com/the-tonight-show/segments/112966',
              # md5 checksum is not stable
              'info_dict': {
-                'id': 'bTmnLCvIbaaH',
+                'id': 'c9xnCo0YPOPH',
                  'ext': 'flv',
-                'title': 'I Am a Firefighter',
-                'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.',
+                'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
+                'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
              },
          },
          {
@@ -52,9 +51,9 @@ class NBCIE(InfoExtractor):
  
  
  class NBCNewsIE(InfoExtractor):
-    _VALID_URL = r'''(?x)https?://www\.nbcnews\.com/
-        ((video/.+?/(?P<id>\d+))|
-        ((?P<program>feature|nightly-news)/[^/]+/(?P<title>.+)))
+    _VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/
+        (?:video/.+?/(?P<id>\d+)|
+        (?:feature|nightly-news)/[^/]+/(?P<title>.+))
          '''
  
      _TESTS = [
@@ -120,17 +119,10 @@ class NBCNewsIE(InfoExtractor):
              # "feature" and "nightly-news" pages use theplatform.com
              title = mobj.group('title')
              webpage = self._download_webpage(url, title)
-            program = mobj.group('program')
-            if program == 'feature':
-                bootstrap_json = self._search_regex(
-                    r'var bootstrapJson = ({.+})\s*$', webpage, 'bootstrap json',
-                    flags=re.MULTILINE)
-            else:
-                # nightly-news
-                bootstrap_json = self._search_regex(
-                    r'var playlistData = ({.+});\s*$', webpage, 'playlist data',
-                    flags=re.MULTILINE)
-            bootstrap = json.loads(bootstrap_json)
+            bootstrap_json = self._search_regex(
+                r'var\s+(?:bootstrapJson|playlistData)\s*=\s*({.+});?\s*$',
+                webpage, 'bootstrap json', flags=re.MULTILINE)
+            bootstrap = self._parse_json(bootstrap_json, video_id)
              info = bootstrap['results'][0]['video']
              mpxid = info['mpxId']