[nbc] Add new extractor for NBC Olympics (#10295, #10361)
author Yen Chi Hsuan <yan12125@gmail.com>
Sun, 11 Sep 2016 18:55:15 +0000 (02:55 +0800)
committer Yen Chi Hsuan <yan12125@gmail.com>
Sun, 11 Sep 2016 18:55:15 +0000 (02:55 +0800)
ChangeLog
youtube_dl/extractor/extractors.py
youtube_dl/extractor/nbc.py

index 669544815d3a0f21203813983cc29d306b1e0845..46eea0626c4c37bd9a3b4e7495eef0f847cf8728 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+version <unreleased>
+
+Extractors
++ [nbc] Add support for NBC Olympics (#10361)
+
+
 version 2016.09.11.1
 
 Extractors
index a3cd9c2891ddf7e4d9d281ef4828c90f98214991..522691de130a77cc9fca6c8fe148887784d630d2 100644 (file)
@@ -534,6 +534,7 @@ from .nbc import (
     CSNNEIE,
     NBCIE,
     NBCNewsIE,
+    NBCOlympicsIE,
     NBCSportsIE,
     NBCSportsVPlayerIE,
 )
index f694e210b1dadceb030cb24f6498abe30de5b976..f37bf2f307b98a690a6e51caf141e9255e413a40 100644 (file)
@@ -335,3 +335,55 @@ class NBCNewsIE(ThePlatformIE):
                 'url': 'http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews?byId=%s' % video_id,
                 'ie_key': 'ThePlatformFeed',
             }
+
+
+class NBCOlympicsIE(InfoExtractor):
+    """Extractor for video pages on www.nbcolympics.com.
+
+    The page's Drupal settings JSON carries ``vod.iframe_url``; its
+    vplayer.nbcolympics.com host is swapped for player.theplatform.com
+    and the resulting URL is delegated to ThePlatformIE.
+    """
+
+    # Digits are allowed in the slug: the pattern has no end anchor, so a
+    # letters-only class would presumably capture only the part of the slug
+    # before the first digit and yield a truncated display_id.
+    _VALID_URL = r'https?://www\.nbcolympics\.com/video/(?P<id>[0-9a-z-]+)'
+
+    _TEST = {
+        # Geo-restricted to US
+        'url': 'http://www.nbcolympics.com/video/justin-roses-son-leo-was-tears-after-his-dad-won-gold',
+        'md5': '54fecf846d05429fbaa18af557ee523a',
+        'info_dict': {
+            'id': 'WjTBzDXx5AUq',
+            'display_id': 'justin-roses-son-leo-was-tears-after-his-dad-won-gold',
+            'ext': 'mp4',
+            'title': 'Rose\'s son Leo was in tears after his dad won gold',
+            'description': 'Olympic gold medalist Justin Rose gets emotional talking to the impact his win in men\'s golf has already had on his children.',
+            'timestamp': 1471274964,
+            'upload_date': '20160815',
+            'uploader': 'NBCU-SPORTS',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return a url_transparent result pointing at the ThePlatform player URL."""
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        # The settings object is passed inline to jQuery.extend(); capture the JSON literal.
+        drupal_settings = self._parse_json(self._search_regex(
+            r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
+            webpage, 'drupal settings'), display_id)
+
+        iframe_url = drupal_settings['vod']['iframe_url']
+        theplatform_url = iframe_url.replace(
+            'vplayer.nbcolympics.com', 'player.theplatform.com')
+
+        return {
+            '_type': 'url_transparent',
+            'url': theplatform_url,
+            'ie_key': ThePlatformIE.ie_key(),
+            'display_id': display_id,
+        }