[cnbc] Add support for new URL schema (closes #14193)
[youtube-dl] / youtube_dl / extractor / cnbc.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4
5 from .common import InfoExtractor
6 from ..utils import (
7     js_to_json,
8     smuggle_url,
9 )
10
11
class CNBCIE(InfoExtractor):
    """Extractor for ``video.cnbc.com/gallery/?video=<id>`` URLs.

    The numeric ``video`` query parameter is taken as the video id and the
    result points at a link.theplatform.com media URL tagged for the
    ``ThePlatform`` extractor.
    """

    _VALID_URL = r'https?://video\.cnbc\.com/gallery/\?video=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://video.cnbc.com/gallery/?video=3000503714',
        'info_dict': {
            'id': '3000503714',
            'ext': 'mp4',
            'title': 'Fighting zombies is big business',
            'description': 'md5:0c100d8e1a7947bd2feec9a5550e519e',
            'timestamp': 1459332000,
            'upload_date': '20160330',
            'uploader': 'NBCU-CNBC',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Return a ``url_transparent`` result for the video named in *url*.

        The id captured by ``_VALID_URL`` is substituted into the
        ThePlatform media URL, which is then wrapped by ``smuggle_url``
        together with the ``force_smil_url`` flag.
        """
        video_id = self._match_id(url)
        theplatform_url = (
            'http://link.theplatform.com/s/gZWlPC/media/guid/2408950221/%s?mbr=true&manifest=m3u'
            % video_id)
        return {
            '_type': 'url_transparent',
            'ie_key': 'ThePlatform',
            'id': video_id,
            'url': smuggle_url(theplatform_url, {'force_smil_url': True}),
        }
41
42
class CNBCNewIE(InfoExtractor):
    """Extractor for article-style ``cnbc.com/video/...`` pages.

    The page is downloaded, the ``content_id`` value is read from a JSON-like
    object embedded in a ``<script>`` tag, and extraction is delegated to the
    ``CNBC`` extractor through a ``video.cnbc.com/gallery`` URL.
    """

    IE_NAME = 'CNBC:new'
    # The whole "www." prefix is optional. Writing it as ``(?:www)?\.`` would
    # keep the dot mandatory, rejecting "cnbc.com" and accepting ".cnbc.com".
    _VALID_URL = r'https?://(?:www\.)?cnbc\.com/video.*/(?P<id>[^.]+)'
    _TEST = {
        'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html',
        'info_dict': {
            'id': '7000031301',
            'ext': 'mp4',
            'title': 'Trump: I don\'t necessarily agree with raising rates',
            'description': 'md5:878d8f0b4ebb5bb1dda3514b91b49de3',
            'timestamp': 1531958400,
            'upload_date': '20180719',
            'uploader': 'NBCU-CNBC',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    # Gallery URL pattern matched by CNBCIE; ``%s`` receives the content id.
    CNBC_URL_TEMPLATE = 'http://video.cnbc.com/gallery/?video=%s'

    def _real_extract(self, url):
        """Resolve a cnbc.com video page to the matching gallery URL.

        Returns the result of ``url_result`` pointing at the ``CNBC``
        extractor. A ``KeyError`` is raised if the parsed script object has
        no ``content_id`` key.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # Third argument is the human-readable field name used in error
        # messages, so give it a description rather than the page slug.
        video_data_js = self._search_regex(
            r'(?s).*<script[^>]*>.*?({.+?content_id.+?}).*?</script>',
            webpage, 'video data')
        video_data = self._parse_json(
            video_data_js, display_id, transform_source=js_to_json)
        video_id = video_data['content_id']

        return self.url_result(self.CNBC_URL_TEMPLATE % video_id, 'CNBC')