[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / cnbc.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4
5 from .common import InfoExtractor
6 from ..utils import smuggle_url
7
8
class CNBCIE(InfoExtractor):
    """Extractor for ``video.cnbc.com/gallery/?video=<digits>`` URLs.

    The numeric ``video`` query value is used as the id. The returned
    result is of type ``url_transparent`` and names ``ThePlatform`` as the
    extractor key for the generated link.theplatform.com URL.
    """

    _VALID_URL = r'https?://video\.cnbc\.com/gallery/\?video=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://video.cnbc.com/gallery/?video=3000503714',
        'info_dict': {
            'id': '3000503714',
            'ext': 'mp4',
            'title': 'Fighting zombies is big business',
            'description': 'md5:0c100d8e1a7947bd2feec9a5550e519e',
            'timestamp': 1459332000,
            'upload_date': '20160330',
            'uploader': 'NBCU-CNBC',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Return a ``url_transparent`` result for the gallery video in *url*.

        The result carries the id matched from *url* and a
        link.theplatform.com URL built from that id.
        """
        clip_id = self._match_id(url)

        # Media link keyed by the id captured from the gallery URL.
        platform_url = (
            'http://link.theplatform.com/s/gZWlPC/media/guid/2408950221/%s?mbr=true&manifest=m3u'
            % clip_id)

        # The force_smil_url flag travels with the URL through smuggle_url.
        smuggled_url = smuggle_url(platform_url, {'force_smil_url': True})

        return {
            '_type': 'url_transparent',
            'ie_key': 'ThePlatform',
            'url': smuggled_url,
            'id': clip_id,
        }
38
39
class CNBCVideoIE(InfoExtractor):
    """Extractor for ``cnbc.com/video/<path>/<slug>`` pages.

    The last path component (up to the first ``.``, ``/``, ``?``, ``#`` or
    ``&``) is used as the display id. The page is downloaded and searched
    for a numeric ``content_id``; extraction is then delegated to
    ``CNBCIE`` through a ``video.cnbc.com/gallery/?video=<id>`` URL.
    """

    _VALID_URL = r'https?://(?:www\.)?cnbc\.com/video/(?:[^/]+/)+(?P<id>[^./?#&]+)'
    _TEST = {
        'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html',
        'info_dict': {
            'id': '7000031301',
            'ext': 'mp4',
            'title': "Trump: I don't necessarily agree with raising rates",
            'description': 'md5:878d8f0b4ebb5bb1dda3514b91b49de3',
            'timestamp': 1531958400,
            'upload_date': '20180719',
            'uploader': 'NBCU-CNBC',
        },
        'params': {
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Resolve a CNBC video page to its gallery URL handled by CNBCIE.

        Raises the extractor's usual error (via ``_search_regex``) when no
        ``content_id`` is present in the downloaded page.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        # _search_regex(pattern, string, name, default, ...): the third
        # positional argument is the human-readable field name. Passing
        # display_id there would push 'video id' into the `default` slot,
        # so a page without content_id would silently yield the literal
        # string 'video id' instead of failing.
        video_id = self._search_regex(
            r'content_id["\']\s*:\s*["\'](\d+)', webpage, 'video id')
        return self.url_result(
            'http://video.cnbc.com/gallery/?video=%s' % video_id,
            CNBCIE.ie_key())