_ Git - youtube-dl/blob - youtube_dl/extractor/cnbc.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4
   5 from .common import InfoExtractor
   6 from ..utils import (
   7     js_to_json,
   8     smuggle_url,
   9 )
  10
  11
  12 class CNBCIE(InfoExtractor):
  13     _VALID_URL = r'https?://video\.cnbc\.com/gallery/\?video=(?P<id>[0-9]+)'
  14     _TEST = {
  15         'url': 'http://video.cnbc.com/gallery/?video=3000503714',
  16         'info_dict': {
  17             'id': '3000503714',
  18             'ext': 'mp4',
  19             'title': 'Fighting zombies is big business',
  20             'description': 'md5:0c100d8e1a7947bd2feec9a5550e519e',
  21             'timestamp': 1459332000,
  22             'upload_date': '20160330',
  23             'uploader': 'NBCU-CNBC',
  24         },
  25         'params': {
  26             # m3u8 download
  27             'skip_download': True,
  28         },
  29     }
  30
  31     def _real_extract(self, url):
  32         video_id = self._match_id(url)
  33         return {
  34             '_type': 'url_transparent',
  35             'ie_key': 'ThePlatform',
  36             'url': smuggle_url(
  37                 'http://link.theplatform.com/s/gZWlPC/media/guid/2408950221/%s?mbr=true&manifest=m3u' % video_id,
  38                 {'force_smil_url': True}),
  39             'id': video_id,
  40         }
  41
  42
  43 class CNBCNewIE(InfoExtractor):
  44     IE_NAME = 'CNBC:new'
  45     _VALID_URL = r'https?://(?:www)?\.cnbc\.com/video.*/(?P<id>[^.]+)'
  46     _TEST = {
  47         'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html',
  48         'info_dict': {
  49             'id': '7000031301',
  50             'ext': 'mp4',
  51             'title': 'Trump: I don\'t necessarily agree with raising rates',
  52             'description': 'md5:878d8f0b4ebb5bb1dda3514b91b49de3',
  53             'timestamp': 1531958400,
  54             'upload_date': '20180719',
  55             'uploader': 'NBCU-CNBC',
  56         },
  57         'params': {
  58             # m3u8 download
  59             'skip_download': True,
  60         },
  61     }
  62
  63     CNBC_URL_TEMPLATE = 'http://video.cnbc.com/gallery/?video=%s'
  64
  65     def _real_extract(self, url):
  66         display_id = self._match_id(url)
  67         webpage = self._download_webpage(url, display_id)
  68         video_id = self._parse_json(
  69             self._search_regex(
  70                 r'(?s).*<script[^>]*>.*?({.+?content_id.+?}).*?</script>',
  71                 webpage, display_id),
  72             display_id, transform_source=js_to_json
  73         )['content_id']
  74
  75         return self.url_result(self.CNBC_URL_TEMPLATE % video_id, 'CNBC')