_ Git - youtube-dl/blob - youtube_dl/extractor/morningstar.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import hashlib
   5 import json
   6 import re
   7 import time
   8
   9 from .common import InfoExtractor
  10 from ..utils import (
  11     compat_parse_qs,
  12     compat_str,
  13     int_or_none,
  14 )
  15
  16
  17 class MorningstarIE(InfoExtractor):
  18     IE_DESC = 'morningstar.com'
  19     _VALID_URL = r'https?://(?:www\.)?morningstar\.com/cover/videocenter\.aspx\?id=(?P<id>[0-9]+)'
  20     _TEST = {
  21         'url': 'http://www.morningstar.com/cover/videocenter.aspx?id=615869',
  22         'md5': '6c0acface7a787aadc8391e4bbf7b0f5',
  23         'info_dict': {
  24             'id': '615869',
  25             'ext': 'mp4',
  26             'title': 'Get Ahead of the Curve on 2013 Taxes',
  27             'description': "Vanguard's Joel Dickson on managing higher tax rates for high-income earners and fund capital-gain distributions in 2013.",
  28             'thumbnail': r're:^https?://.*m(?:orning)?star\.com/.+thumb\.jpg$'
  29         }
  30     }
  31
  32     def _real_extract(self, url):
  33         mobj = re.match(self._VALID_URL, url)
  34         video_id = mobj.group('id')
  35
  36         webpage = self._download_webpage(url, video_id)
  37         title = self._html_search_regex(
  38             r'<h1 id="titleLink">(.*?)</h1>', webpage, 'title')
  39         video_url = self._html_search_regex(
  40             r'<input type="hidden" id="hidVideoUrl" value="([^"]+)"',
  41             webpage, 'video URL')
  42         thumbnail = self._html_search_regex(
  43             r'<input type="hidden" id="hidSnapshot" value="([^"]+)"',
  44             webpage, 'thumbnail', fatal=False)
  45         description = self._html_search_regex(
  46             r'<div id="mstarDeck".*?>(.*?)</div>',
  47             webpage, 'description', fatal=False)
  48
  49         return {
  50             'id': video_id,
  51             'title': title,
  52             'url': video_url,
  53             'thumbnail': thumbnail,
  54             'description': description,
  55         }