_ Git - youtube-dl/blob - youtube_dl/extractor/ellentv.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5 import json
   6
   7 from .common import InfoExtractor, ExtractorError
   8
   9 class EllenTVIE(InfoExtractor):
  10     IE_NAME = u'ellentv'
  11     _VALID_URL = r'https?://(?:www\.)?ellentv\.com/videos/(?P<id>[a-z0-9_-]+)'
  12     _TEST = {
  13         'url': 'http://www.ellentv.com/videos/0-7jqrsr18/',
  14         'md5': 'e4af06f3bf0d5f471921a18db5764642',
  15         'info_dict': {
  16             'id': '0-7jqrsr18',
  17             'ext': 'mp4',
  18             'title': u'What\'s Wrong with These Photos? A Whole Lot',
  19             # TODO more properties, either as:
  20             # * A value
  21             # * MD5 checksum; start the string with md5:
  22             # * A regular expression; start the string with re:
  23             # * Any Python type (for example int or float)
  24         }
  25     }
  26
  27     def _real_extract(self, url):
  28         mobj = re.match(self._VALID_URL, url)
  29         id = mobj.group('id')
  30
  31         webpage = self._download_webpage(url, id)
  32
  33         return {
  34             'id': id,
  35             'title': self._og_search_title(webpage),
  36             'url': self._html_search_meta('VideoURL', webpage, 'url')
  37         }
  38
  39 class EllenTVClipsIE(InfoExtractor):
  40     IE_NAME = u'ellentv:clips'
  41     _VALID_URL = r'https?://(?:www\.)?ellentv\.com/episodes/(?P<id>[a-z0-9_-]+)'
  42     _TEST = {
  43         'url': 'http://www.ellentv.com/episodes/meryl-streep-vanessa-hudgens/',
  44         'md5': 'TODO: md5 sum of the first 10KiB of the video file',
  45         'info_dict': {
  46             'id': '0_wf6pizq7',
  47             'ext': 'mp4',
  48             'title': 'Video title goes here',
  49             # TODO more properties, either as:
  50             # * A value
  51             # * MD5 checksum; start the string with md5:
  52             # * A regular expression; start the string with re:
  53             # * Any Python type (for example int or float)
  54         }
  55     }
  56
  57     def _real_extract(self, url):
  58         mobj = re.match(self._VALID_URL, url)
  59         playlist_id = mobj.group('id')
  60
  61         webpage = self._download_webpage(url, playlist_id)
  62         playlist = self._extract_playlist(webpage)
  63
  64         return {
  65             '_type': 'playlist',
  66             'id': playlist_id,
  67             'title': self._og_search_title(webpage),
  68             'entries': self._extract_entries(playlist)
  69         }
  70
  71     def _extract_playlist(self, webpage):
  72         json_string = self._search_regex(r'playerView.addClips\(\[\{(.*?)\}\]\);', webpage, 'json')
  73         try:
  74             return json.loads("[{" + json_string + "}]")
  75         except ValueError as ve:
  76             raise ExtractorError('Failed to download JSON', cause=ve)
  77
  78     def _extract_entries(self, playlist):
  79         return [self.url_result(item[u'url'], 'EllenTV') for item in playlist]