_ Git - youtube-dl/blob - youtube_dl/extractor/godtv.py

   1 from __future__ import unicode_literals
   2
   3 from .common import InfoExtractor
   4 from .ooyala import OoyalaIE
   5 from ..utils import js_to_json
   6
   7
   8 class GodTVIE(InfoExtractor):
   9     _VALID_URL = r'https?://(?:www\.)?god\.tv(?:/[^/]+)+/(?P<id>[^/?#&]+)'
  10     _TESTS = [{
  11         'url': 'http://god.tv/jesus-image/video/jesus-conference-2016/randy-needham',
  12         'info_dict': {
  13             'id': 'lpd3g2MzE6D1g8zFAKz8AGpxWcpu6o_3',
  14             'ext': 'mp4',
  15             'title': 'Randy Needham',
  16             'duration': 3615.08,
  17         },
  18         'params': {
  19             'skip_download': True,
  20         }
  21     }, {
  22         'url': 'http://god.tv/playlist/bible-study',
  23         'info_dict': {
  24             'id': 'bible-study',
  25         },
  26         'playlist_mincount': 37,
  27     }, {
  28         'url': 'http://god.tv/node/15097',
  29         'only_matching': True,
  30     }, {
  31         'url': 'http://god.tv/live/africa',
  32         'only_matching': True,
  33     }]
  34
  35     def _real_extract(self, url):
  36         display_id = self._match_id(url)
  37
  38         webpage = self._download_webpage(url, display_id)
  39
  40         settings = self._parse_json(
  41             self._search_regex(
  42                 r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
  43                 webpage, 'settings', default='{}'),
  44             display_id, transform_source=js_to_json, fatal=False)
  45
  46         ooyala_id = None
  47
  48         if settings:
  49             playlist = settings.get('playlist')
  50             if playlist and isinstance(playlist, list):
  51                 entries = [
  52                     OoyalaIE._build_url_result(video['content_id'])
  53                     for video in playlist if video.get('content_id')]
  54                 if entries:
  55                     return self.playlist_result(entries, display_id)
  56             ooyala_id = settings.get('ooyala', {}).get('content_id')
  57
  58         if not ooyala_id:
  59             ooyala_id = self._search_regex(
  60                 r'["\']content_id["\']\s*:\s*(["\'])(?P<id>[\w-]+)\1',
  61                 webpage, 'ooyala id', group='id')
  62
  63         return OoyalaIE._build_url_result(ooyala_id)