git.bitcoin.ninja Git - youtube-dl/blob - youtube_dl/extractor/godtv.py
[extractor/common] add helper method to extract html5 media entries
[youtube-dl] / youtube_dl / extractor / godtv.py
1 from __future__ import unicode_literals
2
3 from .common import InfoExtractor
4 from .ooyala import OoyalaIE
5 from ..utils import js_to_json
6
7
class GodTVIE(InfoExtractor):
    """Extractor for god.tv pages whose videos are referenced by Ooyala ids.

    The page's ``Drupal.settings`` blob is inspected for either a playlist of
    ``content_id`` values or a single ``ooyala.content_id``.  The actual media
    extraction is delegated to ``OoyalaIE`` through URL results.
    """

    # The last path component of the URL is captured as the (display) id.
    _VALID_URL = r'https?://(?:www\.)?god\.tv(?:/[^/]+)*/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'http://god.tv/jesus-image/video/jesus-conference-2016/randy-needham',
        'info_dict': {
            'id': 'lpd3g2MzE6D1g8zFAKz8AGpxWcpu6o_3',
            'ext': 'mp4',
            'title': 'Randy Needham',
            'duration': 3615.08,
        },
        'params': {
            'skip_download': True,
        }
    }, {
        'url': 'http://god.tv/playlist/bible-study',
        'info_dict': {
            'id': 'bible-study',
        },
        'playlist_mincount': 37,
    }, {
        'url': 'http://god.tv/node/15097',
        'only_matching': True,
    }, {
        'url': 'http://god.tv/live/africa',
        'only_matching': True,
    }, {
        'url': 'http://god.tv/liveevents',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a god.tv page to an Ooyala URL result or a playlist of them.

        Returns ``self.playlist_result(entries, display_id)`` when the page
        settings contain a non-empty playlist of content ids, otherwise
        ``OoyalaIE._build_url_result(ooyala_id)`` for a single id taken from
        the settings or, failing that, from a regex search over the webpage.

        NOTE(review): the final regex search is called without a default, so
        it presumably raises when no id can be found - confirm against
        ``InfoExtractor._search_regex``.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        # The settings blob is optional: '{}' is passed as the regex default
        # and the JSON parse is non-fatal, so the result is validated below
        # rather than trusted.
        settings = self._parse_json(
            self._search_regex(
                r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
                webpage, 'settings', default='{}'),
            display_id, transform_source=js_to_json, fatal=False)

        ooyala_id = None

        # The blob is site-controlled data; check every level's type so that
        # an unexpected shape (null, list, string, ...) falls through to the
        # regex fallback below instead of raising AttributeError/TypeError.
        if isinstance(settings, dict):
            playlist = settings.get('playlist')
            if isinstance(playlist, list):
                entries = [
                    OoyalaIE._build_url_result(video['content_id'])
                    for video in playlist
                    if isinstance(video, dict) and video.get('content_id')]
                if entries:
                    return self.playlist_result(entries, display_id)
            ooyala = settings.get('ooyala')
            if isinstance(ooyala, dict):
                ooyala_id = ooyala.get('content_id')

        if not ooyala_id:
            # Last resort: look for a quoted content_id key/value pair
            # anywhere in the page source.
            ooyala_id = self._search_regex(
                r'["\']content_id["\']\s*:\s*(["\'])(?P<id>[\w-]+)\1',
                webpage, 'ooyala id', group='id')

        return OoyalaIE._build_url_result(ooyala_id)