[hidive] Add extractor (closes #15494)
[youtube-dl] / youtube_dl / extractor / hidive.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..compat import compat_str
8 from ..utils import (
9     ExtractorError,
10     int_or_none,
11     urlencode_postdata,
12 )
13
14
class HiDiveIE(InfoExtractor):
    # Extractor for HIDIVE stream pages. The URL carries two path components
    # after /stream/: the show slug ("title") and the episode key ("key").
    _VALID_URL = r'https?://(?:www\.)?hidive\.com/stream/(?P<title>[^/]+)/(?P<key>[^/?#&]+)'
    # Using X-Forwarded-For results in 403 HTTP error for HLS fragments,
    # so disabling geo bypass completely
    _GEO_BYPASS = False

    _TESTS = [{
        'url': 'https://www.hidive.com/stream/the-comic-artist-and-his-assistants/s01e001',
        'info_dict': {
            'id': 'the-comic-artist-and-his-assistants/s01e001',
            'ext': 'mp4',
            'title': 'the-comic-artist-and-his-assistants/s01e001',
            'series': 'the-comic-artist-and-his-assistants',
            'season_number': 1,
            'episode_number': 1,
        },
        'params': {
            'skip_download': True,
            'proxy': '192.99.245.228:3128',
        },
    }]

    def _real_extract(self, url):
        """Extract formats, subtitles and episode metadata for a stream URL.

        The show slug and episode key are taken from the URL and sent as
        ``Title``/``Key`` post data to the ``/play/settings`` endpoint. The
        JSON answer is expected to contain a ``renditions`` mapping whose
        values may hold an HLS manifest URL (``bitrates.hls``) and a list of
        closed-caption entries (``ccFiles``).

        Raises a geo restriction error when the service reports
        ``RegionRestricted`` and an expected ``ExtractorError`` for any
        other restriction reason except the literal string ``'None'``.
        """
        mobj = re.match(self._VALID_URL, url)
        title, key = mobj.group('title', 'key')
        # No separate identifier is read from the response, so the
        # "<title>/<key>" pair doubles as both video id and title.
        video_id = '%s/%s' % (title, key)

        settings = self._download_json(
            'https://www.hidive.com/play/settings', video_id,
            data=urlencode_postdata({
                'Title': title,
                'Key': key,
            }))

        restriction = settings.get('restrictionReason')
        if restriction == 'RegionRestricted':
            self.raise_geo_restricted()

        # The service reports "no restriction" as the string 'None'.
        if restriction and restriction != 'None':
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, restriction), expected=True)

        formats = []
        subtitles = {}
        for rendition_id, rendition in settings['renditions'].items():
            # Skip malformed entries instead of failing on .get() below.
            if not isinstance(rendition, dict):
                continue
            bitrates = rendition.get('bitrates')
            if not isinstance(bitrates, dict):
                continue
            m3u8_url = bitrates.get('hls')
            if not isinstance(m3u8_url, compat_str):
                continue
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='%s-hls' % rendition_id, fatal=False))
            cc_files = rendition.get('ccFiles')
            if isinstance(cc_files, list):
                self._collect_subtitles(cc_files, subtitles)
        # Formats from several renditions were merged in arbitrary order;
        # sort them so format selection works and so that an empty list is
        # reported as an extraction error rather than returned silently.
        self._sort_formats(formats)

        # Keys look like "s01e001"; either part may be absent, in which
        # case the corresponding number is None.
        season_number = int_or_none(self._search_regex(
            r's(\d+)', key, 'season number', default=None))
        episode_number = int_or_none(self._search_regex(
            r'e(\d+)', key, 'episode number', default=None))

        return {
            'id': video_id,
            'title': video_id,
            'subtitles': subtitles,
            'formats': formats,
            'series': title,
            'season_number': season_number,
            'episode_number': episode_number,
        }

    @staticmethod
    def _collect_subtitles(cc_files, subtitles):
        # Add every well-formed closed-caption entry of one rendition to
        # the ``subtitles`` dict (language -> list of {'url': ...}).
        for cc_file in cc_files:
            # Each entry is a list; index 0 is used as the language and
            # index 2 as the subtitle URL.
            if not isinstance(cc_file, list) or len(cc_file) < 3:
                continue
            cc_lang = cc_file[0]
            cc_url = cc_file[2]
            if not isinstance(cc_lang, compat_str) or not isinstance(
                    cc_url, compat_str):
                continue
            subtitles.setdefault(cc_lang, []).append({
                'url': cc_url,
            })