[youtube] fix extraction for embed restricted live streams(fixes #16433)
[youtube-dl] / youtube_dl / extractor / hidive.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..compat import compat_str
8 from ..utils import (
9     ExtractorError,
10     int_or_none,
11     urlencode_postdata,
12 )
13
14
class HiDiveIE(InfoExtractor):
    """Extractor for HIDIVE stream pages (``hidive.com/stream/<title>/<key>``)."""

    _VALID_URL = r'https?://(?:www\.)?hidive\.com/stream/(?P<title>[^/]+)/(?P<key>[^/?#&]+)'
    # Using X-Forwarded-For results in 403 HTTP error for HLS fragments,
    # so disabling geo bypass completely
    _GEO_BYPASS = False

    _TESTS = [{
        'url': 'https://www.hidive.com/stream/the-comic-artist-and-his-assistants/s01e001',
        'info_dict': {
            'id': 'the-comic-artist-and-his-assistants/s01e001',
            'ext': 'mp4',
            'title': 'the-comic-artist-and-his-assistants/s01e001',
            'series': 'the-comic-artist-and-his-assistants',
            'season_number': 1,
            'episode_number': 1,
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for a single HIDIVE stream URL.

        The ``title`` and ``key`` groups of ``_VALID_URL`` are sent as
        URL-encoded form data to the ``play/settings`` endpoint; the JSON
        reply is then scanned for HLS manifests and closed-caption files.

        Raises a geo restriction error when the service reports
        ``RegionRestricted``, and an expected ``ExtractorError`` for any
        other restriction reason than the literal string ``'None'``.
        """
        mobj = re.match(self._VALID_URL, url)
        title, key = mobj.group('title', 'key')
        # There is no separate id in the URL, so "<title>/<key>" is used
        # both as the video id and as the title.
        video_id = '%s/%s' % (title, key)

        settings = self._download_json(
            'https://www.hidive.com/play/settings', video_id,
            data=urlencode_postdata({
                'Title': title,
                'Key': key,
            }))

        restriction = settings.get('restrictionReason')
        if restriction == 'RegionRestricted':
            self.raise_geo_restricted()

        # The service spells "no restriction" as the string 'None'.
        if restriction and restriction != 'None':
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, restriction), expected=True)

        formats = []
        subtitles = {}
        for rendition_id, rendition in settings['renditions'].items():
            # Skip malformed entries instead of crashing on .get() below,
            # in line with the type checks applied to every nested value.
            if not isinstance(rendition, dict):
                continue
            bitrates = rendition.get('bitrates')
            if not isinstance(bitrates, dict):
                continue
            m3u8_url = bitrates.get('hls')
            if not isinstance(m3u8_url, compat_str):
                continue
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='%s-hls' % rendition_id, fatal=False))
            cc_files = rendition.get('ccFiles')
            if not isinstance(cc_files, list):
                continue
            for cc_file in cc_files:
                # Each entry is read as a list with the language at
                # index 0 and the caption URL at index 2.
                if not isinstance(cc_file, list) or len(cc_file) < 3:
                    continue
                cc_lang = cc_file[0]
                cc_url = cc_file[2]
                if not isinstance(cc_lang, compat_str) or not isinstance(
                        cc_url, compat_str):
                    continue
                subtitles.setdefault(cc_lang, []).append({
                    'url': cc_url,
                })
        # Formats must be handed back ordered from worst to best quality;
        # this also reports an error when no format could be extracted.
        self._sort_formats(formats)

        # Keys look like "s01e001": digits after "s" give the season,
        # digits after "e" give the episode. Both are optional.
        season_number = int_or_none(self._search_regex(
            r's(\d+)', key, 'season number', default=None))
        episode_number = int_or_none(self._search_regex(
            r'e(\d+)', key, 'episode number', default=None))

        return {
            'id': video_id,
            'title': video_id,
            'subtitles': subtitles,
            'formats': formats,
            'series': title,
            'season_number': season_number,
            'episode_number': episode_number,
        }