[hotstar] Bypass geo restriction (closes #14672)
[youtube-dl] / youtube_dl / extractor / hotstar.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..utils import (
6     ExtractorError,
7     determine_ext,
8     int_or_none,
9 )
10
11
class HotStarIE(InfoExtractor):
    """Extractor for single videos on hotstar.com.

    The video is identified by the 10-digit content id found in the URL.
    Metadata is fetched from the ``account.hotstar.com`` AVS endpoint and
    the media URL from the ``getcdn.hotstar.com`` AVS endpoint.
    """

    _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})'
    # Country list for this site; presumably consumed by the base class for
    # geo restriction bypass -- confirm against InfoExtractor.
    _GEO_COUNTRIES = ['IN']
    _TESTS = [{
        'url': 'http://www.hotstar.com/on-air-with-aib--english-1000076273',
        'info_dict': {
            'id': '1000076273',
            'ext': 'mp4',
            'title': 'On Air With AIB - English',
            'description': 'md5:c957d8868e9bc793ccb813691cc4c434',
            'timestamp': 1447227000,
            'upload_date': '20151111',
            'duration': 381,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }, {
        'url': 'http://www.hotstar.com/sports/cricket/rajitha-sizzles-on-debut-with-329/2001477583',
        'only_matching': True,
    }, {
        'url': 'http://www.hotstar.com/1000000515',
        'only_matching': True,
    }]

    def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', fatal=True, query=None):
        """Download a Hotstar API response and unwrap its ``resultObj``.

        The API wraps every payload in an envelope carrying ``resultCode``
        and, on failure, ``errorDescription``. This override checks the
        envelope and returns only the inner ``resultObj``.

        Returns the ``resultObj`` value on success. On any failure it raises
        ExtractorError when ``fatal`` is true and returns None otherwise.
        """
        json_data = super(HotStarIE, self)._download_json(
            url_or_request, video_id, note, fatal=fatal, query=query)

        error = None
        if not isinstance(json_data, dict):
            # With fatal=False the base helper is expected to hand back a
            # non-dict value (None/False) when the download or the JSON
            # parsing fails; subscripting it would raise TypeError.
            error = 'Unexpected response from the Hotstar API'
        elif json_data.get('resultCode') != 'OK':
            error = json_data.get('errorDescription') or 'Hotstar API request failed'
        elif 'resultObj' not in json_data:
            error = 'Hotstar API response is missing resultObj'

        if error is None:
            return json_data['resultObj']
        if fatal:
            raise ExtractorError(error)
        return None

    def _real_extract(self, url):
        """Build the info dict for the video referenced by ``url``.

        Raises ExtractorError (marked as expected) when the metadata flags
        the video as encrypted.
        """
        video_id = self._match_id(url)
        video_data = self._download_json(
            'http://account.hotstar.com/AVS/besc', video_id, query={
                'action': 'GetAggregatedContentDetails',
                'channel': 'PCTV',
                'contentId': video_id,
            })['contentInfo'][0]
        title = video_data['episodeTitle']

        if video_data.get('encrypted') == 'Y':
            raise ExtractorError('This video is DRM protected.', expected=True)

        formats = []
        for channel in ('JIO',):
            # Best-effort request: a failure here only means that this
            # channel contributes no formats.
            format_data = self._download_json(
                'http://getcdn.hotstar.com/AVS/besc',
                video_id, 'Downloading %s JSON metadata' % channel,
                fatal=False, query={
                    'action': 'GetCDN',
                    'asJson': 'Y',
                    'channel': channel,
                    'id': video_id,
                    'type': 'VOD',
                })
            if not format_data:
                continue
            format_url = format_data.get('src')
            if not format_url:
                continue
            ext = determine_ext(format_url)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    format_url, video_id, 'mp4',
                    m3u8_id='hls', fatal=False))
            elif ext == 'f4m':
                # produce broken files
                continue
            else:
                formats.append({
                    'url': format_url,
                    'width': int_or_none(format_data.get('width')),
                    'height': int_or_none(format_data.get('height')),
                })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': video_data.get('description'),
            'duration': int_or_none(video_data.get('duration')),
            'timestamp': int_or_none(video_data.get('broadcastDate')),
            'formats': formats,
            'episode': title,
            'episode_number': int_or_none(video_data.get('episodeNumber')),
            'series': video_data.get('contentTitle'),
        }