[hotstar:playlist] Add extractor
[youtube-dl] / youtube_dl / extractor / hotstar.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..utils import (
6     ExtractorError,
7     determine_ext,
8     int_or_none,
9 )
10 import re
11
12
class HotStarIE(InfoExtractor):
    """Extractor for single hotstar.com videos addressed by a 10-digit content id."""

    _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})'
    _GEO_COUNTRIES = ['IN']
    _TESTS = [{
        'url': 'http://www.hotstar.com/on-air-with-aib--english-1000076273',
        'info_dict': {
            'id': '1000076273',
            'ext': 'mp4',
            'title': 'On Air With AIB',
            'description': 'md5:c957d8868e9bc793ccb813691cc4c434',
            'timestamp': 1447227000,
            'upload_date': '20151111',
            'duration': 381,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }, {
        'url': 'http://www.hotstar.com/sports/cricket/rajitha-sizzles-on-debut-with-329/2001477583',
        'only_matching': True,
    }, {
        'url': 'http://www.hotstar.com/1000000515',
        'only_matching': True,
    }]

    def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', fatal=True, query=None):
        """Download a Hotstar API response and unwrap its ``resultObj`` payload.

        The response is expected to be a JSON object carrying ``resultCode``,
        ``errorDescription`` and ``resultObj`` keys.

        Returns the value stored under ``resultObj``.  When ``fatal`` is false,
        ``None`` is returned instead if the download did not yield a JSON
        object or the API reported a ``resultCode`` other than ``'OK'``.

        Raises ExtractorError when ``fatal`` is true and the response is
        unusable or reports an error.
        """
        json_data = super(HotStarIE, self)._download_json(
            url_or_request, video_id, note, fatal=fatal, query=query)

        # NOTE(review): with fatal=False the base helper is expected to hand
        # back a non-dict value (rather than raise) when the download fails;
        # subscripting that value used to crash with a TypeError.
        if not isinstance(json_data, dict):
            if fatal:
                raise ExtractorError('Unexpected Hotstar API response')
            return None

        if json_data.get('resultCode') != 'OK':
            if fatal:
                raise ExtractorError(
                    json_data.get('errorDescription') or 'Hotstar API request failed')
            return None

        result = json_data.get('resultObj')
        if result is None and fatal:
            raise ExtractorError('Hotstar API response contains no resultObj')
        return result

    def _real_extract(self, url):
        """Build the info dict (metadata and formats) for the video at ``url``.

        Raises ExtractorError if the content details mark the video as
        encrypted, or if the (fatal) content details request fails.
        """
        video_id = self._match_id(url)
        video_data = self._download_json(
            'http://account.hotstar.com/AVS/besc', video_id, query={
                'action': 'GetAggregatedContentDetails',
                'channel': 'PCTV',
                'contentId': video_id,
            })['contentInfo'][0]
        title = video_data['episodeTitle']

        if video_data.get('encrypted') == 'Y':
            raise ExtractorError('This video is DRM protected.', expected=True)

        formats = []
        for f in ('JIO',):
            # Non-fatal: a channel whose CDN lookup fails is simply skipped.
            format_data = self._download_json(
                'http://getcdn.hotstar.com/AVS/besc',
                video_id, 'Downloading %s JSON metadata' % f,
                fatal=False, query={
                    'action': 'GetCDN',
                    'asJson': 'Y',
                    'channel': f,
                    'id': video_id,
                    'type': 'VOD',
                })
            if not format_data:
                continue
            format_url = format_data.get('src')
            if not format_url:
                continue
            ext = determine_ext(format_url)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    format_url, video_id, 'mp4',
                    m3u8_id='hls', fatal=False))
            elif ext == 'f4m':
                # produce broken files
                continue
            else:
                formats.append({
                    'url': format_url,
                    'width': int_or_none(format_data.get('width')),
                    'height': int_or_none(format_data.get('height')),
                })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': video_data.get('description'),
            'duration': int_or_none(video_data.get('duration')),
            'timestamp': int_or_none(video_data.get('broadcastDate')),
            'formats': formats,
            'episode': title,
            'episode_number': int_or_none(video_data.get('episodeNumber')),
            'series': video_data.get('contentTitle'),
        }
104
105
class HotStarPlaylistIE(InfoExtractor):
    """Extractor for hotstar.com TV episode listings, returned as a playlist."""

    IE_NAME = 'hotstar:playlist'
    _VALID_URL = r'https?://(?:www\.)?hotstar\.com/tv/(?P<playlist_title>.+)/(?P<series_id>\d+)/episodes/(?P<playlist_id>\d{1,})'

    _TESTS = [{
        'url': 'http://www.hotstar.com/tv/pow-bandi-yuddh-ke/10999/episodes/10856/9993',
        'info_dict': {
            'id': '10856',
            'title': 'pow-bandi-yuddh-ke',
        },
        'playlist_mincount': 0,
    }, {
        'url': 'http://www.hotstar.com/tv/pow-bandi-yuddh-ke/10999/episodes/10856/9993',
        'only_matching': True,
    }]

    def _extract_episode_info(self, series_id, playlist_title, video):
        """Turn one search result document into a ``url``-type playlist entry.

        ``video`` is a dict from the search response; its ``contentId``,
        ``episodeTitle``, ``longDescription`` and ``urlPictures`` keys are
        read.  The entry URL is assembled from ``playlist_title``,
        ``series_id``, a slug of the episode title and the content id.
        """
        content_id = video.get('contentId')
        if content_id is not None:
            # Normalise to a string in case the API delivers a JSON number.
            content_id = '%s' % content_id

        # Leave the thumbnail unset (None) rather than an empty string when
        # the document carries no picture reference.
        thumbnail = None
        picture_url = video.get('urlPictures')
        if picture_url:
            picture_url = '%s' % picture_url
            thumbnail = 'http://media0-starag.startv.in/r1/thumbs/PCTV/%s/%s/PCTV-%s-hs.jpg' % (
                picture_url[-2:], picture_url, picture_url)

        episode_title = video.get('episodeTitle')
        # ``or ''`` also covers a key that is present with a null value.
        episode_slug = ('%s' % (episode_title or '')).lower().replace(' ', '-')
        url = 'http://www.hotstar.com/tv/%s/%s/%s/%s' % (
            playlist_title, series_id, episode_slug, content_id)

        return {
            '_type': 'url',
            'url': url,
            'id': content_id,
            'title': episode_title,
            'description': video.get('longDescription'),
            'thumbnail': thumbnail,
        }

    def _real_extract(self, url):
        """Query the search API for the listing's episodes and return a playlist.

        The playlist id and title are taken from the matched URL groups;
        documents without a ``contentId`` are skipped.
        """
        mobj = re.match(self._VALID_URL, url)
        series_id = mobj.group('series_id')
        playlist_id = mobj.group('playlist_id')
        playlist_title = mobj.group('playlist_title')

        collection = self._download_json(
            'http://search.hotstar.com/AVS/besc?action=SearchContents&appVersion=5.0.39&channel=PCTV&moreFilters=series:%s;&query=*&searchOrder=last_broadcast_date+desc,year+asc,title+asc&type=EPISODE' % playlist_id,
            playlist_id
        )

        # Walk the nested response defensively: any level may be absent or
        # null, in which case the playlist is simply empty.
        result_obj = (collection or {}).get('resultObj') or {}
        response = result_obj.get('response') or {}
        videos = response.get('docs') or []

        entries = [
            self._extract_episode_info(series_id, playlist_title, video)
            for video in videos
            if isinstance(video, dict) and video.get('contentId')]
        return self.playlist_result(entries, playlist_id, playlist_title)
157             for video in videos if video.get('contentId')]
158         return self.playlist_result(entries, playlist_id, playlist_title)