_ Git - youtube-dl/blob - youtube_dl/extractor/hotstar.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 from .common import InfoExtractor
   5 from ..utils import (
   6     ExtractorError,
   7     determine_ext,
   8     int_or_none,
   9 )
  10 import re
  11
  12
  13 class HotStarIE(InfoExtractor):
  14     _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})'
  15     _GEO_COUNTRIES = ['IN']
  16     _TESTS = [{
  17         'url': 'http://www.hotstar.com/on-air-with-aib--english-1000076273',
  18         'info_dict': {
  19             'id': '1000076273',
  20             'ext': 'mp4',
  21             'title': 'On Air With AIB',
  22             'description': 'md5:c957d8868e9bc793ccb813691cc4c434',
  23             'timestamp': 1447227000,
  24             'upload_date': '20151111',
  25             'duration': 381,
  26         },
  27         'params': {
  28             # m3u8 download
  29             'skip_download': True,
  30         }
  31     }, {
  32         'url': 'http://www.hotstar.com/sports/cricket/rajitha-sizzles-on-debut-with-329/2001477583',
  33         'only_matching': True,
  34     }, {
  35         'url': 'http://www.hotstar.com/1000000515',
  36         'only_matching': True,
  37     }]
  38
  39     def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', fatal=True, query=None):
  40         json_data = super(HotStarIE, self)._download_json(
  41             url_or_request, video_id, note, fatal=fatal, query=query)
  42         if json_data['resultCode'] != 'OK':
  43             if fatal:
  44                 raise ExtractorError(json_data['errorDescription'])
  45             return None
  46         return json_data['resultObj']
  47
  48     def _real_extract(self, url):
  49         video_id = self._match_id(url)
  50         video_data = self._download_json(
  51             'http://account.hotstar.com/AVS/besc', video_id, query={
  52                 'action': 'GetAggregatedContentDetails',
  53                 'channel': 'PCTV',
  54                 'contentId': video_id,
  55             })['contentInfo'][0]
  56         title = video_data['episodeTitle']
  57
  58         if video_data.get('encrypted') == 'Y':
  59             raise ExtractorError('This video is DRM protected.', expected=True)
  60
  61         formats = []
  62         for f in ('JIO',):
  63             format_data = self._download_json(
  64                 'http://getcdn.hotstar.com/AVS/besc',
  65                 video_id, 'Downloading %s JSON metadata' % f,
  66                 fatal=False, query={
  67                     'action': 'GetCDN',
  68                     'asJson': 'Y',
  69                     'channel': f,
  70                     'id': video_id,
  71                     'type': 'VOD',
  72                 })
  73             if format_data:
  74                 format_url = format_data.get('src')
  75                 if not format_url:
  76                     continue
  77                 ext = determine_ext(format_url)
  78                 if ext == 'm3u8':
  79                     formats.extend(self._extract_m3u8_formats(
  80                         format_url, video_id, 'mp4',
  81                         m3u8_id='hls', fatal=False))
  82                 elif ext == 'f4m':
  83                     # produce broken files
  84                     continue
  85                 else:
  86                     formats.append({
  87                         'url': format_url,
  88                         'width': int_or_none(format_data.get('width')),
  89                         'height': int_or_none(format_data.get('height')),
  90                     })
  91         self._sort_formats(formats)
  92
  93         return {
  94             'id': video_id,
  95             'title': title,
  96             'description': video_data.get('description'),
  97             'duration': int_or_none(video_data.get('duration')),
  98             'timestamp': int_or_none(video_data.get('broadcastDate')),
  99             'formats': formats,
 100             'episode': title,
 101             'episode_number': int_or_none(video_data.get('episodeNumber')),
 102             'series': video_data.get('contentTitle'),
 103         }
 104
 105
 106 class HotStarPlaylistIE(InfoExtractor):
 107     IE_NAME = 'hotstar:playlist'
 108     _VALID_URL = r'https?://(?:www\.)?hotstar\.com/tv/(?P<playlist_title>.+)/(?P<series_id>\d+)/episodes/(?P<playlist_id>\d{1,})'
 109
 110     _TESTS = [{
 111         'url': 'http://www.hotstar.com/tv/pow-bandi-yuddh-ke/10999/episodes/10856/9993',
 112         'info_dict': {
 113             'id': '10856',
 114             'title': 'pow-bandi-yuddh-ke',
 115         },
 116         'playlist_mincount': 0,
 117     }, {
 118         'url': 'http://www.hotstar.com/tv/pow-bandi-yuddh-ke/10999/episodes/10856/9993',
 119         'only_matching': True,
 120     }]
 121
 122     def _extract_episode_info(self, series_id, playlist_title, video):
 123
 124         picture_url = video.get('urlPictures')
 125         thumbnail = ''
 126         if picture_url:
 127             thumbnail = 'http://media0-starag.startv.in/r1/thumbs/PCTV/%s/%s/PCTV-%s-hs.jpg' % (picture_url[-2:], picture_url, picture_url)
 128
 129         episode_title = video.get('episodeTitle', '')
 130         episode_title = episode_title.lower().replace(' ', '-')
 131         url = "http://www.hotstar.com/tv/%s/%s/%s/%s" % (playlist_title, series_id, episode_title, video.get('contentId'))
 132
 133         info_dict = {
 134             'id': video.get('contentId'),
 135             'title': video.get('episodeTitle'),
 136             'description': video.get('longDescription'),
 137             'thumbnail': thumbnail,
 138             'url': url,
 139             '_type': 'url',
 140         }
 141         return info_dict
 142
 143     def _real_extract(self, url):
 144         mobj = re.match(self._VALID_URL, url)
 145         series_id = mobj.group('series_id')
 146         playlist_id = mobj.group('playlist_id')
 147         playlist_title = mobj.group('playlist_title')
 148
 149         collection = self._download_json(
 150             "http://search.hotstar.com/AVS/besc?action=SearchContents&appVersion=5.0.39&channel=PCTV&moreFilters=series:%s;&query=*&searchOrder=last_broadcast_date+desc,year+asc,title+asc&type=EPISODE" % playlist_id,
 151             playlist_id
 152         )
 153
 154         videos = collection.get('resultObj', {}).get('response', {}).get('docs', [])
 155         entries = [
 156             self._extract_episode_info(series_id, playlist_title, video)
 157             for video in videos if video.get('contentId')]
 158         return self.playlist_result(entries, playlist_id, playlist_title)