_ Git - youtube-dl/blob - youtube_dl/extractor/viki.py

   1 from __future__ import unicode_literals
   2
   3 import time
   4 import hmac
   5 import hashlib
   6 import itertools
   7
   8 from ..utils import (
   9     ExtractorError,
  10     int_or_none,
  11     parse_age_limit,
  12     parse_iso8601,
  13 )
  14 from .common import InfoExtractor
  15
  16
  17 class VikiBaseIE(InfoExtractor):
  18     _API_QUERY_TEMPLATE = '/v4/%sapp=%s&t=%s&site=www.viki.com'
  19     _API_URL_TEMPLATE = 'http://api.viki.io%s&sig=%s'
  20
  21     _APP = '65535a'
  22     _APP_VERSION = '2.2.5.1428709186'
  23     _APP_SECRET = '-$iJ}@p7!G@SyU/je1bEyWg}upLu-6V6-Lg9VD(]siH,r.,m-r|ulZ,U4LC/SeR)'
  24
  25     def _prepare_call(self, path, timestamp=None):
  26         path += '?' if '?' not in path else '&'
  27         if not timestamp:
  28             timestamp = int(time.time())
  29         query = self._API_QUERY_TEMPLATE % (path, self._APP, timestamp)
  30         sig = hmac.new(
  31             self._APP_SECRET.encode('ascii'),
  32             query.encode('ascii'),
  33             hashlib.sha1
  34         ).hexdigest()
  35         return self._API_URL_TEMPLATE % (query, sig)
  36
  37     def _call_api(self, path, video_id, note, timestamp=None):
  38         resp = self._download_json(
  39             self._prepare_call(path, timestamp), video_id, note)
  40
  41         error = resp.get('error')
  42         if error:
  43             if error == 'invalid timestamp':
  44                 resp = self._download_json(
  45                     self._prepare_call(path, int(resp['current_timestamp'])),
  46                     video_id, '%s (retry)' % note)
  47                 error = resp.get('error')
  48             if error:
  49                 self._raise_error(resp['error'])
  50
  51         return resp
  52
  53     def _raise_error(self, error):
  54         raise ExtractorError(
  55             '%s returned error: %s' % (self.IE_NAME, error),
  56             expected=True)
  57
  58
  59 class VikiIE(VikiBaseIE):
  60     IE_NAME = 'viki'
  61     _VALID_URL = r'https?://(?:www\.)?viki\.com/(?:videos|player)/(?P<id>[0-9]+v)'
  62     _TESTS = [{
  63         'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
  64         'info_dict': {
  65             'id': '1023585v',
  66             'ext': 'mp4',
  67             'title': 'Heirs Episode 14',
  68             'uploader': 'SBS',
  69             'description': 'md5:c4b17b9626dd4b143dcc4d855ba3474e',
  70             'upload_date': '20131121',
  71             'age_limit': 13,
  72         },
  73         'skip': 'Blocked in the US',
  74     }, {
  75         # clip
  76         'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference',
  77         'md5': '86c0b5dbd4d83a6611a79987cc7a1989',
  78         'info_dict': {
  79             'id': '1067139v',
  80             'ext': 'mp4',
  81             'title': "'The Avengers: Age of Ultron' Press Conference",
  82             'description': 'md5:d70b2f9428f5488321bfe1db10d612ea',
  83             'duration': 352,
  84             'timestamp': 1430380829,
  85             'upload_date': '20150430',
  86             'uploader': 'Arirang TV',
  87             'like_count': int,
  88             'age_limit': 0,
  89         }
  90     }, {
  91         'url': 'http://www.viki.com/videos/1048879v-ankhon-dekhi',
  92         'info_dict': {
  93             'id': '1048879v',
  94             'ext': 'mp4',
  95             'title': 'Ankhon Dekhi',
  96             'duration': 6512,
  97             'timestamp': 1408532356,
  98             'upload_date': '20140820',
  99             'uploader': 'Spuul',
 100             'like_count': int,
 101             'age_limit': 13,
 102         },
 103         'params': {
 104             # m3u8 download
 105             'skip_download': True,
 106         }
 107     }, {
 108         # episode
 109         'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
 110         'md5': '190f3ef426005ba3a080a63325955bc3',
 111         'info_dict': {
 112             'id': '44699v',
 113             'ext': 'mp4',
 114             'title': 'Boys Over Flowers - Episode 1',
 115             'description': 'md5:52617e4f729c7d03bfd4bcbbb6e946f2',
 116             'duration': 4155,
 117             'timestamp': 1270496524,
 118             'upload_date': '20100405',
 119             'uploader': 'group8',
 120             'like_count': int,
 121             'age_limit': 13,
 122         }
 123     }, {
 124         # youtube external
 125         'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1',
 126         'md5': '216d1afdc0c64d1febc1e9f2bd4b864b',
 127         'info_dict': {
 128             'id': '50562v',
 129             'ext': 'mp4',
 130             'title': 'Poor Nastya [COMPLETE] - Episode 1',
 131             'description': '',
 132             'duration': 607,
 133             'timestamp': 1274949505,
 134             'upload_date': '20101213',
 135             'uploader': 'ad14065n',
 136             'uploader_id': 'ad14065n',
 137             'like_count': int,
 138             'age_limit': 13,
 139         }
 140     }, {
 141         'url': 'http://www.viki.com/player/44699v',
 142         'only_matching': True,
 143     }]
 144
 145     def _real_extract(self, url):
 146         video_id = self._match_id(url)
 147
 148         video = self._call_api(
 149             'videos/%s.json' % video_id, video_id, 'Downloading video JSON')
 150
 151         title = None
 152         titles = video.get('titles')
 153         if titles:
 154             title = titles.get('en') or titles[titles.keys()[0]]
 155         if not title:
 156             title = 'Episode %d' % video.get('number') if video.get('type') == 'episode' else video.get('id') or video_id
 157             container_titles = video.get('container', {}).get('titles')
 158             if container_titles:
 159                 container_title = container_titles.get('en') or container_titles[container_titles.keys()[0]]
 160                 title = '%s - %s' % (container_title, title)
 161
 162         descriptions = video.get('descriptions')
 163         description = descriptions.get('en') or descriptions[titles.keys()[0]] if descriptions else None
 164
 165         duration = int_or_none(video.get('duration'))
 166         timestamp = parse_iso8601(video.get('created_at'))
 167         uploader = video.get('author')
 168         like_count = int_or_none(video.get('likes', {}).get('count'))
 169         age_limit = parse_age_limit(video.get('rating'))
 170
 171         thumbnails = []
 172         for thumbnail_id, thumbnail in video.get('images', {}).items():
 173             thumbnails.append({
 174                 'id': thumbnail_id,
 175                 'url': thumbnail.get('url'),
 176             })
 177
 178         subtitles = {}
 179         for subtitle_lang, _ in video.get('subtitle_completions', {}).items():
 180             subtitles[subtitle_lang] = [{
 181                 'ext': subtitles_format,
 182                 'url': self._prepare_call(
 183                     'videos/%s/subtitles/%s.%s' % (video_id, subtitle_lang, subtitles_format)),
 184             } for subtitles_format in ('srt', 'vtt')]
 185
 186         result = {
 187             'id': video_id,
 188             'title': title,
 189             'description': description,
 190             'duration': duration,
 191             'timestamp': timestamp,
 192             'uploader': uploader,
 193             'like_count': like_count,
 194             'age_limit': age_limit,
 195             'thumbnails': thumbnails,
 196             'subtitles': subtitles,
 197         }
 198
 199         streams = self._call_api(
 200             'videos/%s/streams.json' % video_id, video_id,
 201             'Downloading video streams JSON')
 202
 203         if 'external' in streams:
 204             result.update({
 205                 '_type': 'url_transparent',
 206                 'url': streams['external']['url'],
 207             })
 208             return result
 209
 210         formats = []
 211         for format_id, stream_dict in streams.items():
 212             height = self._search_regex(
 213                 r'^(\d+)[pP]$', format_id, 'height', default=None)
 214             for protocol, format_dict in stream_dict.items():
 215                 if format_id == 'm3u8':
 216                     formats = self._extract_m3u8_formats(
 217                         format_dict['url'], video_id, 'mp4', m3u8_id='m3u8-%s' % protocol)
 218                 else:
 219                     formats.append({
 220                         'url': format_dict['url'],
 221                         'format_id': '%s-%s' % (format_id, protocol),
 222                         'height': height,
 223                     })
 224         self._sort_formats(formats)
 225
 226         result['formats'] = formats
 227         return result
 228
 229
 230 class VikiChannelIE(VikiBaseIE):
 231     IE_NAME = 'viki:channel'
 232     _VALID_URL = r'https?://(?:www\.)?viki\.com/(?:tv|news|movies|artists)/(?P<id>[0-9]+c)'
 233     _TESTS = [{
 234         'url': 'http://www.viki.com/tv/50c-boys-over-flowers',
 235         'info_dict': {
 236             'id': '50c',
 237             'title': 'Boys Over Flowers',
 238             'description': 'md5:ecd3cff47967fe193cff37c0bec52790',
 239         },
 240         'playlist_count': 70,
 241     }, {
 242         'url': 'http://www.viki.com/tv/1354c-poor-nastya-complete',
 243         'info_dict': {
 244             'id': '1354c',
 245             'title': 'Poor Nastya [COMPLETE]',
 246             'description': 'md5:05bf5471385aa8b21c18ad450e350525',
 247         },
 248         'playlist_count': 127,
 249     }, {
 250         'url': 'http://www.viki.com/news/24569c-showbiz-korea',
 251         'only_matching': True,
 252     }, {
 253         'url': 'http://www.viki.com/movies/22047c-pride-and-prejudice-2005',
 254         'only_matching': True,
 255     }, {
 256         'url': 'http://www.viki.com/artists/2141c-shinee',
 257         'only_matching': True,
 258     }]
 259
 260     _PER_PAGE = 25
 261
 262     def _real_extract(self, url):
 263         channel_id = self._match_id(url)
 264
 265         channel = self._call_api(
 266             'containers/%s.json' % channel_id, channel_id,
 267             'Downloading channel JSON')
 268
 269         titles = channel['titles']
 270         title = titles.get('en') or titles[titles.keys()[0]]
 271
 272         descriptions = channel['descriptions']
 273         description = descriptions.get('en') or descriptions[descriptions.keys()[0]]
 274
 275         entries = []
 276         for video_type in ('episodes', 'clips', 'movies'):
 277             for page_num in itertools.count(1):
 278                 page = self._call_api(
 279                     'containers/%s/%s.json?per_page=%d&sort=number&direction=asc&with_paging=true&page=%d'
 280                     % (channel_id, video_type, self._PER_PAGE, page_num), channel_id,
 281                     'Downloading %s JSON page #%d' % (video_type, page_num))
 282                 for video in page['response']:
 283                     video_id = video['id']
 284                     entries.append(self.url_result(
 285                         'http://www.viki.com/videos/%s' % video_id, 'Viki'))
 286                 if not page['pagination']['next']:
 287                     break
 288
 289         return self.playlist_result(entries, channel_id, title, description)