_ Git - youtube-dl/blob - youtube_dl/extractor/dramafever.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import itertools
   5
   6 from .common import InfoExtractor
   7 from ..compat import (
   8     compat_str,
   9     compat_urlparse,
  10 )
  11 from ..utils import (
  12     clean_html,
  13     ExtractorError,
  14     int_or_none,
  15     parse_age_limit,
  16     parse_duration,
  17     sanitized_Request,
  18     unified_timestamp,
  19     urlencode_postdata
  20 )
  21
  22
  23 class DramaFeverBaseIE(InfoExtractor):
  24     _LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
  25     _NETRC_MACHINE = 'dramafever'
  26
  27     _CONSUMER_SECRET = 'DA59dtVXYLxajktV'
  28
  29     _consumer_secret = None
  30
  31     def _get_consumer_secret(self):
  32         mainjs = self._download_webpage(
  33             'http://www.dramafever.com/static/51afe95/df2014/scripts/main.js',
  34             None, 'Downloading main.js', fatal=False)
  35         if not mainjs:
  36             return self._CONSUMER_SECRET
  37         return self._search_regex(
  38             r"var\s+cs\s*=\s*'([^']+)'", mainjs,
  39             'consumer secret', default=self._CONSUMER_SECRET)
  40
  41     def _real_initialize(self):
  42         self._login()
  43         self._consumer_secret = self._get_consumer_secret()
  44
  45     def _login(self):
  46         (username, password) = self._get_login_info()
  47         if username is None:
  48             return
  49
  50         login_form = {
  51             'username': username,
  52             'password': password,
  53         }
  54
  55         request = sanitized_Request(
  56             self._LOGIN_URL, urlencode_postdata(login_form))
  57         response = self._download_webpage(
  58             request, None, 'Logging in')
  59
  60         if all(logout_pattern not in response
  61                for logout_pattern in ['href="/accounts/logout/"', '>Log out<']):
  62             error = self._html_search_regex(
  63                 r'(?s)<h\d[^>]+\bclass="hidden-xs prompt"[^>]*>(.+?)</h\d',
  64                 response, 'error message', default=None)
  65             if error:
  66                 raise ExtractorError('Unable to login: %s' % error, expected=True)
  67             raise ExtractorError('Unable to log in')
  68
  69
  70 class DramaFeverIE(DramaFeverBaseIE):
  71     IE_NAME = 'dramafever'
  72     _VALID_URL = r'https?://(?:www\.)?dramafever\.com/(?:[^/]+/)?drama/(?P<id>[0-9]+/[0-9]+)(?:/|$)'
  73     _TESTS = [{
  74         'url': 'https://www.dramafever.com/drama/4274/1/Heirs/',
  75         'info_dict': {
  76             'id': '4274.1',
  77             'ext': 'wvm',
  78             'title': 'Heirs - Episode 1',
  79             'description': 'md5:362a24ba18209f6276e032a651c50bc2',
  80             'thumbnail': r're:^https?://.*\.jpg',
  81             'duration': 3783,
  82             'timestamp': 1381354993,
  83             'upload_date': '20131009',
  84             'series': 'Heirs',
  85             'season_number': 1,
  86             'episode': 'Episode 1',
  87             'episode_number': 1,
  88         },
  89         'params': {
  90             # m3u8 download
  91             'skip_download': True,
  92         },
  93     }, {
  94         'url': 'http://www.dramafever.com/drama/4826/4/Mnet_Asian_Music_Awards_2015/?ap=1',
  95         'info_dict': {
  96             'id': '4826.4',
  97             'ext': 'flv',
  98             'title': 'Mnet Asian Music Awards 2015',
  99             'description': 'md5:3ff2ee8fedaef86e076791c909cf2e91',
 100             'episode': 'Mnet Asian Music Awards 2015 - Part 3',
 101             'episode_number': 4,
 102             'thumbnail': r're:^https?://.*\.jpg',
 103             'timestamp': 1450213200,
 104             'upload_date': '20151215',
 105             'duration': 5359,
 106         },
 107         'params': {
 108             # m3u8 download
 109             'skip_download': True,
 110         },
 111     }, {
 112         'url': 'https://www.dramafever.com/zh-cn/drama/4972/15/Doctor_Romantic/',
 113         'only_matching': True,
 114     }]
 115
 116     def _call_api(self, path, video_id, note, fatal=False):
 117         return self._download_json(
 118             'https://www.dramafever.com/api/5/' + path,
 119             video_id, note=note, headers={
 120                 'x-consumer-key': self._consumer_secret,
 121             }, fatal=fatal)
 122
 123     def _get_subtitles(self, video_id):
 124         subtitles = {}
 125         subs = self._call_api(
 126             'video/%s/subtitles/webvtt/' % video_id, video_id,
 127             'Downloading subtitles JSON', fatal=False)
 128         if not subs or not isinstance(subs, list):
 129             return subtitles
 130         for sub in subs:
 131             if not isinstance(sub, dict):
 132                 continue
 133             sub_url = sub.get('url')
 134             if not sub_url or not isinstance(sub_url, compat_str):
 135                 continue
 136             subtitles.setdefault(
 137                 sub.get('code') or sub.get('language') or 'en', []).append({
 138                     'url': sub_url
 139                 })
 140         return subtitles
 141
 142     def _real_extract(self, url):
 143         video_id = self._match_id(url).replace('/', '.')
 144
 145         series_id, episode_number = video_id.split('.')
 146
 147         video = self._call_api(
 148             'series/%s/episodes/%s/' % (series_id, episode_number), video_id,
 149             'Downloading video JSON')
 150
 151         formats = []
 152         download_assets = video.get('download_assets')
 153         if download_assets and isinstance(download_assets, dict):
 154             for format_id, format_dict in download_assets.items():
 155                 if not isinstance(format_dict, dict):
 156                     continue
 157                 format_url = format_dict.get('url')
 158                 if not format_url or not isinstance(format_url, compat_str):
 159                     continue
 160                 formats.append({
 161                     'url': format_url,
 162                     'format_id': format_id,
 163                     'filesize': int_or_none(video.get('filesize')),
 164                 })
 165
 166         stream = self._call_api(
 167             'video/%s/stream/' % video_id, video_id, 'Downloading stream JSON',
 168             fatal=False)
 169         if stream:
 170             stream_url = stream.get('stream_url')
 171             if stream_url:
 172                 formats.extend(self._extract_m3u8_formats(
 173                     stream_url, video_id, 'mp4', entry_protocol='m3u8_native',
 174                     m3u8_id='hls', fatal=False))
 175         self._sort_formats(formats)
 176
 177         title = video.get('title') or 'Episode %s' % episode_number
 178         description = video.get('description')
 179         thumbnail = video.get('thumbnail')
 180         timestamp = unified_timestamp(video.get('release_date'))
 181         duration = parse_duration(video.get('duration'))
 182         age_limit = parse_age_limit(video.get('tv_rating'))
 183         series = video.get('series_title')
 184         season_number = int_or_none(video.get('season'))
 185
 186         if series:
 187             title = '%s - %s' % (series, title)
 188
 189         subtitles = self.extract_subtitles(video_id)
 190
 191         return {
 192             'id': video_id,
 193             'title': title,
 194             'description': description,
 195             'thumbnail': thumbnail,
 196             'duration': duration,
 197             'timestamp': timestamp,
 198             'age_limit': age_limit,
 199             'series': series,
 200             'season_number': season_number,
 201             'episode_number': int_or_none(episode_number),
 202             'formats': formats,
 203             'subtitles': subtitles,
 204         }
 205
 206
 207 class DramaFeverSeriesIE(DramaFeverBaseIE):
 208     IE_NAME = 'dramafever:series'
 209     _VALID_URL = r'https?://(?:www\.)?dramafever\.com/(?:[^/]+/)?drama/(?P<id>[0-9]+)(?:/(?:(?!\d+(?:/|$)).+)?)?$'
 210     _TESTS = [{
 211         'url': 'http://www.dramafever.com/drama/4512/Cooking_with_Shin/',
 212         'info_dict': {
 213             'id': '4512',
 214             'title': 'Cooking with Shin',
 215             'description': 'md5:84a3f26e3cdc3fb7f500211b3593b5c1',
 216         },
 217         'playlist_count': 4,
 218     }, {
 219         'url': 'http://www.dramafever.com/drama/124/IRIS/',
 220         'info_dict': {
 221             'id': '124',
 222             'title': 'IRIS',
 223             'description': 'md5:b3a30e587cf20c59bd1c01ec0ee1b862',
 224         },
 225         'playlist_count': 20,
 226     }]
 227
 228     _PAGE_SIZE = 60  # max is 60 (see http://api.drama9.com/#get--api-4-episode-series-)
 229
 230     def _real_extract(self, url):
 231         series_id = self._match_id(url)
 232
 233         series = self._download_json(
 234             'http://www.dramafever.com/api/4/series/query/?cs=%s&series_id=%s'
 235             % (self._consumer_secret, series_id),
 236             series_id, 'Downloading series JSON')['series'][series_id]
 237
 238         title = clean_html(series['name'])
 239         description = clean_html(series.get('description') or series.get('description_short'))
 240
 241         entries = []
 242         for page_num in itertools.count(1):
 243             episodes = self._download_json(
 244                 'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_size=%d&page_number=%d'
 245                 % (self._consumer_secret, series_id, self._PAGE_SIZE, page_num),
 246                 series_id, 'Downloading episodes JSON page #%d' % page_num)
 247             for episode in episodes.get('value', []):
 248                 episode_url = episode.get('episode_url')
 249                 if not episode_url:
 250                     continue
 251                 entries.append(self.url_result(
 252                     compat_urlparse.urljoin(url, episode_url),
 253                     'DramaFever', episode.get('guid')))
 254             if page_num == episodes['num_pages']:
 255                 break
 256
 257         return self.playlist_result(entries, series_id, title, description)