2 from __future__ import unicode_literals
12 from .common import InfoExtractor
13 from ..compat import (
17 compat_urllib_parse_urlparse,
29 return hashlib.md5(text.encode('utf-8')).hexdigest()
32 class IqiyiSDK(object):
33 def __init__(self, target, ip, timestamp):
36 self.timestamp = timestamp
40 return compat_str(sum(map(lambda p: int(p, 16), list(data))))
44 if isinstance(num, int):
46 return compat_str(sum(map(int, num)))
49 even = self.digit_sum(compat_str(self.timestamp)[::2])
50 odd = self.digit_sum(compat_str(self.timestamp)[1::2])
53 def preprocess(self, chunksize):
54 self.target = md5_text(self.target)
56 for i in range(32 // chunksize):
57 chunks.append(self.target[chunksize * i:chunksize * (i + 1)])
59 chunks.append(self.target[32 - 32 % chunksize:])
60 return chunks, list(map(int, self.ip.split('.')))
def mod(self, modulus):
    """Rebuild ``self.target`` from the first chunk and the reduced IP parts.

    ``preprocess(32)`` supplies the chunk list and the integer IP parts.
    Each IP part is taken modulo ``modulus``, converted to a string and
    appended, in order, to the first chunk.
    """
    chunks, ip_parts = self.preprocess(32)
    remainders = [compat_str(part % modulus) for part in ip_parts]
    self.target = chunks[0] + ''.join(remainders)
66 def split(self, chunksize):
73 chunks, ip = self.preprocess(chunksize)
75 for i in range(len(chunks)):
76 ip_part = compat_str(ip[i] % modulus_map[chunksize]) if i < 4 else ''
78 ret += ip_part + chunks[i]
80 ret += chunks[i] + ip_part
def handle_input16(self):
    """Hash the target and wrap the hash with the split sums of its halves.

    The new target is ``split_sum(hash[:16]) + hash + split_sum(hash[16:])``
    where ``hash`` is ``md5_text`` applied to the previous target.
    """
    hashed = md5_text(self.target)
    # Keep the attribute in sync before the helper calls, as the original did.
    self.target = hashed
    head_sum = self.split_sum(hashed[:16])
    tail_sum = self.split_sum(hashed[16:])
    self.target = head_sum + hashed + tail_sum
87 def handle_input8(self):
88 self.target = md5_text(self.target)
91 part = self.target[8 * i:8 * (i + 1)]
92 ret += self.split_sum(part) + part
96 self.target = md5_text(self.target)
97 self.target = self.split_sum(self.target) + self.target
99 def date(self, scheme):
100 self.target = md5_text(self.target)
101 d = time.localtime(self.timestamp)
103 'y': compat_str(d.tm_year),
104 'm': '%02d' % d.tm_mon,
105 'd': '%02d' % d.tm_mday,
107 self.target += ''.join(map(lambda c: strings[c], list(scheme)))
def split_time_even_odd(self):
    """Surround the hashed target with the two values from ``even_odd()``.

    The second value returned by ``even_odd()`` (odd) is placed in front of
    the ``md5_text`` hash of the target and the first value (even) after it.
    """
    even_sum, odd_sum = self.even_odd()
    hashed = md5_text(self.target)
    self.target = odd_sum + hashed + even_sum
def split_time_odd_even(self):
    """Surround the hashed target with the two values from ``even_odd()``.

    The first value returned by ``even_odd()`` (even) is placed in front of
    the ``md5_text`` hash of the target and the second value (odd) after it.
    """
    even_sum, odd_sum = self.even_odd()
    hashed = md5_text(self.target)
    self.target = even_sum + hashed + odd_sum
def split_ip_time_sum(self):
    """Set the target to ``sum(ip) + first chunk + digit_sum(timestamp)``.

    The chunk list and integer IP parts come from ``preprocess(32)``; the
    IP sum is stringified with ``compat_str`` before concatenation.
    """
    chunks, ip_parts = self.preprocess(32)
    ip_total = compat_str(sum(ip_parts))
    time_digits = self.digit_sum(self.timestamp)
    self.target = ip_total + chunks[0] + time_digits
def split_time_ip_sum(self):
    """Set the target to ``digit_sum(timestamp) + first chunk + sum(ip)``.

    The chunk list and integer IP parts come from ``preprocess(32)``; the
    IP sum is stringified with ``compat_str`` before concatenation.
    """
    chunks, ip_parts = self.preprocess(32)
    time_digits = self.digit_sum(self.timestamp)
    ip_total = compat_str(sum(ip_parts))
    self.target = time_digits + chunks[0] + ip_total
126 class IqiyiSDKInterpreter(object):
127 BASE62_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
129 def __init__(self, sdk_code):
130 self.sdk_code = sdk_code
133 def base62(cls, num):
138 ret = cls.BASE62_TABLE[num % 62] + ret
142 def decode_eval_codes(self):
143 self.sdk_code = self.sdk_code[5:-3]
146 r"'([^']+)',62,(\d+),'([^']+)'\.split\('\|'\),[^,]+,{}",
148 obfucasted_code, count, symbols = mobj.groups()
150 symbols = symbols.split('|')
155 b62count = self.base62(count)
156 symbol_table[b62count] = symbols[count] or b62count
158 self.sdk_code = re.sub(
159 r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
162 def run(self, target, ip, timestamp):
163 self.decode_eval_codes()
165 functions = re.findall(r'input=([a-zA-Z0-9]+)\(input', self.sdk_code)
167 sdk = IqiyiSDK(target, ip, timestamp)
170 'handleSum': sdk.handleSum,
171 'handleInput8': sdk.handle_input8,
172 'handleInput16': sdk.handle_input16,
173 'splitTimeEvenOdd': sdk.split_time_even_odd,
174 'splitTimeOddEven': sdk.split_time_odd_even,
175 'splitIpTimeSum': sdk.split_ip_time_sum,
176 'splitTimeIpSum': sdk.split_time_ip_sum,
178 for function in functions:
179 if re.match(r'mod\d+', function):
180 sdk.mod(int(function[3:]))
181 elif re.match(r'date[ymd]{3}', function):
182 sdk.date(function[4:])
183 elif re.match(r'split\d+', function):
184 sdk.split(int(function[5:]))
185 elif function in other_functions:
186 other_functions[function]()
188 raise ExtractorError('Unknown funcion %s' % function)
193 class IqiyiIE(InfoExtractor):
197 _VALID_URL = r'http://(?:[^.]+\.)?iqiyi\.com/.+\.html'
199 _NETRC_MACHINE = 'iqiyi'
202 'url': 'http://www.iqiyi.com/v_19rrojlavg.html',
203 'md5': '2cb594dc2781e6c941a110d8f358118b',
205 'id': '9c1fb1b99d192b21c559e5a1a2cb3c73',
206 'title': '美国德州空中惊现奇异云团 酷似UFO',
210 'url': 'http://www.iqiyi.com/v_19rrhnnclk.html',
212 'id': 'e3f585b550a280af23c98b6cb2be19fb',
213 'title': '名侦探柯南第752集',
217 'id': 'e3f585b550a280af23c98b6cb2be19fb_part1',
219 'title': '名侦探柯南第752集',
223 'id': 'e3f585b550a280af23c98b6cb2be19fb_part2',
225 'title': '名侦探柯南第752集',
229 'id': 'e3f585b550a280af23c98b6cb2be19fb_part3',
231 'title': '名侦探柯南第752集',
235 'id': 'e3f585b550a280af23c98b6cb2be19fb_part4',
237 'title': '名侦探柯南第752集',
241 'id': 'e3f585b550a280af23c98b6cb2be19fb_part5',
243 'title': '名侦探柯南第752集',
247 'id': 'e3f585b550a280af23c98b6cb2be19fb_part6',
249 'title': '名侦探柯南第752集',
253 'id': 'e3f585b550a280af23c98b6cb2be19fb_part7',
255 'title': '名侦探柯南第752集',
259 'id': 'e3f585b550a280af23c98b6cb2be19fb_part8',
261 'title': '名侦探柯南第752集',
265 'skip_download': True,
268 'url': 'http://www.iqiyi.com/w_19rt6o8t9p.html',
269 'only_matching': True,
271 'url': 'http://www.iqiyi.com/a_19rrhbc6kt.html',
272 'only_matching': True,
274 'url': 'http://yule.iqiyi.com/pcb.html',
275 'only_matching': True,
277 # VIP-only video. The first 2 parts (6 minutes) are available without login
278 # MD5 sums omitted as values are different on Travis CI and my machine
279 'url': 'http://www.iqiyi.com/v_19rrny4w8w.html',
281 'id': 'f3cf468b39dddb30d676f89a91200dc1',
286 'id': 'f3cf468b39dddb30d676f89a91200dc1_part1',
292 'id': 'f3cf468b39dddb30d676f89a91200dc1_part2',
297 'expected_warnings': ['Needs a VIP account for full video'],
309 def _real_initialize(self):
314 # public key extracted from http://static.iqiyi.com/js/qiyiV2/20160129180840/jobs/i18n/i18nIndex.js
315 N = 0xab86b6371b5318aaa1d3c9e612a9f1264f372323c8c0f19875b5fc3b3fd3afcc1e5bec527aa94bfa85bffc157e4245aebda05389a5357b75115ac94f074aefcd
318 return ohdave_rsa_encrypt(data, e, N)
321 (username, password) = self._get_login_info()
323 # No authentication to be performed
327 data = self._download_json(
328 'http://kylin.iqiyi.com/get_token', None,
329 note='Get token for logging', errnote='Unable to get token for logging')
331 timestamp = int(time.time())
332 target = '/apis/reglogin/login.action?lang=zh_TW&area_code=null&email=%s&passwd=%s&agenttype=1&from=undefined&keeplogin=0&piccode=&fromurl=&_pos=1' % (
333 username, self._rsa_fun(password.encode('utf-8')))
335 interp = IqiyiSDKInterpreter(sdk)
336 sign = interp.run(target, data['ip'], timestamp)
338 validation_params = {
340 'server': 'BEA3AA1908656AABCCFF76582C4C6660',
341 'token': data['token'],
342 'bird_src': 'f8d91d57af224da7893dd397d52d811a',
346 validation_result = self._download_json(
347 'http://kylin.iqiyi.com/validate?' + compat_urllib_parse.urlencode(validation_params), None,
348 note='Validate credentials', errnote='Unable to validate credentials')
351 'P00107': 'please login via the web interface and enter the CAPTCHA code',
352 'P00117': 'bad username or password',
355 code = validation_result['code']
357 msg = MSG_MAP.get(code)
359 msg = 'error %s' % code
360 if validation_result.get('msg'):
361 msg += ': ' + validation_result['msg']
362 self._downloader.report_warning('unable to log in: ' + msg)
367 def _authenticate_vip_video(self, api_video_url, video_id, tvid, _uuid, do_report_warning):
369 # version and platform hard-coded in com/qiyi/player/core/model/remote/AuthenticationRemote.as
371 'platform': 'b6c13e26323c537d',
376 'playType': 'main', # XXX: always main?
377 'filename': os.path.splitext(url_basename(api_video_url))[0],
380 qd_items = compat_parse_qs(compat_urllib_parse_urlparse(api_video_url).query)
381 for key, val in qd_items.items():
382 auth_params[key] = val[0]
384 auth_req = sanitized_Request(
385 'http://api.vip.iqiyi.com/services/ckn.action',
386 urlencode_postdata(auth_params))
387 # iQiyi server throws HTTP 405 error without the following header
388 auth_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
389 auth_result = self._download_json(
391 note='Downloading video authentication JSON',
392 errnote='Unable to download video authentication JSON')
393 if auth_result['code'] == 'Q00506': # requires a VIP account
394 if do_report_warning:
395 self.report_warning('Needs a VIP account for full video')
400 def construct_video_urls(self, data, video_id, _uuid, tvid):
409 def get_encode_code(l):
414 for i in range(c - 1, -1, -1):
415 a = do_xor(int(b[c - i - 1], 16), i)
419 def get_path_key(x, format_id, segment_index):
420 mg = ')(*&^flash@#$%a'
421 tm = self._download_json(
422 'http://data.video.qiyi.com/t?tn=' + str(random.random()), video_id,
423 note='Download path key of segment %d for format %s' % (segment_index + 1, format_id)
425 t = str(int(math.floor(int(tm) / (600.0))))
426 return md5_text(t + mg + x)
429 need_vip_warning_report = True
430 for format_item in data['vp']['tkl'][0]['vs']:
431 if 0 < int(format_item['bid']) <= 10:
432 format_id = self.get_format(format_item['bid'])
438 video_urls_info = format_item['fs']
439 if not format_item['fs'][0]['l'].startswith('/'):
440 t = get_encode_code(format_item['fs'][0]['l'])
441 if t.endswith('mp4'):
442 video_urls_info = format_item['flvs']
444 for segment_index, segment in enumerate(video_urls_info):
446 if not vl.startswith('/'):
447 vl = get_encode_code(vl)
448 is_vip_video = '/vip/' in vl
449 filesize = segment['b']
450 base_url = data['vp']['du'].split('/')
453 vl.split('/')[-1].split('.')[0], format_id, segment_index)
454 base_url.insert(-1, key)
455 base_url = '/'.join(base_url)
458 'qyid': uuid.uuid4().hex,
463 'tn': str(int(time.time()))
465 api_video_url = base_url + vl
467 api_video_url = api_video_url.replace('.f4v', '.hml')
468 auth_result = self._authenticate_vip_video(
469 api_video_url, video_id, tvid, _uuid, need_vip_warning_report)
470 if auth_result is False:
471 need_vip_warning_report = False
474 't': auth_result['data']['t'],
475 # cid is hard-coded in com/qiyi/player/core/player/RuntimeData.as
476 'cid': 'afbe8fd3d73448c9',
478 'QY00001': auth_result['data']['u'],
480 api_video_url += '?' if '?' not in api_video_url else '&'
481 api_video_url += compat_urllib_parse.urlencode(param)
482 js = self._download_json(
483 api_video_url, video_id,
484 note='Download video info of segment %d for format %s' % (segment_index + 1, format_id))
487 (video_url, filesize))
489 video_urls_dict[format_id] = video_urls
490 return video_urls_dict
def get_format(self, bid):
    """Return the format id paired with ``bid`` in ``self._FORMATS_MAP``.

    ``bid`` is converted with ``str()`` before comparison, so both integer
    and string bids are accepted.  The first matching pair wins; ``None``
    is returned when no pair matches.
    """
    # Convert once instead of on every iteration.
    wanted_bid = str(bid)
    # next() stops at the first match instead of collecting every match.
    return next(
        (_format_id for _bid, _format_id in self._FORMATS_MAP
         if _bid == wanted_bid),
        None)
def get_bid(self, format_id):
    """Return the bid paired with ``format_id`` in ``self._FORMATS_MAP``.

    The first matching pair wins; ``None`` is returned when no pair has the
    given ``format_id``.
    """
    # next() stops at the first match instead of collecting every match.
    return next(
        (_bid for _bid, _format_id in self._FORMATS_MAP
         if _format_id == format_id),
        None)
500 def get_raw_data(self, tvid, video_id, enc_key, _uuid):
501 tm = str(int(time.time()))
505 'src': md5_text('youtube-dl'),
510 'enc': md5_text(enc_key + tail),
512 'tn': random.random(),
514 'authkey': md5_text(md5_text('') + tail),
518 api_url = 'http://cache.video.qiyi.com/vms' + '?' + \
519 compat_urllib_parse.urlencode(param)
520 raw_data = self._download_json(api_url, video_id)
523 def get_enc_key(self, swf_url, video_id):
524 # TODO: automatic key extraction
525 # last update at 2016-01-22 for Zombie::bite
526 enc_key = '6ab6d0280511493ba85594779759d4ed'
529 def _real_extract(self, url):
530 webpage = self._download_webpage(
531 url, 'temp_id', note='download video page')
532 tvid = self._search_regex(
533 r'data-player-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid')
534 video_id = self._search_regex(
535 r'data-player-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id')
536 swf_url = self._search_regex(
537 r'(http://[^\'"]+MainPlayer[^.]+\.swf)', webpage, 'swf player URL')
538 _uuid = uuid.uuid4().hex
540 enc_key = self.get_enc_key(swf_url, video_id)
542 raw_data = self.get_raw_data(tvid, video_id, enc_key, _uuid)
544 if raw_data['code'] != 'A000000':
545 raise ExtractorError('Unable to load data. Error code: ' + raw_data['code'])
547 data = raw_data['data']
549 title = data['vi']['vn']
551 # generate video_urls_dict
552 video_urls_dict = self.construct_video_urls(
553 data, video_id, _uuid, tvid)
557 for format_id in video_urls_dict:
558 video_urls = video_urls_dict[format_id]
559 for i, video_url_info in enumerate(video_urls):
560 if len(entries) < i + 1:
561 entries.append({'formats': []})
562 entries[i]['formats'].append(
564 'url': video_url_info[0],
565 'filesize': video_url_info[-1],
566 'format_id': format_id,
567 'preference': int(self.get_bid(format_id))
571 for i in range(len(entries)):
572 self._sort_formats(entries[i]['formats'])
575 'id': '%s_part%d' % (video_id, i + 1),
582 '_type': 'multi_video',
589 info['id'] = video_id
590 info['title'] = title