X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;ds=inline;f=youtube_dl%2Fextractor%2Fiqiyi.py;h=ddcb3c916e6a0610484dc5ceddbd84b507e761fd;hb=877032314fdf2d9b391325f96e3bc53a60ea067c;hp=541fe7ae3053aa28bf3e3f4176ef5ff31db12e55;hpb=99709cc3f121010421360ba94d75e16ce15f0be5;p=youtube-dl diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index 541fe7ae3..ddcb3c916 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import hashlib +import itertools import math import os import random @@ -13,12 +14,14 @@ from .common import InfoExtractor from ..compat import ( compat_parse_qs, compat_str, - compat_urllib_parse, + compat_urllib_parse_urlencode, compat_urllib_parse_urlparse, ) from ..utils import ( + decode_packed_codes, ExtractorError, ohdave_rsa_encrypt, + remove_start, sanitized_Request, urlencode_postdata, url_basename, @@ -124,43 +127,11 @@ class IqiyiSDK(object): class IqiyiSDKInterpreter(object): - BASE62_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' - def __init__(self, sdk_code): self.sdk_code = sdk_code - @classmethod - def base62(cls, num): - if num == 0: - return '0' - ret = '' - while num: - ret = cls.BASE62_TABLE[num % 62] + ret - num = num // 62 - return ret - - def decode_eval_codes(self): - self.sdk_code = self.sdk_code[5:-3] - - mobj = re.search( - r"'([^']+)',62,(\d+),'([^']+)'\.split\('\|'\),[^,]+,{}", - self.sdk_code) - obfucasted_code, count, symbols = mobj.groups() - count = int(count) - symbols = symbols.split('|') - symbol_table = {} - - while count: - count -= 1 - b62count = self.base62(count) - symbol_table[b62count] = symbols[count] or b62count - - self.sdk_code = re.sub( - r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)], - obfucasted_code) - def run(self, target, ip, timestamp): - self.decode_eval_codes() + self.sdk_code = decode_packed_codes(self.sdk_code) functions = re.findall(r'input=([a-zA-Z0-9]+)\(input', self.sdk_code) @@ -194,7 +165,7 @@ class IqiyiIE(InfoExtractor): IE_NAME = 'iqiyi' IE_DESC = '爱奇艺' - _VALID_URL = r'http://(?:[^.]+\.)?iqiyi\.com/.+\.html' + _VALID_URL = r'https?://(?:(?:[^.]+\.)?iqiyi\.com|www\.pps\.tv)/.+\.html' _NETRC_MACHINE = 'iqiyi' @@ -295,6 +266,16 @@ class IqiyiIE(InfoExtractor): }, }], 'expected_warnings': ['Needs a VIP account for full video'], + }, { + 'url': 'http://www.iqiyi.com/a_19rrhb8ce1.html', + 'info_dict': { + 'id': '202918101', + 'title': '灌篮高手 国语版', + }, + 'playlist_count': 101, + }, { + 'url': 'http://www.pps.tv/w_19rrbav0ph.html', + 'only_matching': True, }] _FORMATS_MAP = [ @@ -306,6 +287,13 @@ class IqiyiIE(InfoExtractor): ('10', 'h1'), ] + AUTH_API_ERRORS = { + # No preview available (不允许试看鉴权失败) + 'Q00505': 'This video requires a VIP account', + # End of preview time (试看结束鉴权失败) + 'Q00506': 'Needs a VIP account for full video', + } + def _real_initialize(self): self._login() @@ -344,7 +332,7 @@ class IqiyiIE(InfoExtractor): 'bird_t': timestamp, } validation_result = self._download_json( - 'http://kylin.iqiyi.com/validate?' + compat_urllib_parse.urlencode(validation_params), None, + 'http://kylin.iqiyi.com/validate?' + compat_urllib_parse_urlencode(validation_params), None, note='Validate credentials', errnote='Unable to validate credentials') MSG_MAP = { @@ -390,12 +378,19 @@ class IqiyiIE(InfoExtractor): auth_req, video_id, note='Downloading video authentication JSON', errnote='Unable to download video authentication JSON') - if auth_result['code'] == 'Q00506': # requires a VIP account + + code = auth_result.get('code') + msg = self.AUTH_API_ERRORS.get(code) or auth_result.get('msg') or code + if code == 'Q00506': if do_report_warning: - self.report_warning('Needs a VIP account for full video') + self.report_warning(msg) return False + if 'data' not in auth_result: + if msg is not None: + raise ExtractorError('%s said: %s' % (self.IE_NAME, msg), expected=True) + raise ExtractorError('Unexpected error from Iqiyi auth API') - return auth_result + return auth_result['data'] def construct_video_urls(self, data, video_id, _uuid, tvid): def do_xor(x, y): @@ -471,14 +466,14 @@ class IqiyiIE(InfoExtractor): need_vip_warning_report = False break param.update({ - 't': auth_result['data']['t'], + 't': auth_result['t'], # cid is hard-coded in com/qiyi/player/core/player/RuntimeData.as 'cid': 'afbe8fd3d73448c9', 'vid': video_id, - 'QY00001': auth_result['data']['u'], + 'QY00001': auth_result['u'], }) api_video_url += '?' if '?' not in api_video_url else '&' - api_video_url += compat_urllib_parse.urlencode(param) + api_video_url += compat_urllib_parse_urlencode(param) js = self._download_json( api_video_url, video_id, note='Download video info of segment %d for format %s' % (segment_index + 1, format_id)) @@ -510,34 +505,75 @@ class IqiyiIE(InfoExtractor): 'enc': md5_text(enc_key + tail), 'qyid': _uuid, 'tn': random.random(), - 'um': 0, + # In iQiyi's flash player, um is set to 1 if there's a logged user + # Some 1080P formats are only available with a logged user. + # Here force um=1 to trick the iQiyi server + 'um': 1, 'authkey': md5_text(md5_text('') + tail), 'k_tag': 1, } api_url = 'http://cache.video.qiyi.com/vms' + '?' + \ - compat_urllib_parse.urlencode(param) + compat_urllib_parse_urlencode(param) raw_data = self._download_json(api_url, video_id) return raw_data - def get_enc_key(self, swf_url, video_id): + def get_enc_key(self, video_id): # TODO: automatic key extraction # last update at 2016-01-22 for Zombie::bite - enc_key = '6ab6d0280511493ba85594779759d4ed' + enc_key = '4a1caba4b4465345366f28da7c117d20' return enc_key + def _extract_playlist(self, webpage): + PAGE_SIZE = 50 + + links = re.findall( + r']+class="site-piclist_pic_link"[^>]+href="(http://www\.iqiyi\.com/.+\.html)"', + webpage) + if not links: + return + + album_id = self._search_regex( + r'albumId\s*:\s*(\d+),', webpage, 'album ID') + album_title = self._search_regex( + r'data-share-title="([^"]+)"', webpage, 'album title', fatal=False) + + entries = list(map(self.url_result, links)) + + # Start from 2 because links in the first page are already on webpage + for page_num in itertools.count(2): + pagelist_page = self._download_webpage( + 'http://cache.video.qiyi.com/jp/avlist/%s/%d/%d/' % (album_id, page_num, PAGE_SIZE), + album_id, + note='Download playlist page %d' % page_num, + errnote='Failed to download playlist page %d' % page_num) + pagelist = self._parse_json( + remove_start(pagelist_page, 'var tvInfoJs='), album_id) + vlist = pagelist['data']['vlist'] + for item in vlist: + entries.append(self.url_result(item['vurl'])) + if len(vlist) < PAGE_SIZE: + break + + return self.playlist_result(entries, album_id, album_title) + def _real_extract(self, url): webpage = self._download_webpage( url, 'temp_id', note='download video page') + + # There's no simple way to determine whether an URL is a playlist or not + # So detect it + playlist_result = self._extract_playlist(webpage) + if playlist_result: + return playlist_result + tvid = self._search_regex( r'data-player-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid') video_id = self._search_regex( r'data-player-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id') - swf_url = self._search_regex( - r'(http://[^\'"]+MainPlayer[^.]+\.swf)', webpage, 'swf player URL') _uuid = uuid.uuid4().hex - enc_key = self.get_enc_key(swf_url, video_id) + enc_key = self.get_enc_key(video_id) raw_data = self.get_raw_data(tvid, video_id, enc_key, _uuid)