X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fiqiyi.py;h=c3731a110b9a0ce222e742161fdbc1081fbbab61;hb=9f0ee2a3883ec6f6fdccba90085cb925aaa2f617;hp=747f3f90281caf81fcbfcbb1eacace97715e8e34;hpb=9bac8c57e3db49c6639c115478a36dde8d465ea7;p=youtube-dl diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py index 747f3f902..c3731a110 100644 --- a/youtube_dl/extractor/iqiyi.py +++ b/youtube_dl/extractor/iqiyi.py @@ -1,37 +1,105 @@ # coding: utf-8 - from __future__ import unicode_literals -from .common import InfoExtractor +import hashlib +import math +import random +import time +import uuid +from .common import InfoExtractor from ..compat import compat_urllib_parse - from ..utils import ExtractorError -import re -import time -import uuid -import math -import random -import zlib -import hashlib class IqiyiIE(InfoExtractor): IE_NAME = 'iqiyi' + IE_DESC = '爱奇艺' - _VALID_URL = r'http://(?:www\.)iqiyi.com/.+?\.html' + _VALID_URL = r'http://(?:www\.)iqiyi.com/v_.+?\.html' - _TEST = { - 'url': 'http://www.iqiyi.com/v_19rrojlavg.html', - 'md5': '2cb594dc2781e6c941a110d8f358118b', + _TESTS = [{ + 'url': 'http://www.iqiyi.com/v_19rrojlavg.html', + 'md5': '2cb594dc2781e6c941a110d8f358118b', + 'info_dict': { + 'id': '9c1fb1b99d192b21c559e5a1a2cb3c73', + 'title': '美国德州空中惊现奇异云团 酷似UFO', + 'ext': 'f4v', + } + }, { + 'url': 'http://www.iqiyi.com/v_19rrhnnclk.html', + 'info_dict': { + 'id': 'e3f585b550a280af23c98b6cb2be19fb', + 'title': '名侦探柯南第752集', + }, + 'playlist': [{ 'info_dict': { - 'id': '9c1fb1b99d192b21c559e5a1a2cb3c73', - 'title': '美国德州空中惊现奇异云团 酷似UFO', + 'id': 'e3f585b550a280af23c98b6cb2be19fb_part1', 'ext': 'f4v', - } - } - - def construct_video_urls(self, data, video_id, _uuid, bid): + 'title': '名侦探柯南第752集', + }, + }, { + 'info_dict': { + 'id': 'e3f585b550a280af23c98b6cb2be19fb_part2', + 'ext': 'f4v', + 'title': '名侦探柯南第752集', + }, + }, { + 'info_dict': { + 'id': 'e3f585b550a280af23c98b6cb2be19fb_part3', + 'ext': 'f4v', + 'title': '名侦探柯南第752集', + }, + }, { + 'info_dict': { + 'id': 'e3f585b550a280af23c98b6cb2be19fb_part4', + 'ext': 'f4v', + 'title': '名侦探柯南第752集', + }, + }, { + 'info_dict': { + 'id': 'e3f585b550a280af23c98b6cb2be19fb_part5', + 'ext': 'f4v', + 'title': '名侦探柯南第752集', + }, + }, { + 'info_dict': { + 'id': 'e3f585b550a280af23c98b6cb2be19fb_part6', + 'ext': 'f4v', + 'title': '名侦探柯南第752集', + }, + }, { + 'info_dict': { + 'id': 'e3f585b550a280af23c98b6cb2be19fb_part7', + 'ext': 'f4v', + 'title': '名侦探柯南第752集', + }, + }, { + 'info_dict': { + 'id': 'e3f585b550a280af23c98b6cb2be19fb_part8', + 'ext': 'f4v', + 'title': '名侦探柯南第752集', + }, + }], + 'params': { + 'skip_download': True, + }, + }] + + _FORMATS_MAP = [ + ('1', 'h6'), + ('2', 'h5'), + ('3', 'h4'), + ('4', 'h3'), + ('5', 'h2'), + ('10', 'h1'), + ] + + @staticmethod + def md5_text(text): + return hashlib.md5(text.encode('utf-8')).hexdigest() + + def construct_video_urls(self, data, video_id, _uuid): def do_xor(x, y): a = y % 3 if a == 1: @@ -46,53 +114,41 @@ class IqiyiIE(InfoExtractor): c = len(b) s = '' for i in range(c - 1, -1, -1): - a = do_xor(int(b[c-i-1], 16), i) + a = do_xor(int(b[c - i - 1], 16), i) s += chr(a) return s[::-1] - def get_path_key(x): + def get_path_key(x, format_id, segment_index): mg = ')(*&^flash@#$%a' tm = self._download_json( - 'http://data.video.qiyi.com/t?tn=' + str(random.random()), video_id)['t'] - t = str(int(math.floor(int(tm)/(600.0)))) - return hashlib.md5( - (t+mg+x).encode('utf8')).hexdigest() - - # get accept format - # getting all format will spend minutes for a big video. - if bid == 'best': - bids = [int(i['bid']) for i in data['vp']['tkl'][0]['vs'] \ - if 0 < int(i['bid']) <= 10] - bid = str(max(bids)) + 'http://data.video.qiyi.com/t?tn=' + str(random.random()), video_id, + note='Download path key of segment %d for format %s' % (segment_index + 1, format_id) + )['t'] + t = str(int(math.floor(int(tm) / (600.0)))) + return self.md5_text(t + mg + x) video_urls_dict = {} - for i in data['vp']['tkl'][0]['vs']: - if 0 < int(i['bid']) <= 10: - format_id = self.get_format(i['bid']) + for format_item in data['vp']['tkl'][0]['vs']: + if 0 < int(format_item['bid']) <= 10: + format_id = self.get_format(format_item['bid']) else: continue video_urls = [] - video_urls_info = i['fs'] - if not i['fs'][0]['l'].startswith('/'): - t = get_encode_code(i['fs'][0]['l']) + video_urls_info = format_item['fs'] + if not format_item['fs'][0]['l'].startswith('/'): + t = get_encode_code(format_item['fs'][0]['l']) if t.endswith('mp4'): - video_urls_info = i['flvs'] + video_urls_info = format_item['flvs'] - if int(i['bid']) != int(bid): # ignore missing match format - video_urls.extend( - [('http://example.com/v.flv', ii['b']) for ii in video_urls_info]) - video_urls_dict[format_id] = video_urls - continue - - for ii in video_urls_info: - vl = ii['l'] + for segment_index, segment in enumerate(video_urls_info): + vl = segment['l'] if not vl.startswith('/'): vl = get_encode_code(vl) key = get_path_key( - vl.split('/')[-1].split('.')[0]) - filesize = ii['b'] + vl.split('/')[-1].split('.')[0], format_id, segment_index) + filesize = segment['b'] base_url = data['vp']['du'].split('/') base_url.insert(-1, key) base_url = '/'.join(base_url) @@ -107,7 +163,9 @@ class IqiyiIE(InfoExtractor): } api_video_url = base_url + vl + '?' + \ compat_urllib_parse.urlencode(param) - js = self._download_json(api_video_url, video_id) + js = self._download_json( + api_video_url, video_id, + note='Download video info of segment %d for format %s' % (segment_index + 1, format_id)) video_url = js['l'] video_urls.append( (video_url, filesize)) @@ -116,44 +174,28 @@ class IqiyiIE(InfoExtractor): return video_urls_dict def get_format(self, bid): - _dict = { - '1' : 'h6', - '2' : 'h5', - '3' : 'h4', - '4' : 'h3', - '5' : 'h2', - '10' : 'h1' - } - return _dict.get(str(bid), None) + matched_format_ids = [_format_id for _bid, _format_id in self._FORMATS_MAP if _bid == str(bid)] + return matched_format_ids[0] if len(matched_format_ids) else None def get_bid(self, format_id): - _dict = { - 'h6' : '1', - 'h5' : '2', - 'h4' : '3', - 'h3' : '4', - 'h2' : '5', - 'h1' : '10', - 'best' : 'best' - } - return _dict.get(format_id, None) + matched_bids = [_bid for _bid, _format_id in self._FORMATS_MAP if _format_id == format_id] + return matched_bids[0] if len(matched_bids) else None def get_raw_data(self, tvid, video_id, enc_key, _uuid): tm = str(int(time.time())) + tail = tm + tvid param = { 'key': 'fvip', - 'src': hashlib.md5(b'youtube-dl').hexdigest(), + 'src': self.md5_text('youtube-dl'), 'tvId': tvid, 'vid': video_id, 'vinfo': 1, 'tm': tm, - 'enc': hashlib.md5( - (enc_key + tm + tvid).encode('utf8')).hexdigest(), + 'enc': self.md5_text(enc_key + tail), 'qyid': _uuid, 'tn': random.random(), 'um': 0, - 'authkey': hashlib.md5( - (tm + tvid).encode('utf8')).hexdigest() + 'authkey': self.md5_text(self.md5_text('') + tail), } api_url = 'http://cache.video.qiyi.com/vms' + '?' + \ @@ -162,30 +204,29 @@ class IqiyiIE(InfoExtractor): return raw_data def get_enc_key(self, swf_url, video_id): - req = self._request_webpage( - swf_url, video_id, note='download swf content') - cn = req.read() - cn = zlib.decompress(cn[8:]) - pt = re.compile(b'MixerRemote\x08(?P.+?)\$&vv') - enc_key = self._search_regex(pt, cn, 'enc_key').decode('utf8') + # TODO: automatic key extraction + # last update at 2015-12-18 for Zombie::bite + enc_key = '8b6b683780897eb8d9a48a02ccc4817d'[::-1] return enc_key def _real_extract(self, url): webpage = self._download_webpage( url, 'temp_id', note='download video page') tvid = self._search_regex( - r'tvId ?= ?(\'|\")(?P\d+)', webpage, 'tvid', flags=re.I, group='tvid') + r'data-player-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid') video_id = self._search_regex( - r'videoId ?= ?(\'|\")(?P[a-z\d]+)', - webpage, 'video_id', flags=re.I, group='video_id') + r'data-player-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id') swf_url = self._search_regex( - r'(?Phttp://.+?MainPlayer.+?\.swf)', webpage, 'swf') + r'(http://[^\'"]+MainPlayer[^.]+\.swf)', webpage, 'swf player URL') _uuid = uuid.uuid4().hex enc_key = self.get_enc_key(swf_url, video_id) raw_data = self.get_raw_data(tvid, video_id, enc_key, _uuid) - assert raw_data['code'] == 'A000000' + + if raw_data['code'] != 'A000000': + raise ExtractorError('Unable to load data. Error code: ' + raw_data['code']) + if not raw_data['data']['vp']['tkl']: raise ExtractorError('No support iQiqy VIP video') @@ -193,21 +234,16 @@ class IqiyiIE(InfoExtractor): title = data['vi']['vn'] - format = self._downloader.params.get('format', None) - bid = self.get_bid(format) if format else 'best' - if not bid: - raise ExtractorError('Can\'t get format.') - # generate video_urls_dict video_urls_dict = self.construct_video_urls( - data, video_id, _uuid, bid) + data, video_id, _uuid) # construct info entries = [] for format_id in video_urls_dict: video_urls = video_urls_dict[format_id] for i, video_url_info in enumerate(video_urls): - if len(entries) < i+1: + if len(entries) < i + 1: entries.append({'formats': []}) entries[i]['formats'].append( { @@ -222,7 +258,7 @@ class IqiyiIE(InfoExtractor): self._sort_formats(entries[i]['formats']) entries[i].update( { - 'id': '_part%d' % (i+1), + 'id': '%s_part%d' % (video_id, i + 1), 'title': title, } )