_ Git - youtube-dl/blob - youtube_dl/extractor/iqiyi.py

   1 # coding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5 from .common import InfoExtractor
   6
   7 from ..compat import compat_urllib_parse
   8
   9 from ..utils import ExtractorError
  10
  11 import re
  12 import time
  13 import uuid
  14 import math
  15 import random
  16 import zlib
  17 import hashlib
  18
  19
  20 class IqiyiIE(InfoExtractor):
  21     IE_NAME = 'iqiyi'
  22
  23     _VALID_URL = r'http://(?:www\.)iqiyi.com/.+?\.html'
  24
  25     _TEST = {
  26         'url': 'http://www.iqiyi.com/v_19rrojlavg.html',
  27         'md5': '2cb594dc2781e6c941a110d8f358118b',
  28         'info_dict': {
  29             'id': '9c1fb1b99d192b21c559e5a1a2cb3c73',
  30             'title': '美国德州空中惊现奇异云团 酷似UFO',
  31             'ext': 'f4v',
  32         }
  33     }
  34
  35     def construct_video_urls(self, data, video_id, _uuid):
  36         def do_xor(x, y):
  37             a = y % 3
  38             if a == 1:
  39                 return x ^ 121
  40             if a == 2:
  41                 return x ^ 72
  42             return x ^ 103
  43
  44         def get_encode_code(l):
  45             a = 0
  46             b = l.split('-')
  47             c = len(b)
  48             s = ''
  49             for i in range(c - 1, -1, -1):
  50                 a = do_xor(int(b[c - i - 1], 16), i)
  51                 s += chr(a)
  52             return s[::-1]
  53
  54         def get_path_key(x):
  55             mg = ')(*&^flash@#$%a'
  56             tm = self._download_json(
  57                 'http://data.video.qiyi.com/t?tn=' + str(random.random()), video_id)['t']
  58             t = str(int(math.floor(int(tm) / (600.0))))
  59             return hashlib.md5((t + mg + x).encode('utf8')).hexdigest()
  60
  61         video_urls_dict = {}
  62         for i in data['vp']['tkl'][0]['vs']:
  63             if 0 < int(i['bid']) <= 10:
  64                 format_id = self.get_format(i['bid'])
  65             else:
  66                 continue
  67
  68             video_urls = []
  69
  70             video_urls_info = i['fs']
  71             if not i['fs'][0]['l'].startswith('/'):
  72                 t = get_encode_code(i['fs'][0]['l'])
  73                 if t.endswith('mp4'):
  74                     video_urls_info = i['flvs']
  75
  76             for ii in video_urls_info:
  77                 vl = ii['l']
  78                 if not vl.startswith('/'):
  79                     vl = get_encode_code(vl)
  80                 key = get_path_key(
  81                     vl.split('/')[-1].split('.')[0])
  82                 filesize = ii['b']
  83                 base_url = data['vp']['du'].split('/')
  84                 base_url.insert(-1, key)
  85                 base_url = '/'.join(base_url)
  86                 param = {
  87                     'su': _uuid,
  88                     'qyid': uuid.uuid4().hex,
  89                     'client': '',
  90                     'z': '',
  91                     'bt': '',
  92                     'ct': '',
  93                     'tn': str(int(time.time()))
  94                 }
  95                 api_video_url = base_url + vl + '?' + \
  96                     compat_urllib_parse.urlencode(param)
  97                 js = self._download_json(api_video_url, video_id)
  98                 video_url = js['l']
  99                 video_urls.append(
 100                     (video_url, filesize))
 101
 102             video_urls_dict[format_id] = video_urls
 103         return video_urls_dict
 104
 105     def get_format(self, bid):
 106         _dict = {
 107             '1': 'h6',
 108             '2': 'h5',
 109             '3': 'h4',
 110             '4': 'h3',
 111             '5': 'h2',
 112             '10': 'h1'
 113         }
 114         return _dict.get(str(bid), None)
 115
 116     def get_bid(self, format_id):
 117         _dict = {
 118             'h6': '1',
 119             'h5': '2',
 120             'h4': '3',
 121             'h3': '4',
 122             'h2': '5',
 123             'h1': '10',
 124             'best': 'best'
 125         }
 126         return _dict.get(format_id, None)
 127
 128     def get_raw_data(self, tvid, video_id, enc_key, _uuid):
 129         tm = str(int(time.time()))
 130         param = {
 131             'key': 'fvip',
 132             'src': hashlib.md5(b'youtube-dl').hexdigest(),
 133             'tvId': tvid,
 134             'vid': video_id,
 135             'vinfo': 1,
 136             'tm': tm,
 137             'enc': hashlib.md5(
 138                 (enc_key + tm + tvid).encode('utf8')).hexdigest(),
 139             'qyid': _uuid,
 140             'tn': random.random(),
 141             'um': 0,
 142             'authkey': hashlib.md5(
 143                 (tm + tvid).encode('utf8')).hexdigest()
 144         }
 145
 146         api_url = 'http://cache.video.qiyi.com/vms' + '?' + \
 147             compat_urllib_parse.urlencode(param)
 148         raw_data = self._download_json(api_url, video_id)
 149         return raw_data
 150
 151     def get_enc_key(self, swf_url, video_id):
 152         req = self._request_webpage(
 153             swf_url, video_id, note='download swf content')
 154         cn = req.read()
 155         cn = zlib.decompress(cn[8:])
 156         pt = re.compile(b'MixerRemote\x08(?P<enc_key>.+?)\$&vv')
 157         enc_key = self._search_regex(pt, cn, 'enc_key').decode('utf8')
 158         return enc_key
 159
 160     def _real_extract(self, url):
 161         webpage = self._download_webpage(
 162             url, 'temp_id', note='download video page')
 163         tvid = self._search_regex(
 164             r'data-player-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid')
 165         video_id = self._search_regex(
 166             r'data-player-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id')
 167         swf_url = self._search_regex(
 168             r'(http://.+?MainPlayer.+?\.swf)', webpage, 'swf player URL')
 169         _uuid = uuid.uuid4().hex
 170
 171         enc_key = self.get_enc_key(swf_url, video_id)
 172
 173         raw_data = self.get_raw_data(tvid, video_id, enc_key, _uuid)
 174
 175         if raw_data['code'] != 'A000000':
 176             raise ExtractorError('Unable to load data. Error code: ' + raw_data['code'])
 177
 178         if not raw_data['data']['vp']['tkl']:
 179             raise ExtractorError('No support iQiqy VIP video')
 180
 181         data = raw_data['data']
 182
 183         title = data['vi']['vn']
 184
 185         # generate video_urls_dict
 186         video_urls_dict = self.construct_video_urls(
 187             data, video_id, _uuid)
 188
 189         # construct info
 190         entries = []
 191         for format_id in video_urls_dict:
 192             video_urls = video_urls_dict[format_id]
 193             for i, video_url_info in enumerate(video_urls):
 194                 if len(entries) < i + 1:
 195                     entries.append({'formats': []})
 196                 entries[i]['formats'].append(
 197                     {
 198                         'url': video_url_info[0],
 199                         'filesize': video_url_info[-1],
 200                         'format_id': format_id,
 201                         'preference': int(self.get_bid(format_id))
 202                     }
 203                 )
 204
 205         for i in range(len(entries)):
 206             self._sort_formats(entries[i]['formats'])
 207             entries[i].update(
 208                 {
 209                     'id': '_part%d' % (i + 1),
 210                     'title': title,
 211                 }
 212             )
 213
 214         if len(entries) > 1:
 215             info = {
 216                 '_type': 'multi_video',
 217                 'id': video_id,
 218                 'title': title,
 219                 'entries': entries,
 220             }
 221         else:
 222             info = entries[0]
 223             info['id'] = video_id
 224             info['title'] = title
 225
 226         return info