_ Git - youtube-dl/blob - youtube_dl/extractor/iqiyi.py

   1 # coding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5 from .common import InfoExtractor
   6
   7 from ..compat import (
   8     compat_chr,
   9     compat_parse_qs,
  10     compat_urllib_parse,
  11     compat_urllib_request,
  12     compat_urlparse,
  13     compat_str,
  14 )
  15
  16 from ..utils import ExtractorError
  17
  18 import re
  19 import time
  20 import json
  21 import uuid
  22 import math
  23 import random
  24 import zlib
  25 import hashlib
  26
  27 class IqiyiIE(InfoExtractor):
  28     IE_NAME = 'iqiyi'
  29
  30     _VALID_URL = r'http://(?:www\.)iqiyi.com/.+?\.html'
  31
  32     _TEST = {
  33             'url': 'http://www.iqiyi.com/v_19rrojlavg.html',
  34             'md5': '260f0f59686e65e886995d0ba791ab83',
  35             'info_dict': {
  36                 'id': '9c1fb1b99d192b21c559e5a1a2cb3c73',
  37                 'title': '美国德州空中惊现奇异云团 酷似UFO',
  38                 'ext': 'f4v'
  39             }
  40     }
  41
  42     def construct_video_urls(self, data, video_id, _uuid):
  43         def do_xor(x, y):
  44             a = y % 3
  45             if a == 1:
  46                 return x ^ 121
  47             if a == 2:
  48                 return x ^ 72
  49             return x ^ 103
  50
  51         def get_encode_code(l):
  52             a = 0
  53             b = l.split('-')
  54             c = len(b)
  55             s = ''
  56             for i in range(c - 1, -1, -1):
  57                 a = do_xor(int(b[c-i-1], 16), i)
  58                 s += chr(a)
  59             return s[::-1]
  60
  61         def get_path_key(x):
  62             mg = ')(*&^flash@#$%a'
  63             tm = self._download_json(
  64                 'http://data.video.qiyi.com/t?tn=' + str(random.random()), video_id)['t']
  65             t = str(int(math.floor(int(tm)/(600.0))))
  66             return hashlib.md5(
  67                  (t+mg+x).encode('utf8')).hexdigest()
  68
  69         video_urls_dict = {}
  70         for i in data['vp']['tkl'][0]['vs']:
  71             if 0 < int(i['bid']) <= 10:
  72                 format_id = self.get_format(i['bid'])
  73
  74             video_urls_info = i['fs']
  75             if not i['fs'][0]['l'].startswith('/'):
  76                 t = get_encode_code(i['fs'][0]['l'])
  77                 if t.endswith('mp4'):
  78                     video_urls_info = i['flvs']
  79
  80             video_urls = []
  81             for ii in video_urls_info:
  82                 vl = ii['l']
  83                 if not vl.startswith('/'):
  84                     vl = get_encode_code(vl)
  85                 key = get_path_key(
  86                     vl.split('/')[-1].split('.')[0])
  87                 filesize = ii['b']
  88                 base_url = data['vp']['du'].split('/')
  89                 base_url.insert(-1, key)
  90                 base_url = '/'.join(base_url)
  91                 param = {
  92                     'su': _uuid,
  93                     'qyid': uuid.uuid4().hex,
  94                     'client': '',
  95                     'z': '',
  96                     'bt': '',
  97                     'ct': '',
  98                     'tn': str(int(time.time()))
  99                 }
 100                 api_video_url = base_url + vl + '?' + \
 101                     compat_urllib_parse.urlencode(param)
 102                 js = self._download_json(api_video_url, video_id)
 103                 video_url = js['l']
 104                 video_urls.append(
 105                     (video_url, filesize))
 106
 107             video_urls_dict[format_id] = video_urls
 108         return video_urls_dict
 109
 110     def get_format(self, bid):
 111         bid_dict = {
 112             '1': 'standard',
 113             '2': 'high',
 114             '3': 'super',
 115             '4': 'suprt-high',
 116             '5': 'fullhd',
 117             '10': '4k'
 118         }
 119         return bid_dict[str(bid)]
 120
 121     def get_raw_data(self, tvid, video_id, enc_key, _uuid):
 122         tm = str(int(time.time()))
 123         param = {
 124             'key': 'fvip',
 125             'src': hashlib.md5(b'youtube-dl').hexdigest(),
 126             'tvId': tvid,
 127             'vid': video_id,
 128             'vinfo': 1,
 129             'tm': tm,
 130             'enc': hashlib.md5(
 131                 (enc_key + tm + tvid).encode('utf8')).hexdigest(),
 132             'qyid': _uuid,
 133             'tn': random.random(),
 134             'um': 0,
 135             'authkey': hashlib.md5(
 136                 (tm + tvid).encode('utf8')).hexdigest()
 137         }
 138
 139         api_url = 'http://cache.video.qiyi.com/vms' + '?' + \
 140             compat_urllib_parse.urlencode(param)
 141         raw_data = self._download_json(api_url, video_id)
 142         return raw_data
 143
 144     def get_enc_key(self, swf_url, video_id):
 145         req = self._request_webpage(
 146             swf_url, video_id, note='download swf content')
 147         cn = req.read()
 148         cn = zlib.decompress(cn[8:])
 149         pt = re.compile(b'MixerRemote\x08(?P<enc_key>.+?)\$&vv')
 150         enc_key = self._search_regex(pt, cn, 'enc_key').decode('utf8')
 151         return enc_key
 152
 153     def _real_extract(self, url):
 154         webpage = self._download_webpage(
 155             url, 'temp_id', note='download video page')
 156         tvid = self._search_regex(
 157             r'tvId ?= ?(\'|\")(?P<tvid>\d+)', webpage, 'tvid', flags=re.I, group='tvid')
 158         video_id = self._search_regex(
 159             r'videoId ?= ?(\'|\")(?P<video_id>[a-z\d]+)',
 160             webpage, 'video_id', flags=re.I, group='video_id')
 161         swf_url = self._search_regex(
 162             r'(?P<swf>http://.+?MainPlayer.+?\.swf)', webpage, 'swf')
 163         _uuid = uuid.uuid4().hex
 164
 165         enc_key = self.get_enc_key(swf_url, video_id)
 166
 167         raw_data = self.get_raw_data(tvid, video_id, enc_key, _uuid)
 168         assert raw_data['code'] == 'A000000'
 169         if not raw_data['data']['vp']['tkl']:
 170             raise ExtractorError('No support iQiqy VIP video')
 171
 172         data = raw_data['data']
 173
 174         title = data['vi']['vn']
 175
 176         # generate video_urls_dict
 177         video_urls_dict = self.construct_video_urls(data, video_id, _uuid)
 178
 179         # construct info
 180         entries = []
 181         for format_id in video_urls_dict:
 182             video_urls = video_urls_dict[format_id]
 183             for i, video_url_info in enumerate(video_urls):
 184                 if len(entries) < i+1:
 185                     entries.append({'formats': []})
 186                 entries[i]['formats'].append(
 187                     {
 188                         'url': video_url_info[0],
 189                         'filesize': video_url_info[-1],
 190                         'format_id': format_id,
 191                     }
 192                 )
 193
 194         for i in range(len(entries)):
 195             entries[i].update(
 196                 {
 197                     'id': '_part%d' % (i+1),
 198                     'title': title,
 199                 }
 200             )
 201
 202         if len(entries) > 1:
 203             info = {
 204                 '_type': 'multi_video',
 205                 'id': video_id,
 206                 'title': title,
 207                 'entries': entries,
 208             }
 209         else:
 210             info = entries[0]
 211             info['id'] = video_id
 212             info['title'] = title
 213
 214         return info