X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fyouku.py;h=69ecc837a4d6d94c82a1055cb7cc41d6e6663763;hb=5c2266df4b9aeb7881ed8c026a038e2a25e43734;hp=aed6b960a8ac56fcd5c6fa3b83062b3ffd99e2ba;hpb=8268e94cd424e010466206a77087b5c366db8cc7;p=youtube-dl diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py index aed6b960a..69ecc837a 100644 --- a/youtube_dl/extractor/youku.py +++ b/youtube_dl/extractor/youku.py @@ -1,18 +1,22 @@ # coding: utf-8 from __future__ import unicode_literals -import re import base64 from .common import InfoExtractor -from ..utils import ExtractorError +from ..compat import ( + compat_urllib_parse, + compat_ord, +) +from ..utils import ( + ExtractorError, + sanitized_Request, +) -from ..compat import compat_urllib_parse - -bytes_is_str = (bytes == str) # for compatible class YoukuIE(InfoExtractor): IE_NAME = 'youku' + IE_DESC = '优酷' _VALID_URL = r'''(?x) (?: http://(?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)| @@ -20,15 +24,44 @@ class YoukuIE(InfoExtractor): (?P[A-Za-z0-9]+)(?:\.html|/v\.swf|) ''' - _TEST = { - 'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html', - 'md5': '5f3af4192eabacc4501508d54a8cabd7', - 'info_dict': { - 'id': 'XMTc1ODE5Njcy', - 'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.', - 'ext': 'flv' - } - } + _TESTS = [{ + 'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html', + 'md5': '5f3af4192eabacc4501508d54a8cabd7', + 'info_dict': { + 'id': 'XMTc1ODE5Njcy_part1', + 'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.', + 'ext': 'flv' + } + }, { + 'url': 'http://player.youku.com/player.php/sid/XNDgyMDQ2NTQw/v.swf', + 'only_matching': True, + }, { + 'url': 'http://v.youku.com/v_show/id_XODgxNjg1Mzk2_ev_1.html', + 'info_dict': { + 'id': 'XODgxNjg1Mzk2', + 'title': '武媚娘传奇 85', + }, + 'playlist_count': 11, + }, { + 'url': 'http://v.youku.com/v_show/id_XMTI1OTczNDM5Mg==.html', + 'info_dict': { + 'id': 'XMTI1OTczNDM5Mg', + 'title': '花千骨 04', + }, + 'playlist_count': 13, + 'skip': 'Available in China only', + }, { + 'url': 'http://v.youku.com/v_show/id_XNjA1NzA2Njgw.html', + 'note': 'Video protected with password', + 'info_dict': { + 'id': 'XNjA1NzA2Njgw', + 'title': '邢義田复旦讲座之想象中的胡人—从“左衽孔子”说起', + }, + 'playlist_count': 19, + 'params': { + 'videopassword': '100600', + }, + }] def construct_video_urls(self, data1, data2): # get sid, token @@ -36,26 +69,20 @@ class YoukuIE(InfoExtractor): ls = list(range(256)) t = 0 for i in range(256): - t = (t + ls[i] + ord(s1[i%len(s1)])) % 256 + t = (t + ls[i] + compat_ord(s1[i % len(s1)])) % 256 ls[i], ls[t] = ls[t], ls[i] - s = '' if not bytes_is_str else b'' + s = bytearray() x, y = 0, 0 for i in range(len(s2)): y = (y + 1) % 256 x = (x + ls[y]) % 256 ls[x], ls[y] = ls[y], ls[x] - if isinstance(s2[i], int): - s += chr(s2[i] ^ ls[(ls[x]+ls[y]) % 256]) - else: - s += chr(ord(s2[i]) ^ ls[(ls[x]+ls[y]) % 256]) - return s + s.append(compat_ord(s2[i]) ^ ls[(ls[x] + ls[y]) % 256]) + return bytes(s) sid, token = yk_t( - 'becaf9be', - base64.b64decode(bytes(data2['ep'], 'ascii')) \ - if not bytes_is_str \ - else base64.b64decode(data2['ep']) - ).split('_') + b'becaf9be', base64.b64decode(data2['ep'].encode('ascii')) + ).decode('ascii').split('_') # get oip oip = data2['ip'] @@ -88,16 +115,10 @@ class YoukuIE(InfoExtractor): def generate_ep(format, n): fileid = get_fileid(format, n) ep_t = yk_t( - 'bf7e5f01', - bytes('%s_%s_%s' % (sid, fileid, token), 'ascii') \ - if not bytes_is_str \ - else ('%s_%s_%s' % (sid, fileid, token)) + b'bf7e5f01', + ('%s_%s_%s' % (sid, fileid, token)).encode('ascii') ) - ep = base64.b64encode( - bytes(ep_t, 'latin') \ - if not bytes_is_str \ - else ep_t - ).decode() + ep = base64.b64encode(ep_t).decode('ascii') return ep # generate video_urls @@ -121,9 +142,9 @@ class YoukuIE(InfoExtractor): video_url = \ 'http://k.youku.com/player/getFlvPath/' + \ 'sid/' + sid + \ - '_' + str(int(n)+1).zfill(2) + \ + '_' + str(int(n) + 1).zfill(2) + \ '/st/' + self.parse_ext_l(format) + \ - '/fileid/' + get_fileid(format, n) + '?' + \ + '/fileid/' + get_fileid(format, n) + '?' + \ compat_urllib_parse.urlencode(param) video_urls.append(video_url) video_urls_dict[format] = video_urls @@ -132,59 +153,75 @@ class YoukuIE(InfoExtractor): def get_hd(self, fm): hd_id_dict = { - 'flv' : '0', - 'mp4' : '1', - 'hd2' : '2', - 'hd3' : '3', - '3gp' : '0', - '3gphd' : '1' + 'flv': '0', + 'mp4': '1', + 'hd2': '2', + 'hd3': '3', + '3gp': '0', + '3gphd': '1' } return hd_id_dict[fm] def parse_ext_l(self, fm): ext_dict = { - 'flv' : 'flv', - 'mp4' : 'mp4', - 'hd2' : 'flv', - 'hd3' : 'flv', - '3gp' : 'flv', - '3gphd' : 'mp4' + 'flv': 'flv', + 'mp4': 'mp4', + 'hd2': 'flv', + 'hd3': 'flv', + '3gp': 'flv', + '3gphd': 'mp4' } return ext_dict[fm] def get_format_name(self, fm): _dict = { - '3gp' : 'h6', - '3gphd' : 'h5', - 'flv' : 'h4', - 'mp4' : 'h3', - 'hd2' : 'h2', - 'hd3' : 'h1' + '3gp': 'h6', + '3gphd': 'h5', + 'flv': 'h4', + 'mp4': 'h3', + 'hd2': 'h2', + 'hd3': 'h1' } return _dict[fm] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) + + def retrieve_data(req_url, note): + req = sanitized_Request(req_url) + + cn_verification_proxy = self._downloader.params.get('cn_verification_proxy') + if cn_verification_proxy: + req.add_header('Ytdl-request-proxy', cn_verification_proxy) + + raw_data = self._download_json(req, video_id, note=note) + return raw_data['data'][0] + + video_password = self._downloader.params.get('videopassword', None) # request basic data - data1_url = 'http://v.youku.com/player/getPlayList/VideoIDS/%s' % video_id - data2_url = 'http://v.youku.com/player/getPlayList/VideoIDS/%s/Pf/4/ctype/12/ev/1' % video_id + basic_data_url = 'http://v.youku.com/player/getPlayList/VideoIDS/%s' % video_id + if video_password: + basic_data_url += '?password=%s' % video_password - raw_data1 = self._download_json(data1_url, video_id) - raw_data2 = self._download_json(data2_url, video_id) - data1 = raw_data1['data'][0] - data2 = raw_data2['data'][0] + data1 = retrieve_data( + basic_data_url, + 'Downloading JSON metadata 1') + data2 = retrieve_data( + 'http://v.youku.com/player/getPlayList/VideoIDS/%s/Pf/4/ctype/12/ev/1' % video_id, + 'Downloading JSON metadata 2') error_code = data1.get('error_code') if error_code: - # -8 means blocked outside China. - # Chinese and English, separated by newline. error = data1.get('error') - raise ExtractorError( - error or 'Server reported error %i' % - error_code, - expected=True) + if error is not None and '因版权原因无法观看此视频' in error: + raise ExtractorError( + 'Youku said: Sorry, this video is available in China only', expected=True) + else: + msg = 'Youku server reported error %i' % error_code + if error is not None: + msg += ': ' + error + raise ExtractorError(msg) title = data1['title'] @@ -192,39 +229,26 @@ class YoukuIE(InfoExtractor): video_urls_dict = self.construct_video_urls(data1, data2) # construct info - entries = [] + entries = [{ + 'id': '%s_part%d' % (video_id, i + 1), + 'title': title, + 'formats': [], + # some formats are not available for all parts, we have to detect + # which one has all + } for i in range(max(len(v) for v in data1['segs'].values()))] for fm in data1['streamtypes']: - #formats = [] video_urls = video_urls_dict[fm] - for i in range(len(video_urls)): - if len(entries) < i+1: - entries.append({'formats': []}) - entries[i]['formats'].append( - { - 'url': video_urls[i], - 'format_id': self.get_format_name(fm), - 'ext': self.parse_ext_l(fm), - 'filesize': int(data1['segs'][fm][i]['size']) - } - ) - - for i in range(len(entries)): - entries[i].update( - { - 'id': '_part%d' % (i+1), - 'title': title, - } - ) - - if len(entries) > 1: - info = { - '_type': 'multi_video', - 'id': video_id, - 'title': title, - 'entries': entries, - } - else: - info = entries[0] - info['id'] = video_id - - return info + for video_url, seg, entry in zip(video_urls, data1['segs'][fm], entries): + entry['formats'].append({ + 'url': video_url, + 'format_id': self.get_format_name(fm), + 'ext': self.parse_ext_l(fm), + 'filesize': int(seg['size']), + }) + + return { + '_type': 'multi_video', + 'id': video_id, + 'title': title, + 'entries': entries, + }