X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fdouyutv.py;h=33efc993eeb89b76af7779ca67de35ad92620150;hb=b281aad2dc658e3c6535579d75b42a5634487b83;hp=bdc768c783b9b3213badc5cf4b354f6159142f9f;hpb=a9793f58a1971e3f458be01200df485f4e9b0bda;p=youtube-dl diff --git a/youtube_dl/extractor/douyutv.py b/youtube_dl/extractor/douyutv.py index bdc768c78..33efc993e 100644 --- a/youtube_dl/extractor/douyutv.py +++ b/youtube_dl/extractor/douyutv.py @@ -3,14 +3,15 @@ from __future__ import unicode_literals import hashlib import time +import uuid from .common import InfoExtractor from ..utils import (ExtractorError, unescapeHTML) -from ..compat import (compat_str, compat_basestring) +from ..compat import (compat_str, compat_basestring, compat_urllib_parse_urlencode) class DouyuTVIE(InfoExtractor): IE_DESC = '斗鱼' - _VALID_URL = r'http://(?:www\.)?douyutv\.com/(?P[A-Za-z0-9]+)' + _VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(?P[A-Za-z0-9]+)' _TESTS = [{ 'url': 'http://www.douyutv.com/iseven', 'info_dict': { @@ -18,10 +19,9 @@ class DouyuTVIE(InfoExtractor): 'display_id': 'iseven', 'ext': 'flv', 'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', - 'description': 'md5:f34981259a03e980a3c6404190a3ed61', + 'description': 're:.*m7show@163\.com.*', 'thumbnail': 're:^https?://.*\.jpg$', 'uploader': '7师傅', - 'uploader_id': '431925', 'is_live': True, }, 'params': { @@ -37,13 +37,12 @@ class DouyuTVIE(InfoExtractor): 'description': 'md5:746a2f7a253966a06755a912f0acc0d2', 'thumbnail': 're:^https?://.*\.jpg$', 'uploader': 'douyu小漠', - 'uploader_id': '3769985', 'is_live': True, }, 'params': { 'skip_download': True, }, - 'skip': 'Romm not found', + 'skip': 'Room not found', }, { 'url': 'http://www.douyutv.com/17732', 'info_dict': { @@ -51,15 +50,17 @@ class DouyuTVIE(InfoExtractor): 'display_id': '17732', 'ext': 'flv', 'title': 're:^清晨醒脑!T-ara根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', - 'description': 'md5:f34981259a03e980a3c6404190a3ed61', + 'description': 're:.*m7show@163\.com.*', 'thumbnail': 're:^https?://.*\.jpg$', 'uploader': '7师傅', - 'uploader_id': '431925', 'is_live': True, }, 'params': { 'skip_download': True, }, + }, { + 'url': 'http://www.douyu.com/xiaocang', + 'only_matching': True, }] def _real_extract(self, url): @@ -72,59 +73,75 @@ class DouyuTVIE(InfoExtractor): room_id = self._html_search_regex( r'"room_id"\s*:\s*(\d+),', page, 'room id') - prefix = 'room/%s?aid=android&client_sys=android&time=%d' % ( - room_id, int(time.time())) - - auth = hashlib.md5((prefix + '1231').encode('ascii')).hexdigest() - config = self._download_json( - 'http://www.douyutv.com/api/v1/%s&auth=%s' % (prefix, auth), - video_id) - - data = config['data'] + room_url = 'http://m.douyu.com/html5/live?roomId=%s' % room_id + room_content = self._download_webpage(room_url, video_id) + room_json = self._parse_json(room_content, video_id, fatal=False) - error_code = config.get('error', 0) - if error_code is not 0: - error_desc = 'Server reported error %i' % error_code - if isinstance(data, (compat_str, compat_basestring)): - error_desc += ': ' + data - raise ExtractorError(error_desc, expected=True) + room = room_json['data'] - show_status = data.get('show_status') + show_status = room.get('show_status') # 1 = live, 2 = offline if show_status == '2': raise ExtractorError( 'Live stream is offline', expected=True) - base_url = data['rtmp_url'] - live_path = data['rtmp_live'] - - title = self._live_title(unescapeHTML(data['room_name'])) - description = data.get('show_details') - thumbnail = data.get('room_src') + flv_json = None + # Douyu API sometimes returns error "Unable to load the requested class: eticket_redis_cache" + # Retry with different parameters - same parameters cause same errors + for i in range(5): + tt = int(time.time() / 60) + did = uuid.uuid4().hex.upper() + + # Decompile core.swf in webpage by ffdec "Search SWFs in memory" + # core.swf is encrypted originally, but ffdec can dump memory to get the decrypted one + # If API changes in the future, just use this way to update + sign_content = '{room_id}{did}A12Svb&%1UUmf@hC{tt}'.format(room_id = room_id, did = did, tt = tt) + sign = hashlib.md5((sign_content).encode('utf-8')).hexdigest() + + payload = {'cdn': 'ws', 'rate': '0', 'tt': tt, 'did': did, 'sign': sign} + flv_data = compat_urllib_parse_urlencode(payload) + + flv_request_url = 'http://www.douyu.com/lapi/live/getPlay/%s' % room_id + flv_content = self._download_webpage(flv_request_url, video_id, data=flv_data, + headers={'Content-Type': 'application/x-www-form-urlencoded'}) + try: + flv_json = self._parse_json(flv_content, video_id, fatal=False) + except ExtractorError: + # Wait some time before retrying to get a different time() value + self._sleep(1, video_id, msg_template='%(video_id)s: Error occurs. ' + 'Waiting for %(timeout)s seconds before retrying') + continue + else: + break + if flv_json is None: + raise ExtractorError('Unable to fetch API result') + + flv = flv_json['data'] + + error_code = flv_json.get('error', 0) + if error_code is not 0: + error_desc = 'Server reported error %i' % error_code + if isinstance(flv, (compat_str, compat_basestring)): + error_desc += ': ' + flv + raise ExtractorError(error_desc, expected=True) - uploader = data.get('nickname') - uploader_id = data.get('owner_uid') + base_url = flv['rtmp_url'] + live_path = flv['rtmp_live'] - multi_formats = data.get('rtmp_multi_bitrate') - if not isinstance(multi_formats, dict): - multi_formats = {} - multi_formats['live'] = live_path + video_url = '%s/%s' % (base_url, live_path) - formats = [{ - 'url': '%s/%s' % (base_url, format_path), - 'format_id': format_id, - 'preference': 1 if format_id == 'live' else 0, - } for format_id, format_path in multi_formats.items()] - self._sort_formats(formats) + title = self._live_title(unescapeHTML(room['room_name'])) + description = room.get('notice') + thumbnail = room.get('room_src') + uploader = room.get('nickname') return { 'id': room_id, 'display_id': video_id, + 'url': video_url, 'title': title, 'description': description, 'thumbnail': thumbnail, 'uploader': uploader, - 'uploader_id': uploader_id, - 'formats': formats, 'is_live': True, }