_ Git - youtube-dl/blob - youtube_dl/extractor/douyutv.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import time
   5 import hashlib
   6 import re
   7
   8 from .common import InfoExtractor
   9 from ..utils import (
  10     ExtractorError,
  11     unescapeHTML,
  12     unified_strdate,
  13     urljoin,
  14 )
  15
  16
  17 class DouyuTVIE(InfoExtractor):
  18     IE_DESC = '斗鱼'
  19     _VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(?:[^/]+/)*(?P<id>[A-Za-z0-9]+)'
  20     _TESTS = [{
  21         'url': 'http://www.douyutv.com/iseven',
  22         'info_dict': {
  23             'id': '17732',
  24             'display_id': 'iseven',
  25             'ext': 'flv',
  26             'title': 're:^清晨醒脑！根本停不下来！ [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
  27             'description': r're:.*m7show@163\.com.*',
  28             'thumbnail': r're:^https?://.*\.jpg$',
  29             'uploader': '7师傅',
  30             'is_live': True,
  31         },
  32         'params': {
  33             'skip_download': True,
  34         },
  35     }, {
  36         'url': 'http://www.douyutv.com/85982',
  37         'info_dict': {
  38             'id': '85982',
  39             'display_id': '85982',
  40             'ext': 'flv',
  41             'title': 're:^小漠从零单排记！——CSOL2躲猫猫 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
  42             'description': 'md5:746a2f7a253966a06755a912f0acc0d2',
  43             'thumbnail': r're:^https?://.*\.jpg$',
  44             'uploader': 'douyu小漠',
  45             'is_live': True,
  46         },
  47         'params': {
  48             'skip_download': True,
  49         },
  50         'skip': 'Room not found',
  51     }, {
  52         'url': 'http://www.douyutv.com/17732',
  53         'info_dict': {
  54             'id': '17732',
  55             'display_id': '17732',
  56             'ext': 'flv',
  57             'title': 're:^清晨醒脑！根本停不下来！ [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
  58             'description': r're:.*m7show@163\.com.*',
  59             'thumbnail': r're:^https?://.*\.jpg$',
  60             'uploader': '7师傅',
  61             'is_live': True,
  62         },
  63         'params': {
  64             'skip_download': True,
  65         },
  66     }, {
  67         'url': 'http://www.douyu.com/xiaocang',
  68         'only_matching': True,
  69     }, {
  70         # \"room_id\"
  71         'url': 'http://www.douyu.com/t/lpl',
  72         'only_matching': True,
  73     }]
  74
  75     def _real_extract(self, url):
  76         video_id = self._match_id(url)
  77
  78         if video_id.isdigit():
  79             room_id = video_id
  80         else:
  81             page = self._download_webpage(url, video_id)
  82             room_id = self._html_search_regex(
  83                 r'"room_id\\?"\s*:\s*(\d+),', page, 'room id')
  84
  85         # Grab metadata from mobile API
  86         room = self._download_json(
  87             'http://m.douyu.com/html5/live?roomId=%s' % room_id, video_id,
  88             note='Downloading room info')['data']
  89
  90         # 1 = live, 2 = offline
  91         if room.get('show_status') == '2':
  92             raise ExtractorError('Live stream is offline', expected=True)
  93
  94         # Grab the URL from PC client API
  95         # The m3u8 url from mobile API requires re-authentication every 5 minutes
  96         tt = int(time.time())
  97         signContent = 'lapi/live/thirdPart/getPlay/%s?aid=pcclient&rate=0&time=%d9TUk5fjjUjg9qIMH3sdnh' % (room_id, tt)
  98         sign = hashlib.md5(signContent.encode('ascii')).hexdigest()
  99         video_url = self._download_json(
 100             'http://coapi.douyucdn.cn/lapi/live/thirdPart/getPlay/' + room_id,
 101             video_id, note='Downloading video URL info',
 102             query={'rate': 0}, headers={
 103                 'auth': sign,
 104                 'time': str(tt),
 105                 'aid': 'pcclient'
 106             })['data']['live_url']
 107
 108         title = self._live_title(unescapeHTML(room['room_name']))
 109         description = room.get('show_details')
 110         thumbnail = room.get('room_src')
 111         uploader = room.get('nickname')
 112
 113         return {
 114             'id': room_id,
 115             'display_id': video_id,
 116             'url': video_url,
 117             'title': title,
 118             'description': description,
 119             'thumbnail': thumbnail,
 120             'uploader': uploader,
 121             'is_live': True,
 122         }
 123
 124
 125 class DouyuShowIE(InfoExtractor):
 126     _VALID_URL = r'https?://v(?:mobile)?\.douyu\.com/show/(?P<id>[0-9a-zA-Z]+)'
 127
 128     _TESTS = [{
 129         'url': 'https://v.douyu.com/show/rjNBdvnVXNzvE2yw',
 130         'md5': '0c2cfd068ee2afe657801269b2d86214',
 131         'info_dict': {
 132             'id': 'rjNBdvnVXNzvE2yw',
 133             'ext': 'mp4',
 134             'title': '陈一发儿：砒霜 我有个室友系列！04-01 22点场',
 135             'duration': 7150.08,
 136             'thumbnail': r're:^https?://.*\.jpg$',
 137             'uploader': '陈一发儿',
 138             'uploader_id': 'XrZwYelr5wbK',
 139             'uploader_url': 'https://v.douyu.com/author/XrZwYelr5wbK',
 140             'upload_date': '20170402',
 141         },
 142     }, {
 143         'url': 'https://vmobile.douyu.com/show/rjNBdvnVXNzvE2yw',
 144         'only_matching': True,
 145     }]
 146
 147     def _real_extract(self, url):
 148         url = url.replace('vmobile.', 'v.')
 149         video_id = self._match_id(url)
 150
 151         webpage = self._download_webpage(url, video_id)
 152
 153         room_info = self._parse_json(self._search_regex(
 154             r'var\s+\$ROOM\s*=\s*({.+});', webpage, 'room info'), video_id)
 155
 156         video_info = None
 157
 158         for trial in range(5):
 159             # Sometimes Douyu rejects our request. Let's try it more times
 160             try:
 161                 video_info = self._download_json(
 162                     'https://vmobile.douyu.com/video/getInfo', video_id,
 163                     query={'vid': video_id},
 164                     headers={
 165                         'Referer': url,
 166                         'x-requested-with': 'XMLHttpRequest',
 167                     })
 168                 break
 169             except ExtractorError:
 170                 self._sleep(1, video_id)
 171
 172         if not video_info:
 173             raise ExtractorError('Can\'t fetch video info')
 174
 175         formats = self._extract_m3u8_formats(
 176             video_info['data']['video_url'], video_id,
 177             entry_protocol='m3u8_native', ext='mp4')
 178
 179         upload_date = unified_strdate(self._html_search_regex(
 180             r'<em>上传时间：</em><span>([^<]+)</span>', webpage,
 181             'upload date', fatal=False))
 182
 183         uploader = uploader_id = uploader_url = None
 184         mobj = re.search(
 185             r'(?m)<a[^>]+href="/author/([0-9a-zA-Z]+)".+?<strong[^>]+title="([^"]+)"',
 186             webpage)
 187         if mobj:
 188             uploader_id, uploader = mobj.groups()
 189             uploader_url = urljoin(url, '/author/' + uploader_id)
 190
 191         return {
 192             'id': video_id,
 193             'title': room_info['name'],
 194             'formats': formats,
 195             'duration': room_info.get('duration'),
 196             'thumbnail': room_info.get('pic'),
 197             'upload_date': upload_date,
 198             'uploader': uploader,
 199             'uploader_id': uploader_id,
 200             'uploader_url': uploader_url,
 201         }