X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fbilibili.py;h=6c66a12368ea0a963d89ef5922c9d83f3019ddfc;hb=36e6f62cd0883f0f486d1666d010e5d9e6d515bd;hp=0d5889f5d17c17ffa75eeca1f1079efd7f9c2b8f;hpb=00558d94145f97c644e66ec086fa9b9d8c58280f;p=youtube-dl diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 0d5889f5d..6c66a1236 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -2,37 +2,56 @@ from __future__ import unicode_literals import re +import itertools +import json from .common import InfoExtractor +from ..compat import ( + compat_etree_fromstring, +) from ..utils import ( - compat_parse_qs, - ExtractorError, int_or_none, unified_strdate, + ExtractorError, ) class BiliBiliIE(InfoExtractor): _VALID_URL = r'http://www\.bilibili\.(?:tv|com)/video/av(?P[0-9]+)/' - _TEST = { + _TESTS = [{ 'url': 'http://www.bilibili.tv/video/av1074402/', 'md5': '2c301e4dab317596e837c3e7633e7d86', 'info_dict': { - 'id': '1074402', + 'id': '1074402_part1', 'ext': 'flv', 'title': '【金坷垃】金泡沫', 'duration': 308, 'upload_date': '20140420', 'thumbnail': 're:^https?://.+\.jpg', }, - } + }, { + 'url': 'http://www.bilibili.com/video/av1041170/', + 'info_dict': { + 'id': '1041170', + 'title': '【BD1080P】刀语【诸神&异域】', + }, + 'playlist_count': 9, + }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + + if '(此视频不存在或被删除)' in webpage: + raise ExtractorError( + 'The video does not exist or was deleted', expected=True) + + if '>你没有权限浏览! 由于版权相关问题 我们不对您所在的地区提供服务<' in webpage: + raise ExtractorError( + 'The video is not available in your region due to copyright reasons', + expected=True) + video_code = self._search_regex( r'(?s)
(.*?)
', webpage, 'video code') @@ -55,19 +74,39 @@ class BiliBiliIE(InfoExtractor): thumbnail = self._html_search_meta( 'thumbnailUrl', video_code, 'thumbnail', fatal=False) - player_params = compat_parse_qs(self._html_search_regex( - r'