X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcrunchyroll.py;h=f1da7d09bc934af86f08aa45f8a1a3de32fa4673;hb=cd5b4b0bc2876e16656d33156754ce3c05aa1619;hp=ab03c86029228e521b73f5b4e0af818eccc728e9;hpb=8bcc875676b56c062a4fdd81763a6adb0fb1390c;p=youtube-dl diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index ab03c8602..f1da7d09b 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -9,11 +9,13 @@ import xml.etree.ElementTree from hashlib import sha1 from math import pow, sqrt, floor -from .subtitles import SubtitlesInfoExtractor -from ..utils import ( - ExtractorError, +from .common import InfoExtractor +from ..compat import ( compat_urllib_parse, compat_urllib_request, +) +from ..utils import ( + ExtractorError, bytes_to_intlist, intlist_to_bytes, unified_strdate, @@ -23,14 +25,12 @@ from ..aes import ( aes_cbc_decrypt, inc, ) -from .common import InfoExtractor -class CrunchyrollIE(SubtitlesInfoExtractor): - _VALID_URL = r'https?://(?:(?Pwww|m)\.)?(?Pcrunchyroll\.com/(?:[^/]*/[^/?&]*?|media/\?id=)(?P[0-9]+))(?:[/?&]|$)' - _TEST = { +class CrunchyrollIE(InfoExtractor): + _VALID_URL = r'https?://(?:(?Pwww|m)\.)?(?Pcrunchyroll\.(?:com|fr)/(?:[^/]*/[^/?&]*?|media/\?id=)(?P[0-9]+))(?:[/?&]|$)' + _TESTS = [{ 'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513', - #'md5': 'b1639fd6ddfaa43788c85f6d1dddd412', 'info_dict': { 'id': '645513', 'ext': 'flv', @@ -45,7 +45,10 @@ class CrunchyrollIE(SubtitlesInfoExtractor): # rtmp 'skip_download': True, }, - } + }, { + 'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697', + 'only_matching': True, + }] _FORMAT_IDS = { '360': ('60', '106'), @@ -183,6 +186,38 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text return output + def _get_subtitles(self, video_id, webpage): + subtitles = {} + for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage): + sub_page = self._download_webpage( + 'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id, + video_id, note='Downloading subtitles for ' + sub_name) + id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False) + iv = self._search_regex(r'([^<]+)', sub_page, 'subtitle_iv', fatal=False) + data = self._search_regex(r'([^<]+)', sub_page, 'subtitle_data', fatal=False) + if not id or not iv or not data: + continue + id = int(id) + iv = base64.b64decode(iv) + data = base64.b64decode(data) + + subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8') + lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False) + if not lang_code: + continue + sub_root = xml.etree.ElementTree.fromstring(subtitle) + subtitles[lang_code] = [ + { + 'ext': 'srt', + 'data': self._convert_subtitles_to_srt(sub_root), + }, + { + 'ext': 'ass', + 'data': self._convert_subtitles_to_ass(sub_root), + }, + ] + return subtitles + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('video_id') @@ -224,12 +259,12 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text video_thumbnail = self._search_regex(r'([^<]+)', playerdata, 'thumbnail', fatal=False) formats = [] - for fmt in re.findall(r'\?p([0-9]{3,4})=1', webpage): + for fmt in re.findall(r'showmedia\.([0-9]{3,4})p', webpage): stream_quality, stream_format = self._FORMAT_IDS[fmt] - video_format = fmt +'p' + video_format = fmt + 'p' streamdata_req = compat_urllib_request.Request('http://www.crunchyroll.com/xml/') # urlencode doesn't work! - streamdata_req.data = 'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality=' +stream_quality +'&media%5Fid=' +stream_id +'&video%5Fformat=' +stream_format + streamdata_req.data = 'req=RpcApiVideoEncode%5FGetStreamInfo&video%5Fencode%5Fquality=' + stream_quality + '&media%5Fid=' + stream_id + '&video%5Fformat=' + stream_format streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded') streamdata_req.add_header('Content-Length', str(len(streamdata_req.data))) streamdata = self._download_xml( @@ -245,33 +280,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'format_id': video_format, }) - subtitles = {} - sub_format = self._downloader.params.get('subtitlesformat', 'srt') - for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage): - sub_page = self._download_webpage('http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' +sub_id,\ - video_id, note='Downloading subtitles for ' +sub_name) - id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False) - iv = self._search_regex(r'([^<]+)', sub_page, 'subtitle_iv', fatal=False) - data = self._search_regex(r'([^<]+)', sub_page, 'subtitle_data', fatal=False) - if not id or not iv or not data: - continue - id = int(id) - iv = base64.b64decode(iv) - data = base64.b64decode(data) - - subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8') - lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False) - if not lang_code: - continue - sub_root = xml.etree.ElementTree.fromstring(subtitle) - if sub_format == 'ass': - subtitles[lang_code] = self._convert_subtitles_to_ass(sub_root) - else: - subtitles[lang_code] = self._convert_subtitles_to_srt(sub_root) - - if self._downloader.params.get('listsubtitles', False): - self._list_available_subtitles(video_id, subtitles) - return + subtitles = self.extract_subtitles(video_id, webpage) return { 'id': video_id,