X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcrunchyroll.py;h=026a9177e754de7d606961e6e4793af86da49fe2;hb=7c360e3a04f09b912f51034c7778eb2297872e86;hp=a20b88f0250279abe35b98a6a887b6be11298ec5;hpb=344400951ccddaaa477738957bdbdb86a704c55b;p=youtube-dl diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index a20b88f02..026a9177e 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import re +import json import base64 import zlib @@ -85,9 +86,8 @@ class CrunchyrollIE(InfoExtractor): return zlib.decompress(decrypted_data) def _convert_subtitles_to_srt(self, subtitles): - i = 1 output = '' - for start, end, text in re.findall(r']*?start="([^"]+)" [^>]*?end="([^"]+)" [^>]*?text="([^"]+)"[^>]*?>', subtitles): + for i, (start, end, text) in enumerate(re.findall(r']*?start="([^"]+)" [^>]*?end="([^"]+)" [^>]*?text="([^"]+)"[^>]*?>', subtitles), 1): start = start.replace('.', ',') end = end.replace('.', ',') text = clean_html(text) @@ -95,7 +95,6 @@ class CrunchyrollIE(InfoExtractor): if not text: continue output += '%d\n%s --> %s\n%s\n\n' % (i, start, end, text) - i += 1 return output def _real_extract(self,url): @@ -113,6 +112,12 @@ class CrunchyrollIE(InfoExtractor): if note_m: raise ExtractorError(note_m) + mobj = re.search(r'Page\.messaging_box_controller\.addItems\(\[(?P<msg>{.+?})\]\)', webpage) + if mobj: + msg = json.loads(mobj.group('msg')) + if msg.get('type') == 'error': + raise ExtractorError('crunchyroll returned error: %s' % msg['message_body'], expected=True) + video_title = self._html_search_regex(r']*>(.+?)', webpage, 'video_title', flags=re.DOTALL) video_title = re.sub(r' {2,}', ' ', video_title) video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, 'video_description', default='') @@ -166,7 +171,7 @@ class CrunchyrollIE(InfoExtractor): data = base64.b64decode(data) 
subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8') - lang_code = self._search_regex(r'lang_code=\'([^\']+)', subtitle, 'subtitle_lang_code', fatal=False) + lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False) if not lang_code: continue subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle) @@ -180,4 +185,4 @@ class CrunchyrollIE(InfoExtractor): 'upload_date': video_upload_date, 'subtitles': subtitles, 'formats': formats, - } \ No newline at end of file + }