X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcrunchyroll.py;h=7642b868e46ad520a1f417713c76697d9e9c5f35;hb=723e04d0be85fbdbbbda52512f322331d8fda760;hp=920728e01f1c2b2a6e6b75c3b6740e84b3043897;hpb=ff6b7b049b14a061a362e701df8ab4b2f7b82456;p=youtube-dl diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 920728e01..7642b868e 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -1,7 +1,11 @@ # encoding: utf-8 from __future__ import unicode_literals -import re, base64, zlib +import re +import json +import base64 +import zlib + from hashlib import sha1 from math import pow, sqrt, floor from .common import InfoExtractor @@ -13,19 +17,22 @@ from ..utils import ( intlist_to_bytes, unified_strdate, clean_html, + urlencode_postdata, ) from ..aes import ( aes_cbc_decrypt, inc, ) + class CrunchyrollIE(InfoExtractor): - _VALID_URL = r'(?:https?://)?(?:(?Pwww|m)\.)?(?Pcrunchyroll\.com/(?:[^/]*/[^/?&]*?|media/\?id=)(?P[0-9]+))(?:[/?&]|$)' - _TESTS = [{ + _VALID_URL = r'https?://(?:(?Pwww|m)\.)?(?Pcrunchyroll\.com/(?:[^/]*/[^/?&]*?|media/\?id=)(?P[0-9]+))(?:[/?&]|$)' + _TEST = { 'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513', - 'file': '645513.flv', #'md5': 'b1639fd6ddfaa43788c85f6d1dddd412', 'info_dict': { + 'id': '645513', + 'ext': 'flv', 'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!', 'description': 'md5:2d17137920c64f2f49981a7797d275ef', 'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg', @@ -36,7 +43,7 @@ class CrunchyrollIE(InfoExtractor): # rtmp 'skip_download': True, }, - }] + } _FORMAT_IDS = { '360': ('60', '106'), @@ -45,6 +52,24 @@ class CrunchyrollIE(InfoExtractor): '1080': ('80', '108'), } + def _login(self): + (username, password) = self._get_login_info() + if username is None: + return + self.report_login() + login_url = 'https://www.crunchyroll.com/?a=formhandler' + data = urlencode_postdata({ + 'formname': 'RpcApiUser_Login', + 'name': username, + 'password': password, + }) + login_request = compat_urllib_request.Request(login_url, data) + login_request.add_header('Content-Type', 'application/x-www-form-urlencoded') + self._download_webpage(login_request, None, False, 'Wrong login info') + + def _real_initialize(self): + self._login() + def _decrypt_subtitles(self, data, iv, id): data = bytes_to_intlist(data) iv = bytes_to_intlist(iv) @@ -68,7 +93,7 @@ class CrunchyrollIE(InfoExtractor): shaHash = bytes_to_intlist(sha1(prefix + str(num4).encode('ascii')).digest()) # Extend 160 Bit hash to 256 Bit return shaHash + [0] * 12 - + key = obfuscate_key(id) class Counter: __value = iv @@ -80,9 +105,8 @@ class CrunchyrollIE(InfoExtractor): return zlib.decompress(decrypted_data) def _convert_subtitles_to_srt(self, subtitles): - i=1 output = '' - for start, end, text in re.findall(r']*?start="([^"]+)" [^>]*?end="([^"]+)" [^>]*?text="([^"]+)"[^>]*?>', subtitles): + for i, (start, end, text) in enumerate(re.findall(r']*?start="([^"]+)" [^>]*?end="([^"]+)" [^>]*?text="([^"]+)"[^>]*?>', subtitles), 1): start = start.replace('.', ',') end = end.replace('.', ',') text = clean_html(text) @@ -90,7 +114,6 @@ class CrunchyrollIE(InfoExtractor): if not text: continue output += '%d\n%s --> %s\n%s\n\n' % (i, start, end, text) - i+=1 return output def _real_extract(self,url): @@ -108,6 +131,12 @@ class CrunchyrollIE(InfoExtractor): if note_m: raise ExtractorError(note_m) + mobj = re.search(r'Page\.messaging_box_controller\.addItems\(\[(?P{.+?})\]\)', webpage) + if mobj: + msg = json.loads(mobj.group('msg')) + if msg.get('type') == 'error': + raise ExtractorError('crunchyroll returned error: %s' % msg['message_body'], expected=True) + video_title = self._html_search_regex(r']*>(.+?)', webpage, 'video_title', flags=re.DOTALL) video_title = re.sub(r' {2,}', ' ', video_title) video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, 'video_description', default='') @@ -123,7 +152,7 @@ class CrunchyrollIE(InfoExtractor): playerdata_req.data = compat_urllib_parse.urlencode({'current_page': webpage_url}) playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded') playerdata = self._download_webpage(playerdata_req, video_id, note='Downloading media info') - + stream_id = self._search_regex(r'([^<]+)', playerdata, 'stream_id') video_thumbnail = self._search_regex(r'([^<]+)', playerdata, 'thumbnail', fatal=False) @@ -161,7 +190,7 @@ class CrunchyrollIE(InfoExtractor): data = base64.b64decode(data) subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8') - lang_code = self._search_regex(r'lang_code=\'([^\']+)', subtitle, 'subtitle_lang_code', fatal=False) + lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False) if not lang_code: continue subtitles[lang_code] = self._convert_subtitles_to_srt(subtitle)