X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fcrunchyroll.py;h=95952bc292c05548a88876b7b52cf236f8bdc826;hb=dd467d33d0883c0c0c94d7fc5ed041d5c12bb564;hp=73f1e22efdc5040d55042fdc1eb47a78c4e56468;hpb=3f19b9b7c111ef0f12b880d8676a346280cc3ef4;p=youtube-dl diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 73f1e22ef..95952bc29 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -12,21 +12,42 @@ from math import pow, sqrt, floor from .common import InfoExtractor from ..compat import ( compat_urllib_parse, + compat_urllib_parse_unquote, compat_urllib_request, + compat_urlparse, ) from ..utils import ( ExtractorError, bytes_to_intlist, intlist_to_bytes, + int_or_none, + remove_end, unified_strdate, urlencode_postdata, + xpath_text, ) from ..aes import ( aes_cbc_decrypt, ) -class CrunchyrollIE(InfoExtractor): +class CrunchyrollBaseIE(InfoExtractor): + def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None): + request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request) + else compat_urllib_request.Request(url_or_request)) + # Accept-Language must be set explicitly to accept any language to avoid issues + # similar to https://github.com/rg3/youtube-dl/issues/6797. + # Along with IP address Crunchyroll uses Accept-Language to guess whether georestriction + # should be imposed or not (from what I can see it just takes the first language + # ignoring the priority and requires it to correspond the IP). By the way this causes + # Crunchyroll to not work in georestriction cases in some browsers that don't place + # the locale lang first in header. However allowing any language seems to workaround the issue. + request.add_header('Accept-Language', '*') + return super(CrunchyrollBaseIE, self)._download_webpage( + request, video_id, note, errnote, fatal, tries, timeout, encoding) + + +class CrunchyrollIE(CrunchyrollBaseIE): _VALID_URL = r'https?://(?:(?Pwww|m)\.)?(?Pcrunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P[0-9]+))(?:[/?&]|$)' _NETRC_MACHINE = 'crunchyroll' _TESTS = [{ @@ -234,7 +255,9 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text webpage_url = 'http://www.' + mobj.group('url') webpage = self._download_webpage(webpage_url, video_id, 'Downloading webpage') - note_m = self._html_search_regex(r'
(.+?)
', webpage, 'trailer-notice', default='') + note_m = self._html_search_regex( + r'
(.+?)
', + webpage, 'trailer-notice', default='') if note_m: raise ExtractorError(note_m) @@ -244,17 +267,24 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text if msg.get('type') == 'error': raise ExtractorError('crunchyroll returned error: %s' % msg['message_body'], expected=True) + if 'To view this, please log in to verify you are 18 or older.' in webpage: + self.raise_login_required() + video_title = self._html_search_regex(r']*>(.+?)', webpage, 'video_title', flags=re.DOTALL) video_title = re.sub(r' {2,}', ' ', video_title) video_description = self._html_search_regex(r'"description":"([^"]+)', webpage, 'video_description', default='') if not video_description: video_description = None - video_upload_date = self._html_search_regex(r'
Availability for free users:(.+?)
', webpage, 'video_upload_date', fatal=False, flags=re.DOTALL) + video_upload_date = self._html_search_regex( + [r'
Availability for free users:(.+?)
', r'
[^<>]+\s*(.+?\d{4})\s*
'], + webpage, 'video_upload_date', fatal=False, flags=re.DOTALL) if video_upload_date: video_upload_date = unified_strdate(video_upload_date) - video_uploader = self._html_search_regex(r'
\s*Publisher:(.+?)
', webpage, 'video_uploader', fatal=False, flags=re.DOTALL) + video_uploader = self._html_search_regex( + r']+href="/publisher/[^"]+"[^>]*>([^<]+)', webpage, + 'video_uploader', fatal=False) - playerdata_url = compat_urllib_parse.unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url')) + playerdata_url = compat_urllib_parse_unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url')) playerdata_req = compat_urllib_request.Request(playerdata_url) playerdata_req.data = compat_urllib_parse.urlencode({'current_page': webpage_url}) playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded') @@ -278,13 +308,33 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text stream_info = streamdata.find('./{default}preload/stream_info') video_url = stream_info.find('./host').text video_play_path = stream_info.find('./file').text - formats.append({ + metadata = stream_info.find('./metadata') + format_info = { + 'format': video_format, + 'format_id': video_format, + 'height': int_or_none(xpath_text(metadata, './height')), + 'width': int_or_none(xpath_text(metadata, './width')), + } + + if '.fplive.net/' in video_url: + video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip()) + parsed_video_url = compat_urlparse.urlparse(video_url) + direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace( + netloc='v.lvlt.crcdn.net', + path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_play_path.split(':')[-1]))) + if self._is_valid_url(direct_video_url, video_id, video_format): + format_info.update({ + 'url': direct_video_url, + }) + formats.append(format_info) + continue + + format_info.update({ 'url': video_url, 'play_path': video_play_path, 'ext': 'flv', - 'format': video_format, - 'format_id': video_format, }) + formats.append(format_info) subtitles = self.extract_subtitles(video_id, webpage) @@ -300,7 +350,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text } -class CrunchyrollShowPlaylistIE(InfoExtractor): +class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE): IE_NAME = "crunchyroll:playlist" _VALID_URL = r'https?://(?:(?Pwww|m)\.)?(?Pcrunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login))(?P[\w\-]+))/?$'