[crunchyroll] switch to HTTPS for RpcApi(closes #17749)
[youtube-dl] / youtube_dl / extractor / crunchyroll.py
index 4ed45837236c64dd317d2ce2a5fe0e78b52e95a3..045be0ab5c196751cd09b22707484f8468dbb2e1 100644 (file)
@@ -7,6 +7,7 @@ import zlib
 
 from hashlib import sha1
 from math import pow, sqrt, floor
+from .common import InfoExtractor
 from .vrv import VRVIE
 from ..compat import (
     compat_b64decode,
@@ -34,7 +35,7 @@ from ..aes import (
 )
 
 
-class CrunchyrollBaseIE(VRVIE):
+class CrunchyrollBaseIE(InfoExtractor):
     _LOGIN_URL = 'https://www.crunchyroll.com/login'
     _LOGIN_FORM = 'login_form'
     _NETRC_MACHINE = 'crunchyroll'
@@ -44,7 +45,7 @@ class CrunchyrollBaseIE(VRVIE):
         data['req'] = 'RpcApi' + method
         data = compat_urllib_parse_urlencode(data).encode('utf-8')
         return self._download_xml(
-            'http://www.crunchyroll.com/xml/',
+            'https://www.crunchyroll.com/xml/',
             video_id, note, fatal=False, data=data, headers={
                 'Content-Type': 'application/x-www-form-urlencoded',
             })
@@ -140,7 +141,8 @@ class CrunchyrollBaseIE(VRVIE):
             parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
 
 
-class CrunchyrollIE(CrunchyrollBaseIE):
+class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
+    IE_NAME = 'crunchyroll'
     _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
     _TESTS = [{
         'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
@@ -443,6 +445,10 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
             webpage, 'vilos media', default='{}'), video_id)
         media_metadata = media.get('metadata') or {}
 
+        language = self._search_regex(
+            r'(?:vilos\.config\.player\.language|LOCALE)\s*=\s*(["\'])(?P<lang>(?:(?!\1).)+)\1',
+            webpage, 'language', default=None, group='lang')
+
         video_title = self._html_search_regex(
             r'(?s)<h1[^>]*>((?:(?!<h1).)*?<span[^>]+itemprop=["\']title["\'][^>]*>(?:(?!<h1).)+?)</h1>',
             webpage, 'video_title')
@@ -464,9 +470,22 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
 
         formats = []
         for stream in media.get('streams', []):
-            formats.extend(self._extract_vrv_formats(
+            audio_lang = stream.get('audio_lang')
+            hardsub_lang = stream.get('hardsub_lang')
+            vrv_formats = self._extract_vrv_formats(
                 stream.get('url'), video_id, stream.get('format'),
-                stream.get('audio_lang'), stream.get('hardsub_lang')))
+                audio_lang, hardsub_lang)
+            for f in vrv_formats:
+                if not hardsub_lang:
+                    f['preference'] = 1
+                language_preference = 0
+                if audio_lang == language:
+                    language_preference += 1
+                if hardsub_lang == language:
+                    language_preference += 1
+                if language_preference:
+                    f['language_preference'] = language_preference
+            formats.extend(vrv_formats)
         if not formats:
             available_fmts = []
             for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
@@ -555,7 +574,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
                         'ext': 'flv',
                     })
                     formats.append(format_info)
-        self._sort_formats(formats, ('height', 'width', 'tbr', 'fps'))
+        self._sort_formats(formats, ('preference', 'language_preference', 'height', 'width', 'tbr', 'fps'))
 
         metadata = self._call_rpc_api(
             'VideoPlayer_GetMediaMetadata', video_id,