[crunchyroll] reduce requests for formats extraction

author remitamine <remitamine@gmail.com>

Fri, 29 Apr 2016 10:46:42 +0000 (11:46 +0100)

committer remitamine <remitamine@gmail.com>

Fri, 29 Apr 2016 10:46:42 +0000 (11:46 +0100)
author remitamine <remitamine@gmail.com>
Fri, 29 Apr 2016 10:46:42 +0000 (11:46 +0100)
committer remitamine <remitamine@gmail.com>
Fri, 29 Apr 2016 10:46:42 +0000 (11:46 +0100)
diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py

index dd753c7c3c9e0e0f96c970df3cb2e45c1d02215e..184ba689649ab62430e7ef9a2db057b437d12e7d 100644 (file)
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@@ -26,6 +26,7 @@ from ..utils import (
      unified_strdate,
      urlencode_postdata,
      xpath_text,
+    extract_attributes,
  )
  from ..aes import (
      aes_cbc_decrypt,
@@ -305,9 +306,18 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
              r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', webpage,
              'video_uploader', fatal=False)
  
-        formats = []
+        available_fmts = []
+        for a, fmt in re.findall(r'(<a[^>]+token="showmedia\.([0-9]{3,4})p"[^>]+>.*?</a>)', webpage):
+            attrs = extract_attributes(a)
+            href = attrs.get('href')
+            if href and '/freetrial' in href:
+                continue
+            available_fmts.append(fmt)
+        if not available_fmts:
+            available_fmts = re.findall(r'token="showmedia\.([0-9]{3,4})p"', webpage)
          video_encode_ids = []
-        for fmt in re.findall(r'token="showmedia\.([0-9]{3,4})p"', webpage):
+        formats = []
+        for fmt in available_fmts:
              stream_quality, stream_format = self._FORMAT_IDS[fmt]
              video_format = fmt + 'p'
              streamdata_req = sanitized_Request(
author	remitamine <remitamine@gmail.com>
	Fri, 29 Apr 2016 10:46:42 +0000 (11:46 +0100)
committer	remitamine <remitamine@gmail.com>
	Fri, 29 Apr 2016 10:46:42 +0000 (11:46 +0100)