[utils] Merge base_n functions

[youtube-dl] / youtube_dl / extractor / iqiyi.py
diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dl/extractor/iqiyi.py

index 541fe7ae3053aa28bf3e3f4176ef5ff31db12e55..76ecd55a4a9ad0859d09c48a3d6f20dd088f8785 100644 (file)
--- a/youtube_dl/extractor/iqiyi.py
+++ b/youtube_dl/extractor/iqiyi.py
@@ -2,6 +2,7 @@
  from __future__ import unicode_literals
  
  import hashlib
+import itertools
  import math
  import os
  import random
@@ -17,8 +18,10 @@ from ..compat import (
      compat_urllib_parse_urlparse,
  )
  from ..utils import (
+    base_n,
      ExtractorError,
      ohdave_rsa_encrypt,
+    remove_start,
      sanitized_Request,
      urlencode_postdata,
      url_basename,
@@ -124,21 +127,9 @@ class IqiyiSDK(object):
  
  
  class IqiyiSDKInterpreter(object):
-    BASE62_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
-
      def __init__(self, sdk_code):
          self.sdk_code = sdk_code
  
-    @classmethod
-    def base62(cls, num):
-        if num == 0:
-            return '0'
-        ret = ''
-        while num:
-            ret = cls.BASE62_TABLE[num % 62] + ret
-            num = num // 62
-        return ret
-
      def decode_eval_codes(self):
          self.sdk_code = self.sdk_code[5:-3]
  
@@ -152,7 +143,7 @@ class IqiyiSDKInterpreter(object):
  
          while count:
              count -= 1
-            b62count = self.base62(count)
+            b62count = base_n(count, 62)
              symbol_table[b62count] = symbols[count] or b62count
  
          self.sdk_code = re.sub(
@@ -295,6 +286,13 @@ class IqiyiIE(InfoExtractor):
              },
          }],
          'expected_warnings': ['Needs a VIP account for full video'],
+    }, {
+        'url': 'http://www.iqiyi.com/a_19rrhb8ce1.html',
+        'info_dict': {
+            'id': '202918101',
+            'title': '灌篮高手 国语版',
+        },
+        'playlist_count': 101,
      }]
  
      _FORMATS_MAP = [
@@ -526,9 +524,49 @@ class IqiyiIE(InfoExtractor):
          enc_key = '6ab6d0280511493ba85594779759d4ed'
          return enc_key
  
+    def _extract_playlist(self, webpage):
+        """Try to interpret ``webpage`` as an album page and build a playlist.
+
+        Episode links already embedded in ``webpage`` become the first
+        entries; additional entries come from successive ``avlist`` pages
+        requested for the album ID, starting at page 2, until a page holds
+        fewer than ``PAGE_SIZE`` items.
+
+        Returns None when ``webpage`` contains no ``site-piclist_pic_link``
+        anchors pointing at an iqiyi ``.html`` URL; otherwise returns the
+        value of ``self.playlist_result`` for the collected entries, the
+        album ID and the album title (the title lookup is non-fatal).
+        """
+        # Sent as the last path component of the avlist URL and used below as
+        # the "full page" threshold that decides whether to keep paging
+        PAGE_SIZE = 50
+
+        links = re.findall(
+            r'<a[^>]+class="site-piclist_pic_link"[^>]+href="(http://www\.iqiyi\.com/.+\.html)"',
+            webpage)
+        # No matching links: signal "not a playlist" with an implicit None
+        if not links:
+            return
+
+        album_id = self._search_regex(
+            r'albumId\s*:\s*(\d+),', webpage, 'album ID')
+        album_title = self._search_regex(
+            r'data-share-title="([^"]+)"', webpage, 'album title', fatal=False)
+
+        entries = list(map(self.url_result, links))
+
+        # Start from 2 because links in the first page are already on webpage
+        for page_num in itertools.count(2):
+            pagelist_page = self._download_webpage(
+                'http://cache.video.qiyi.com/jp/avlist/%s/%d/%d/' % (album_id, page_num, PAGE_SIZE),
+                album_id,
+                note='Download playlist page %d' % page_num,
+                errnote='Failed to download playlist page %d' % page_num)
+            # The response is handed to the JSON parser after remove_start()
+            # is applied with the 'var tvInfoJs=' prefix (presumably the body
+            # is a JS assignment wrapping JSON -- confirm against utils)
+            pagelist = self._parse_json(
+                remove_start(pagelist_page, 'var tvInfoJs='), album_id)
+            vlist = pagelist['data']['vlist']
+            for item in vlist:
+                entries.append(self.url_result(item['vurl']))
+            # A page shorter than PAGE_SIZE is treated as the last one
+            if len(vlist) < PAGE_SIZE:
+                break
+
+        return self.playlist_result(entries, album_id, album_title)
+
      def _real_extract(self, url):
          webpage = self._download_webpage(
              url, 'temp_id', note='download video page')
+
+        # There's no simple way to determine whether an URL is a playlist or not
+        # So detect it
+        playlist_result = self._extract_playlist(webpage)
+        if playlist_result:
+            return playlist_result
+
          tvid = self._search_regex(
              r'data-player-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid')
          video_id = self._search_regex(