[iqiyi] Make _VALID_URL more accurate
[youtube-dl] / youtube_dl / extractor / viki.py
index 68d5cac6edf8ae0fca80ef82c0b3701d835e250a..7f2fb1ca8896e29e48a41a9efddaded987ba1e96 100644 (file)
@@ -1,9 +1,9 @@
 from __future__ import unicode_literals
 
-import re
 import time
 import hmac
 import hashlib
+import itertools
 
 from ..utils import (
     ExtractorError,
@@ -15,6 +15,7 @@ from .common import InfoExtractor
 
 
 class VikiBaseIE(InfoExtractor):
+    _VALID_URL_BASE = r'https?://(?:www\.)?viki\.(?:com|net|mx|jp|fr)/'
     _API_QUERY_TEMPLATE = '/v4/%sapp=%s&t=%s&site=www.viki.com'
     _API_URL_TEMPLATE = 'http://api.viki.io%s&sig=%s'
 
@@ -58,7 +59,7 @@ class VikiBaseIE(InfoExtractor):
 
 class VikiIE(VikiBaseIE):
     IE_NAME = 'viki'
-    _VALID_URL = r'https?://(?:www\.)?viki\.com/(?:videos|player)/(?P<id>[0-9]+v)'
+    _VALID_URL = r'%s(?:videos|player)/(?P<id>[0-9]+v)' % VikiBaseIE._VALID_URL_BASE
     _TESTS = [{
         'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
         'info_dict': {
@@ -156,7 +157,7 @@ class VikiIE(VikiBaseIE):
             title = 'Episode %d' % video.get('number') if video.get('type') == 'episode' else video.get('id') or video_id
             container_titles = video.get('container', {}).get('titles')
             if container_titles:
-                container_title = container_titles.get('en') or container_titles[titles.keys()[0]]
+                container_title = container_titles.get('en') or container_titles[container_titles.keys()[0]]
                 title = '%s - %s' % (container_title, title)
 
         descriptions = video.get('descriptions')
@@ -227,9 +228,9 @@ class VikiIE(VikiBaseIE):
         return result
 
 
-class VikiChannelIE(InfoExtractor):
+class VikiChannelIE(VikiBaseIE):
     IE_NAME = 'viki:channel'
-    _VALID_URL = r'https?://(?:www\.)?viki\.com/tv/(?P<id>[0-9]+c)'
+    _VALID_URL = r'%s(?:tv|news|movies|artists)/(?P<id>[0-9]+c)' % VikiBaseIE._VALID_URL_BASE
     _TESTS = [{
         'url': 'http://www.viki.com/tv/50c-boys-over-flowers',
         'info_dict': {
@@ -246,17 +247,25 @@ class VikiChannelIE(InfoExtractor):
             'description': 'md5:05bf5471385aa8b21c18ad450e350525',
         },
         'playlist_count': 127,
+    }, {
+        'url': 'http://www.viki.com/news/24569c-showbiz-korea',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.viki.com/movies/22047c-pride-and-prejudice-2005',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.viki.com/artists/2141c-shinee',
+        'only_matching': True,
     }]
-    _API_BASE = 'http://api.viki.io/v4/containers'
-    _APP = '100000a'
+
     _PER_PAGE = 25
 
     def _real_extract(self, url):
         channel_id = self._match_id(url)
 
-        channel = self._download_json(
-            '%s/%s.json?app=%s' % (self._API_BASE, channel_id, self._APP),
-            channel_id, 'Downloading channel JSON')
+        channel = self._call_api(
+            'containers/%s.json' % channel_id, channel_id,
+            'Downloading channel JSON')
 
         titles = channel['titles']
         title = titles.get('en') or titles[titles.keys()[0]]
@@ -265,17 +274,17 @@ class VikiChannelIE(InfoExtractor):
         description = descriptions.get('en') or descriptions[descriptions.keys()[0]]
 
         entries = []
-        for video_type in ('episodes', 'clips'):
-            page_url = '%s/%s/%s.json?app=%s&per_page=%d&sort=number&direction=asc&with_paging=true&page=1' % (self._API_BASE, channel_id, video_type, self._APP, self._PER_PAGE)
-            while page_url:
-                page = self._download_json(
-                    page_url, channel_id,
-                    'Downloading %s JSON page #%s'
-                    % (video_type, re.search(r'[?&]page=([0-9]+)', page_url).group(1)))
+        for video_type in ('episodes', 'clips', 'movies'):
+            for page_num in itertools.count(1):
+                page = self._call_api(
+                    'containers/%s/%s.json?per_page=%d&sort=number&direction=asc&with_paging=true&page=%d'
+                    % (channel_id, video_type, self._PER_PAGE, page_num), channel_id,
+                    'Downloading %s JSON page #%d' % (video_type, page_num))
                 for video in page['response']:
                     video_id = video['id']
                     entries.append(self.url_result(
-                        'http://www.viki.com/videos/%s' % video_id, 'Viki', video_id))
-                page_url = page['pagination']['next']
+                        'http://www.viki.com/videos/%s' % video_id, 'Viki'))
+                if not page['pagination']['next']:
+                    break
 
         return self.playlist_result(entries, channel_id, title, description)