Merge pull request #6303 from jaimeMF/dash_no_live

[youtube-dl] / youtube_dl / extractor / viki.py
diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py

index 234649ca8f8e885f488bbe976f57d7a487e60eef..51cdc6b65143aaf4a0d2823ffa8c859c96e25972 100644 (file)
--- a/youtube_dl/extractor/viki.py
+++ b/youtube_dl/extractor/viki.py
@@ -1,9 +1,11 @@
+# coding: utf-8
  from __future__ import unicode_literals
  
-import re
+import json
  import time
  import hmac
  import hashlib
+import itertools
  
  from ..utils import (
      ExtractorError,
@@ -11,10 +13,12 @@ from ..utils import (
      parse_age_limit,
      parse_iso8601,
  )
+from ..compat import compat_urllib_request
  from .common import InfoExtractor
  
  
  class VikiBaseIE(InfoExtractor):
+    _VALID_URL_BASE = r'https?://(?:www\.)?viki\.(?:com|net|mx|jp|fr)/'
      _API_QUERY_TEMPLATE = '/v4/%sapp=%s&t=%s&site=www.viki.com'
      _API_URL_TEMPLATE = 'http://api.viki.io%s&sig=%s'
  
@@ -22,27 +26,35 @@ class VikiBaseIE(InfoExtractor):
      _APP_VERSION = '2.2.5.1428709186'
      _APP_SECRET = '-$iJ}@p7!G@SyU/je1bEyWg}upLu-6V6-Lg9VD(]siH,r.,m-r|ulZ,U4LC/SeR)'
  
-    def _prepare_call(self, path, timestamp=None):
+    _NETRC_MACHINE = 'viki'
+
+    _token = None
+
+    def _prepare_call(self, path, timestamp=None, post_data=None):  # returns a signed API URL, or a Request wrapping it when post_data is truthy
          path += '?' if '?' not in path else '&'
          if not timestamp:
              timestamp = int(time.time())
          query = self._API_QUERY_TEMPLATE % (path, self._APP, timestamp)
+        if self._token:
+            query += '&token=%s' % self._token  # appended before signing, so the HMAC below covers the token too
          sig = hmac.new(
              self._APP_SECRET.encode('ascii'),
              query.encode('ascii'),
              hashlib.sha1
          ).hexdigest()
-        return self._API_URL_TEMPLATE % (query, sig)
+        url = self._API_URL_TEMPLATE % (query, sig)
+        return compat_urllib_request.Request(  # request body is post_data serialized as UTF-8 encoded JSON
+            url, json.dumps(post_data).encode('utf-8')) if post_data else url
  
-    def _call_api(self, path, video_id, note, timestamp=None):
+    def _call_api(self, path, video_id, note, timestamp=None, post_data=None):
          resp = self._download_json(
-            self._prepare_call(path, timestamp), video_id, note)
+            self._prepare_call(path, timestamp, post_data), video_id, note)
  
          error = resp.get('error')
          if error:
              if error == 'invalid timestamp':
                  resp = self._download_json(
-                    self._prepare_call(path, int(resp['current_timestamp'])),
+                    self._prepare_call(path, int(resp['current_timestamp']), post_data),
                      video_id, '%s (retry)' % note)
                  error = resp.get('error')
              if error:
@@ -55,10 +67,31 @@ class VikiBaseIE(InfoExtractor):
              '%s returned error: %s' % (self.IE_NAME, error),
              expected=True)
  
+    def _real_initialize(self):  # NOTE(review): presumably the InfoExtractor initialization hook - confirm against .common
+        self._login()
+
+    def _login(self):  # fetch an API session token via sessions.json when login info is available
+        (username, password) = self._get_login_info()
+        if username is None:
+            return  # no username: skip the login call, _token keeps its class default (None)
+
+        login_form = {
+            'login_id': username,
+            'password': password,
+        }
+
+        login = self._call_api(  # post_data makes _prepare_call wrap the signed URL in a Request with a JSON body
+            'sessions.json', None,
+            'Logging in as %s' % username, post_data=login_form)
+
+        self._token = login.get('token')  # a truthy _token is added to every query built by _prepare_call afterwards
+        if not self._token:
+            self.report_warning('Unable to get session token, login has probably failed')
+
  
  class VikiIE(VikiBaseIE):
      IE_NAME = 'viki'
-    _VALID_URL = r'https?://(?:www\.)?viki\.com/(?:videos|player)/(?P<id>[0-9]+v)'
+    _VALID_URL = r'%s(?:videos|player)/(?P<id>[0-9]+v)' % VikiBaseIE._VALID_URL_BASE
      _TESTS = [{
          'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
          'info_dict': {
@@ -120,6 +153,23 @@ class VikiIE(VikiBaseIE):
              'like_count': int,
              'age_limit': 13,
          }
+    }, {
+        # youtube external
+        'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1',
+        'md5': '216d1afdc0c64d1febc1e9f2bd4b864b',
+        'info_dict': {
+            'id': '50562v',
+            'ext': 'mp4',
+            'title': 'Poor Nastya [COMPLETE] - Episode 1',
+            'description': '',
+            'duration': 607,
+            'timestamp': 1274949505,
+            'upload_date': '20101213',
+            'uploader': 'ad14065n',
+            'uploader_id': 'ad14065n',
+            'like_count': int,
+            'age_limit': 13,
+        }
      }, {
          'url': 'http://www.viki.com/player/44699v',
          'only_matching': True,
@@ -128,26 +178,6 @@ class VikiIE(VikiBaseIE):
      def _real_extract(self, url):
          video_id = self._match_id(url)
  
-        streams = self._call_api(
-            'videos/%s/streams.json' % video_id, video_id,
-            'Downloading video streams JSON')
-
-        formats = []
-        for format_id, stream_dict in streams.items():
-            height = self._search_regex(
-                r'^(\d+)[pP]$', format_id, 'height', default=None)
-            for protocol, format_dict in stream_dict.items():
-                if format_id == 'm3u8':
-                    formats = self._extract_m3u8_formats(
-                        format_dict['url'], video_id, 'mp4', m3u8_id='m3u8-%s' % protocol)
-                else:
-                    formats.append({
-                        'url': format_dict['url'],
-                        'format_id': '%s-%s' % (format_id, protocol),
-                        'height': height,
-                    })
-        self._sort_formats(formats)
-
          video = self._call_api(
              'videos/%s.json' % video_id, video_id, 'Downloading video JSON')
  
@@ -159,7 +189,7 @@ class VikiIE(VikiBaseIE):
              title = 'Episode %d' % video.get('number') if video.get('type') == 'episode' else video.get('id') or video_id
              container_titles = video.get('container', {}).get('titles')
              if container_titles:
-                container_title = container_titles.get('en') or container_titles[titles.keys()[0]]
+                container_title = container_titles.get('en') or container_titles[container_titles.keys()[0]]
                  title = '%s - %s' % (container_title, title)
  
          descriptions = video.get('descriptions')
@@ -186,7 +216,7 @@ class VikiIE(VikiBaseIE):
                      'videos/%s/subtitles/%s.%s' % (video_id, subtitle_lang, subtitles_format)),
              } for subtitles_format in ('srt', 'vtt')]
  
-        return {
+        result = {
              'id': video_id,
              'title': title,
              'description': description,
@@ -196,14 +226,43 @@ class VikiIE(VikiBaseIE):
              'like_count': like_count,
              'age_limit': age_limit,
              'thumbnails': thumbnails,
-            'formats': formats,
              'subtitles': subtitles,
          }
  
+        streams = self._call_api(
+            'videos/%s/streams.json' % video_id, video_id,
+            'Downloading video streams JSON')
+
+        if 'external' in streams:
+            result.update({
+                '_type': 'url_transparent',
+                'url': streams['external']['url'],
+            })
+            return result
+
+        formats = []
+        for format_id, stream_dict in streams.items():
+            height = self._search_regex(
+                r'^(\d+)[pP]$', format_id, 'height', default=None)
+            for protocol, format_dict in stream_dict.items():
+                if format_id == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(  # extend, not rebind: keep formats already collected from other streams/protocols
+                        format_dict['url'], video_id, 'mp4', m3u8_id='m3u8-%s' % protocol))
+                else:
+                    formats.append({
+                        'url': format_dict['url'],
+                        'format_id': '%s-%s' % (format_id, protocol),
+                        'height': int(height) if height else None,  # regex yields a digit string (or None); store a number
+                    })
+        self._sort_formats(formats)
+
+        result['formats'] = formats
+        return result
  
-class VikiChannelIE(InfoExtractor):
+
+class VikiChannelIE(VikiBaseIE):
      IE_NAME = 'viki:channel'
-    _VALID_URL = r'https?://(?:www\.)?viki\.com/tv/(?P<id>[0-9]+c)'
+    _VALID_URL = r'%s(?:tv|news|movies|artists)/(?P<id>[0-9]+c)' % VikiBaseIE._VALID_URL_BASE
      _TESTS = [{
          'url': 'http://www.viki.com/tv/50c-boys-over-flowers',
          'info_dict': {
@@ -220,17 +279,25 @@ class VikiChannelIE(InfoExtractor):
              'description': 'md5:05bf5471385aa8b21c18ad450e350525',
          },
          'playlist_count': 127,
+    }, {
+        'url': 'http://www.viki.com/news/24569c-showbiz-korea',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.viki.com/movies/22047c-pride-and-prejudice-2005',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.viki.com/artists/2141c-shinee',
+        'only_matching': True,
      }]
-    _API_BASE = 'http://api.viki.io/v4/containers'
-    _APP = '100000a'
+
      _PER_PAGE = 25
  
      def _real_extract(self, url):
          channel_id = self._match_id(url)
  
-        channel = self._download_json(
-            '%s/%s.json?app=%s' % (self._API_BASE, channel_id, self._APP),
-            channel_id, 'Downloading channel JSON')
+        channel = self._call_api(
+            'containers/%s.json' % channel_id, channel_id,
+            'Downloading channel JSON')
  
          titles = channel['titles']
          title = titles.get('en') or titles[titles.keys()[0]]
@@ -239,17 +306,17 @@ class VikiChannelIE(InfoExtractor):
          description = descriptions.get('en') or descriptions[descriptions.keys()[0]]
  
          entries = []
-        for video_type in ('episodes', 'clips'):
-            page_url = '%s/%s/%s.json?app=%s&per_page=%d&sort=number&direction=asc&with_paging=true&page=1' % (self._API_BASE, channel_id, video_type, self._APP, self._PER_PAGE)
-            while page_url:
-                page = self._download_json(
-                    page_url, channel_id,
-                    'Downloading %s JSON page #%s'
-                    % (video_type, re.search(r'[?&]page=([0-9]+)', page_url).group(1)))
+        for video_type in ('episodes', 'clips', 'movies'):
+            for page_num in itertools.count(1):
+                page = self._call_api(
+                    'containers/%s/%s.json?per_page=%d&sort=number&direction=asc&with_paging=true&page=%d'
+                    % (channel_id, video_type, self._PER_PAGE, page_num), channel_id,
+                    'Downloading %s JSON page #%d' % (video_type, page_num))
                  for video in page['response']:
                      video_id = video['id']
                      entries.append(self.url_result(
-                        'http://www.viki.com/videos/%s' % video_id, 'Viki', video_id))
-                page_url = page['pagination']['next']
+                        'http://www.viki.com/videos/%s' % video_id, 'Viki'))
+                if not page['pagination']['next']:
+                    break
  
          return self.playlist_result(entries, channel_id, title, description)