[viki] Modernize
[youtube-dl] / youtube_dl / extractor / viki.py
1 from __future__ import unicode_literals
2
3 import re
4
5 from ..utils import (
6     ExtractorError,
7     unescapeHTML,
8     unified_strdate,
9 )
10 from .subtitles import SubtitlesInfoExtractor
11
12
class VikiIE(SubtitlesInfoExtractor):
    """Information extractor for single-video pages on viki.com.

    Metadata is scraped from the public video page; the media URL, the
    upload date and the subtitle tracks come from a separate player
    fragment page.
    """

    IE_NAME = 'viki'

    _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
    _TEST = {
        'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
        'md5': 'a21454021c2646f5433514177e2caa5f',
        'info_dict': {
            'id': '1023585v',
            'ext': 'mp4',
            'title': 'Heirs Episode 14',
            'uploader': 'SBS',
            'description': 'md5:c4b17b9626dd4b143dcc4d855ba3474e',
            'upload_date': '20131121',
            'age_limit': 13,
        },
        'skip': 'Blocked in the US',
    }

    # Rating label as printed on the video page -> value reported as
    # 'age_limit'. Labels missing from this table yield age_limit None.
    _RATINGS = {
        'G': 0,
        'PG': 10,
        'PG-13': 13,
        'R': 16,
        'NC': 18,
    }

    def _real_extract(self, url):
        """Extract the info dict for the video at *url*.

        Returns a dict with the keys 'id', 'title', 'url', 'description',
        'thumbnail', 'age_limit', 'uploader', 'subtitles' and
        'upload_date'. When the 'listsubtitles' downloader parameter is
        set, the available subtitles are listed instead and None is
        returned.

        Raises ExtractorError (expected=True) when the player fragment
        starts with a "video-error" element, which this extractor reports
        as the video being blocked from the user's location.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        # The broadcast network is optional; a missing field is not an error
        # and is deliberately looked up without any warning.
        uploader_m = re.search(
            r'<strong>Broadcast Network: </strong>\s*([^<]*)<', webpage)
        uploader = None if uploader_m is None else uploader_m.group(1).strip()

        # default='' keeps a page without rating information from failing;
        # the empty label is simply absent from _RATINGS.
        rating_str = self._html_search_regex(
            r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
            'rating information', default='').strip()
        age_limit = self._RATINGS.get(rating_str)

        info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id
        info_webpage = self._download_webpage(
            info_url, video_id, note='Downloading info page')
        if re.match(r'\s*<div\s+class="video-error', info_webpage):
            raise ExtractorError(
                'Video %s is blocked from your location.' % video_id,
                expected=True)
        video_url = self._html_search_regex(
            r'<source[^>]+src="([^"]+)"', info_webpage, 'video URL')

        # The upload date is optional metadata: look it up non-fatally so a
        # fragment without "created_at" does not abort the extraction and
        # the None check below can actually take effect.
        upload_date_str = self._html_search_regex(
            r'"created_at":"([^"]+)"', info_webpage, 'upload date',
            fatal=False)
        upload_date = (
            unified_strdate(upload_date_str)
            if upload_date_str is not None
            else None
        )

        # subtitles
        video_subtitles = self.extract_subtitles(video_id, info_webpage)
        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(video_id, info_webpage)
            return

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'description': description,
            'thumbnail': thumbnail,
            'age_limit': age_limit,
            'uploader': uploader,
            'subtitles': video_subtitles,
            'upload_date': upload_date,
        }

    def _get_available_subtitles(self, video_id, info_webpage):
        """Return a dict mapping language code to subtitle URL.

        Every '<track src="..."/>' element in *info_webpage* is inspected;
        the language code is the lowercase file name in front of '.vtt'.
        Tracks whose URL does not contain such a name are skipped, and a
        later track replaces an earlier one with the same language.

        *video_id* is not used here (presumably part of the signature the
        base class expects -- confirm against SubtitlesInfoExtractor).
        """
        res = {}
        for sturl_html in re.findall(r'<track src="([^"]+)"/>', info_webpage):
            # The attribute value is HTML-escaped in the page source.
            sturl = unescapeHTML(sturl_html)
            m = re.search(r'/(?P<lang>[a-z]+)\.vtt', sturl)
            if not m:
                continue
            res[m.group('lang')] = sturl
        return res