[noco] Add support for multi language videos (Closes #4326)
[youtube-dl] / youtube_dl / extractor / noco.py
1 # encoding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5 import time
6 import hashlib
7
8 from .common import InfoExtractor
9 from ..utils import (
10     compat_urllib_request,
11     compat_urllib_parse,
12     ExtractorError,
13     clean_html,
14     unified_strdate,
15     compat_str,
16 )
17
18
class NocoIE(InfoExtractor):
    """Extractor for noco.tv shows, including multi-language videos.

    Optionally logs in with the configured credentials, then queries the
    signed noco.tv API for the media list, the quality table, one video
    descriptor per (language, quality) pair and the show metadata.
    """

    # Accept both http and https page/player URLs; the numeric id is the
    # only part of the URL that is used afterwards.
    _VALID_URL = r'https?://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)'
    _LOGIN_URL = 'http://noco.tv/do.php'
    # Filled with (API path, millisecond timestamp, token derived from it).
    _API_URL_TEMPLATE = 'https://api.noco.tv/1.1/%s?ts=%s&tk=%s'
    # Appended to the API URL when a specific language variant is requested.
    _SUB_LANG_TEMPLATE = '&sub_lang=%s'
    _NETRC_MACHINE = 'noco'

    _TEST = {
        'url': 'http://noco.tv/emission/11538/nolife/ami-ami-idol-hello-france/',
        'md5': '0a993f0058ddbcd902630b2047ef710e',
        'info_dict': {
            'id': '11538',
            'ext': 'mp4',
            'title': 'Ami Ami Idol - Hello! France',
            'description': 'md5:4eaab46ab68fa4197a317a88a53d3b86',
            'upload_date': '20140412',
            'uploader': 'Nolife',
            'uploader_id': 'NOL',
            'duration': 2851.2,
        },
        'skip': 'Requires noco account',
    }

    def _real_initialize(self):
        """Log in (when credentials are available) before any extraction."""
        self._login()

    def _login(self):
        """Post the login form to noco.tv if a username is configured.

        Does nothing when no username is available.

        Raises:
            ExtractorError: if the JSON login response contains an
                'erreur' entry.
        """
        (username, password) = self._get_login_info()
        if username is None:
            return

        login_form = {
            'a': 'login',
            'cookie': '1',
            'username': username,
            'password': password,
        }
        # The request body must be bytes: Python 3 refuses to send a str as
        # POST data. The urlencoded form is plain ASCII, so encoding it is
        # also harmless on Python 2.
        post_data = compat_urllib_parse.urlencode(login_form).encode('utf-8')
        request = compat_urllib_request.Request(self._LOGIN_URL, post_data)
        request.add_header('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8')

        login = self._download_json(request, None, 'Logging in as %s' % username)

        if 'erreur' in login:
            raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True)

    def _call_api(self, path, video_id, note, sub_lang=None):
        """Download and return the JSON document at the given API path.

        Args:
            path: API path inserted into _API_URL_TEMPLATE.
            video_id: id passed through to the JSON download helper.
            note: progress message passed through to the download helper.
            sub_lang: optional language code; when truthy it is appended to
                the URL as the sub_lang query parameter.

        Raises:
            ExtractorError: if the response is a dict with a truthy 'error'.
        """
        # The token is md5(md5(timestamp) + fixed salt), sent together with
        # the millisecond timestamp it was derived from.
        ts = compat_str(int(time.time() * 1000))
        tk = hashlib.md5((hashlib.md5(ts.encode('ascii')).hexdigest() + '#8S?uCraTedap6a').encode('ascii')).hexdigest()
        url = self._API_URL_TEMPLATE % (path, ts, tk)
        if sub_lang:
            url += self._SUB_LANG_TEMPLATE % sub_lang

        resp = self._download_json(url, video_id, note)

        if isinstance(resp, dict) and resp.get('error'):
            # Use .get() so an error without a description is still reported
            # as the API error rather than as a KeyError.
            self._raise_error(resp['error'], resp.get('description'))

        return resp

    def _raise_error(self, error, description):
        """Raise an expected ExtractorError built from an error and its description."""
        raise ExtractorError(
            '%s returned error: %s - %s' % (self.IE_NAME, error, description),
            expected=True)

    @staticmethod
    def _build_title(family, episode_number, episode):
        """Join the available title parts as 'family #number - episode'.

        Missing parts are skipped without leaving a dangling separator at
        the start of the title.
        """
        heading = family or ''
        if episode_number:
            heading = ('%s #%s' % (heading, episode_number)).strip()
        return ' - '.join(part for part in (heading, episode) if part)

    def _real_extract(self, url):
        """Return the info dict (metadata, thumbnails and formats) for url."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        medias = self._call_api(
            'shows/%s/medias' % video_id,
            video_id, 'Downloading video JSON')

        qualities = self._call_api(
            'qualities',
            video_id, 'Downloading qualities JSON')

        formats = []

        for lang, lang_dict in medias['fr']['video_list'].items():
            # The 'none' key is handled as "no language variant": it gets no
            # format id prefix and no sub_lang parameter.
            has_lang = lang != 'none'
            for format_id, fmt in lang_dict['quality_list'].items():
                format_id_extended = '%s-%s' % (lang, format_id) if has_lang else format_id

                video = self._call_api(
                    'shows/%s/video/%s/fr' % (video_id, format_id.lower()),
                    video_id, 'Downloading %s video JSON' % format_id_extended,
                    lang if has_lang else None)

                file_url = video['file']
                if not file_url:
                    continue

                if file_url in ['forbidden', 'not found']:
                    popmessage = video['popmessage']
                    self._raise_error(popmessage['title'], popmessage['message'])

                # Descriptive fields are optional: a missing one must not
                # abort the extraction of an otherwise usable format.
                quality = qualities.get(format_id) or {}
                formats.append({
                    'url': file_url,
                    'format_id': format_id_extended,
                    'width': fmt.get('res_width'),
                    'height': fmt.get('res_lines'),
                    'abr': fmt.get('audiobitrate'),
                    'vbr': fmt.get('videobitrate'),
                    'filesize': fmt.get('filesize'),
                    'format_note': quality.get('quality_name'),
                    'preference': quality.get('priority'),
                })

        self._sort_formats(formats)

        show = self._call_api(
            'shows/by_id/%s' % video_id,
            video_id, 'Downloading show JSON')[0]

        # Optional show metadata: fall back to None instead of failing.
        online_date = show.get('online_date_start_utc')
        upload_date = unified_strdate(online_date) if online_date else None
        uploader = show.get('partner_name')
        uploader_id = show.get('partner_key')
        duration_ms = show.get('duration_ms')
        duration = duration_ms / 1000.0 if duration_ms is not None else None

        # Thumbnails are the show entries whose key looks like
        # screenshot_<width>x<height>.
        thumbnails = []
        for thumbnail_key, thumbnail_url in show.items():
            m = re.search(r'^screenshot_(?P<width>\d+)x(?P<height>\d+)$', thumbnail_key)
            if not m:
                continue
            thumbnails.append({
                'url': thumbnail_url,
                'width': int(m.group('width')),
                'height': int(m.group('height')),
            })

        # For each name, the *_TT value is preferred over the *_OT value.
        episode = show.get('show_TT') or show.get('show_OT')
        family = show.get('family_TT') or show.get('family_OT')
        episode_number = show.get('episode_number')

        title = self._build_title(family, episode_number, episode)

        description = show.get('show_resume') or show.get('family_resume')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnails': thumbnails,
            'upload_date': upload_date,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'duration': duration,
            'formats': formats,
        }