_ Git - youtube-dl/blob - youtube_dl/extractor/noco.py

   1 # encoding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5 import time
   6 import hashlib
   7
   8 from .common import InfoExtractor
   9 from ..compat import (
  10     compat_str,
  11     compat_urllib_parse,
  12     compat_urllib_request,
  13 )
  14 from ..utils import (
  15     clean_html,
  16     ExtractorError,
  17     unified_strdate,
  18 )
  19
  20
  21 class NocoIE(InfoExtractor):
  22     _VALID_URL = r'http://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)'
  23     _LOGIN_URL = 'http://noco.tv/do.php'
  24     _API_URL_TEMPLATE = 'https://api.noco.tv/1.1/%s?ts=%s&tk=%s'
  25     _SUB_LANG_TEMPLATE = '&sub_lang=%s'
  26     _NETRC_MACHINE = 'noco'
  27
  28     _TESTS = [
  29         {
  30             'url': 'http://noco.tv/emission/11538/nolife/ami-ami-idol-hello-france/',
  31             'md5': '0a993f0058ddbcd902630b2047ef710e',
  32             'info_dict': {
  33                 'id': '11538',
  34                 'ext': 'mp4',
  35                 'title': 'Ami Ami Idol - Hello! France',
  36                 'description': 'md5:4eaab46ab68fa4197a317a88a53d3b86',
  37                 'upload_date': '20140412',
  38                 'uploader': 'Nolife',
  39                 'uploader_id': 'NOL',
  40                 'duration': 2851.2,
  41             },
  42             'skip': 'Requires noco account',
  43         },
  44         {
  45             'url': 'http://noco.tv/emission/12610/lbl42/the-guild/s01e01-wake-up-call',
  46             'md5': 'c190f1f48e313c55838f1f412225934d',
  47             'info_dict': {
  48                 'id': '12610',
  49                 'ext': 'mp4',
  50                 'title': 'The Guild #1 - Wake-Up Call',
  51                 'description': '',
  52                 'upload_date': '20140627',
  53                 'uploader': 'LBL42',
  54                 'uploader_id': 'LBL',
  55                 'duration': 233.023,
  56             },
  57             'skip': 'Requires noco account',
  58         }
  59     ]
  60
  61     def _real_initialize(self):
  62         self._login()
  63
  64     def _login(self):
  65         (username, password) = self._get_login_info()
  66         if username is None:
  67             return
  68
  69         login_form = {
  70             'a': 'login',
  71             'cookie': '1',
  72             'username': username,
  73             'password': password,
  74         }
  75         request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
  76         request.add_header('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8')
  77
  78         login = self._download_json(request, None, 'Logging in as %s' % username)
  79
  80         if 'erreur' in login:
  81             raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True)
  82
  83     def _call_api(self, path, video_id, note, sub_lang=None):
  84         ts = compat_str(int(time.time() * 1000))
  85         tk = hashlib.md5((hashlib.md5(ts.encode('ascii')).hexdigest() + '#8S?uCraTedap6a').encode('ascii')).hexdigest()
  86         url = self._API_URL_TEMPLATE % (path, ts, tk)
  87         if sub_lang:
  88             url += self._SUB_LANG_TEMPLATE % sub_lang
  89
  90         resp = self._download_json(url, video_id, note)
  91
  92         if isinstance(resp, dict) and resp.get('error'):
  93             self._raise_error(resp['error'], resp['description'])
  94
  95         return resp
  96
  97     def _raise_error(self, error, description):
  98         raise ExtractorError(
  99             '%s returned error: %s - %s' % (self.IE_NAME, error, description),
 100             expected=True)
 101
 102     def _real_extract(self, url):
 103         mobj = re.match(self._VALID_URL, url)
 104         video_id = mobj.group('id')
 105
 106         medias = self._call_api(
 107             'shows/%s/medias' % video_id,
 108             video_id, 'Downloading video JSON')
 109
 110         show = self._call_api(
 111             'shows/by_id/%s' % video_id,
 112             video_id, 'Downloading show JSON')[0]
 113
 114         options = self._call_api(
 115             'users/init', video_id,
 116             'Downloading user options JSON')['options']
 117         audio_lang_pref = options.get('audio_language') or options.get('language', 'fr')
 118
 119         if audio_lang_pref == 'original':
 120             audio_lang_pref = show['original_lang']
 121         if len(medias) == 1:
 122             audio_lang_pref = list(medias.keys())[0]
 123         elif audio_lang_pref not in medias:
 124             audio_lang_pref = 'fr'
 125
 126         qualities = self._call_api(
 127             'qualities',
 128             video_id, 'Downloading qualities JSON')
 129
 130         formats = []
 131
 132         for audio_lang, audio_lang_dict in medias.items():
 133             preference = 1 if audio_lang == audio_lang_pref else 0
 134             for sub_lang, lang_dict in audio_lang_dict['video_list'].items():
 135                 for format_id, fmt in lang_dict['quality_list'].items():
 136                     format_id_extended = 'audio-%s_sub-%s_%s' % (audio_lang, sub_lang, format_id)
 137
 138                     video = self._call_api(
 139                         'shows/%s/video/%s/%s' % (video_id, format_id.lower(), audio_lang),
 140                         video_id, 'Downloading %s video JSON' % format_id_extended,
 141                         sub_lang if sub_lang != 'none' else None)
 142
 143                     file_url = video['file']
 144                     if not file_url:
 145                         continue
 146
 147                     if file_url in ['forbidden', 'not found']:
 148                         popmessage = video['popmessage']
 149                         self._raise_error(popmessage['title'], popmessage['message'])
 150
 151                     formats.append({
 152                         'url': file_url,
 153                         'format_id': format_id_extended,
 154                         'width': fmt['res_width'],
 155                         'height': fmt['res_lines'],
 156                         'abr': fmt['audiobitrate'],
 157                         'vbr': fmt['videobitrate'],
 158                         'filesize': fmt['filesize'],
 159                         'format_note': qualities[format_id]['quality_name'],
 160                         'quality': qualities[format_id]['priority'],
 161                         'preference': preference,
 162                     })
 163
 164         self._sort_formats(formats)
 165
 166         upload_date = unified_strdate(show['online_date_start_utc'])
 167         uploader = show['partner_name']
 168         uploader_id = show['partner_key']
 169         duration = show['duration_ms'] / 1000.0
 170
 171         thumbnails = []
 172         for thumbnail_key, thumbnail_url in show.items():
 173             m = re.search(r'^screenshot_(?P<width>\d+)x(?P<height>\d+)$', thumbnail_key)
 174             if not m:
 175                 continue
 176             thumbnails.append({
 177                 'url': thumbnail_url,
 178                 'width': int(m.group('width')),
 179                 'height': int(m.group('height')),
 180             })
 181
 182         episode = show.get('show_TT') or show.get('show_OT')
 183         family = show.get('family_TT') or show.get('family_OT')
 184         episode_number = show.get('episode_number')
 185
 186         title = ''
 187         if family:
 188             title += family
 189         if episode_number:
 190             title += ' #' + compat_str(episode_number)
 191         if episode:
 192             title += ' - ' + episode
 193
 194         description = show.get('show_resume') or show.get('family_resume')
 195
 196         return {
 197             'id': video_id,
 198             'title': title,
 199             'description': description,
 200             'thumbnails': thumbnails,
 201             'upload_date': upload_date,
 202             'uploader': uploader,
 203             'uploader_id': uploader_id,
 204             'duration': duration,
 205             'formats': formats,
 206         }