_ Git - youtube-dl/blob - youtube_dl/extractor/vtm.py

   1 from __future__ import unicode_literals
   2
   3 import re
   4
   5 from .generic import GenericIE
   6 from .common import InfoExtractor
   7 from ..utils import (
   8     urlencode_postdata,
   9     compat_urllib_parse_urlencode,
  10     ExtractorError,
  11     remove_end,
  12 )
  13
  14
  15 class VTMIE(InfoExtractor):
  16     """Download full episodes that require an account from vtm.be or q2.be.
  17
  18     The generic extractor can be used to download clips that do no require an
  19     account.
  20     """
  21     _VALID_URL = r'https?://(?:www\.)?(?P<site_id>vtm|q2)\.be/video[/?].+?'
  22     _NETRC_MACHINE = 'vtm'
  23     _APIKEY = '3_HZ0FtkMW_gOyKlqQzW5_0FHRC7Nd5XpXJZcDdXY4pk5eES2ZWmejRW5egwVm4ug-'
  24     _TESTS = [
  25         {
  26             'url': 'http://vtm.be/video/volledige-afleveringen/id/vtm_20170219_VM0678361_vtmwatch',
  27             'info_dict': {
  28                 'id': 'vtm_20170219_VM0678361_vtmwatch',
  29                 'ext': 'mp4',
  30                 'title': 'Allemaal Chris afl. 6',
  31                 'description': 'md5:4be86427521e7b07e0adb0c9c554ddb2',
  32             },
  33             'skip_download': True,
  34         },
  35         {
  36             'url': 'http://vtm.be/video/volledige-afleveringen/id/257107153551000',
  37             'only_matching': True,
  38         },
  39         {
  40             'url': 'http://vtm.be/video?aid=163157',
  41             'only_matching': True,
  42         },
  43         {
  44             'url': 'http://www.q2.be/video/volledige-afleveringen/id/2be_20170301_VM0684442_q2',
  45             'only_matching': True,
  46         },
  47         {
  48             'url': 'http://vtm.be/video?aid=168332',
  49             'info_dict': {
  50                 'id': 'video?aid=168332',
  51                 'ext': 'mp4',
  52                 'title': 'Videozone',
  53             },
  54         },
  55     ]
  56
  57     def _real_initialize(self):
  58         self._logged_in = False
  59
  60     def _login(self):
  61         (username, password) = self._get_login_info()
  62         if username is None or password is None:
  63             self.raise_login_required()
  64
  65         auth_data = {
  66             'APIKey': self._APIKEY,
  67             'sdk': 'js_6.1',
  68             'format': 'json',
  69             'loginID': username,
  70             'password': password,
  71         }
  72
  73         auth_info = self._download_json(
  74             'https://accounts.eu1.gigya.com/accounts.login', None,
  75             note='Logging in', errnote='Unable to log in',
  76             data=urlencode_postdata(auth_data), fatal=True)
  77
  78         error_message = auth_info.get('errorDetails')
  79         if error_message:
  80             raise ExtractorError(
  81                 'Unable to login: %s' % error_message, expected=True)
  82
  83         self._uid = auth_info['UID']
  84         self._uid_signature = auth_info['UIDSignature']
  85         self._signature_timestamp = auth_info['signatureTimestamp']
  86
  87         self._logged_in = True
  88
  89     def _real_extract(self, url):
  90         mobj = re.match(self._VALID_URL, url)
  91         site_id = mobj.group('site_id')
  92
  93         webpage = self._download_webpage(url, None, "Downloading webpage")
  94
  95         # The URL sometimes contains the video id, but not always, e.g., test
  96         # case 3. Fortunately, all webpages of videos requiring authentication
  97         # contain the video id.
  98         video_id = self._search_regex(
  99             r'\\"vodId\\":\\"(.+?)\\"', webpage, 'video_id', default=None)
 100
 101         # It was most likely a video not requiring authentication.
 102         if not video_id:
 103             return self.url_result(url, 'Generic')
 104
 105         if not self._logged_in:
 106             self._login()
 107
 108         title = self._html_search_regex(
 109             r'\\"title\\":\\"(.+?)\\"', webpage, 'title', default=None)
 110
 111         description = self._html_search_regex(
 112             r'<div[^>]+class="field-item\s+even">\s*<p>(.+?)</p>',
 113             webpage, 'description', default=None)
 114
 115         data_url = 'http://vod.medialaan.io/api/1.0/item/%s/video' % video_id
 116         m3u8_data = {
 117             'app_id': 'vtm_watch' if site_id == 'vtm' else 'q2',
 118             'user_network': 'vtm-sso',
 119             'UID': self._uid,
 120             'UIDSignature': self._uid_signature,
 121             'signatureTimestamp': self._signature_timestamp,
 122         }
 123         data = self._download_json(data_url, video_id, query=m3u8_data)
 124
 125         formats = self._extract_m3u8_formats(
 126             data['response']['uri'], video_id, entry_protocol='m3u8_native',
 127             ext='mp4', m3u8_id='hls')
 128
 129         self._sort_formats(formats)
 130
 131         return {
 132             'id': video_id,
 133             'title': title,
 134             'description': description,
 135             'formats': formats,
 136         }