_ Git - youtube-dl/blob - youtube_dl/extractor/rtbf.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 from .common import InfoExtractor
   5 from ..utils import (
   6     int_or_none,
   7     unescapeHTML,
   8 )
   9
  10
  11 class RTBFIE(InfoExtractor):
  12     _VALID_URL = r'''(?x)
  13         https?://www\.rtbf\.be/
  14             (?:
  15                 video/[^\?]+\?id=|
  16                 ouftivi/heros/[^&]+&videoId=
  17             )
  18         (?P<id>\d+)
  19     '''
  20     _TESTS = [
  21         {
  22             'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274',
  23             'md5': '799f334ddf2c0a582ba80c44655be570',
  24             'info_dict': {
  25                 'id': '1921274',
  26                 'ext': 'mp4',
  27                 'title': 'Les Diables au coeur (épisode 2)',
  28                 'duration': 3099,
  29             }
  30         },
  31         {
  32             'url': 'http://www.rtbf.be/ouftivi/heros/detail_scooby-doo-mysteres-associes?id=1097&videoId=2057442',
  33             'md5': '25aea17e949e1e0c7c41270d60d25f22',
  34             'info_dict': {
  35                 'id': '2057442',
  36                 'ext': 'mp4',
  37                 'title': 'Scooby-Doo, myst\xe8res associ\xe9s',
  38                 'duration': 1279,
  39             }
  40         },
  41     ]
  42
  43     _QUALITIES = [
  44         ('mobile', 'mobile'),
  45         ('web', 'SD'),
  46         ('url', 'MD'),
  47         ('high', 'HD'),
  48     ]
  49
  50     def _real_extract(self, url):
  51         video_id = self._match_id(url)
  52
  53         webpage = self._download_webpage(
  54             'http://www.rtbf.be/video/embed?id=%s' % video_id, video_id)
  55
  56         data = self._parse_json(
  57             unescapeHTML(self._search_regex(
  58                 r'data-media="([^"]+)"', webpage, 'data video')),
  59             video_id)
  60
  61         if data.get('provider').lower() == 'youtube':
  62             video_url = data.get('downloadUrl') or data.get('url')
  63             return self.url_result(video_url, 'Youtube')
  64         formats = []
  65         for key, format_id in self._QUALITIES:
  66             format_url = data['sources'].get(key)
  67             if format_url:
  68                 formats.append({
  69                     'format_id': format_id,
  70                     'url': format_url,
  71                 })
  72
  73         return {
  74             'id': video_id,
  75             'formats': formats,
  76             'title': data['title'],
  77             'description': data.get('description') or data.get('subtitle'),
  78             'thumbnail': data.get('thumbnail'),
  79             'duration': data.get('duration') or data.get('realDuration'),
  80             'timestamp': int_or_none(data.get('created')),
  81             'view_count': int_or_none(data.get('viewCount')),
  82         }