_ Git - youtube-dl/blob - youtube_dl/extractor/dumpert.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import base64
   5
   6 from .common import InfoExtractor
   7
   8
   9 class DumpertIE(InfoExtractor):
  10     _VALID_URL = (r'https?://(?:www\.)?dumpert\.nl/mediabase/'
  11                   r'(?P<id>[0-9]+/[0-9a-zA-Z]+)/?.*')
  12     _TEST = {
  13         'url': 'http://www.dumpert.nl/mediabase/6646981/951bc60f/',
  14         'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
  15         'info_dict': {
  16             'id': '6646981/951bc60f',
  17             'ext': 'mp4',
  18             'title': 'Ik heb nieuws voor je',
  19             'description': 'Niet schrikken hoor'
  20         }
  21     }
  22
  23     def _real_extract(self, url):
  24         video_id = self._match_id(url)
  25         webpage = self._download_webpage(url, video_id)
  26
  27         title = self._html_search_meta('title', webpage)
  28         description = self._html_search_meta('description', webpage)
  29
  30         files_base64 = self._html_search_regex(r'data-files="(.*?)"',
  31                                                webpage,
  32                                                'files')
  33         files_json = base64.b64decode(files_base64).decode('iso-8859-1')
  34         files = self._parse_json(files_json, video_id)
  35
  36         format_names = ['flv', 'mobile', 'tablet', '720p']
  37         formats = [{'format_id': name,
  38                     'url': files[name].replace(r'\/', '/')}
  39                    for name in format_names
  40                    if name in files]
  41
  42         return {
  43             'id': video_id,
  44             'title': title,
  45             'description': description,
  46             'formats': formats
  47         }