_ Git - youtube-dl/blob - youtube_dl/extractor/belgiannational.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5
   6 from .common import InfoExtractor
   7 from ..utils import int_or_none
   8
   9 class BelgianNationalIE(InfoExtractor):
  10     _VALID_URL = r'http://(?:deredactie|sporza|cobra)\.be/cm/(.*)/(?P<video_id>[^\']+)'
  11     _TESTS = [
  12     # deredactie.be
  13     {
  14         'url': 'http://deredactie.be/cm/vrtnieuws/videozone/programmas/journaal/EP_141025_JOL',
  15         'md5': '4cebde1eb60a53782d4f3992cbd46ec8',
  16         'info_dict': {
  17             'id': 'EP_141025_JOL',
  18             'title': 'Het journaal L - 25/10/14',
  19             'ext': 'mp4',
  20             'duration': 929,
  21         }
  22     },
  23     # sporza.be
  24     {
  25         'url': 'http://sporza.be/cm/sporza/videozone/programmas/extratime/EP_141020_Extra_time',
  26         'md5': '11f53088da9bf8e7cfc42456697953ff',
  27         'info_dict': {
  28             'id': 'EP_141020_Extra_time',
  29             'title': 'Bekijk Extra Time van 20 oktober',
  30             'ext': 'mp4',
  31             'duration': 3238,
  32         }
  33
  34     },
  35     # cobra.be
  36     {
  37         'url': 'http://cobra.be/cm/cobra/videozone/rubriek/film-videozone/141022-mv-ellis-cafecorsari',
  38         'md5': '78a2b060a5083c4f055449a72477409d',
  39         'info_dict': {
  40             'id': '141022-mv-ellis-cafecorsari',
  41             'title': 'Bret Easton Ellis in Café Corsari',
  42             'ext': 'mp4',
  43             'duration': 661,
  44         }
  45     }
  46     ]
  47
  48     def _real_extract(self, url):
  49         mobj = re.match(self._VALID_URL, url)
  50         video_id = mobj.group('video_id')
  51
  52         webpage = self._download_webpage(url, video_id)
  53         title = self._og_search_title(webpage)
  54
  55         video_url = self._search_regex(r'data-video-src="(.*?)"', webpage, 'Video url') + '/manifest.f4m'
  56         duration = int_or_none(self._search_regex(r'data-video-sitestat-duration="(.*?)"', webpage, 'Duration'))
  57
  58         return {
  59             'id': video_id,
  60             'title': title,
  61             'url': video_url,
  62             'ext': 'mp4',
  63             'duration': duration,
  64         }