_ Git - youtube-dl/blob - youtube_dl/extractor/belgiannational.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5
   6 from .common import InfoExtractor
   7 from ..utils import int_or_none
   8
   9 class BelgianNationalIE(InfoExtractor):
  10     _VALID_URL = r'http://(?:deredactie|sporza|cobra)\.be/cm/(.*)/(?P<video_id>[^\']+)'
  11     _TESTS = [
  12         # deredactie.be
  13         {
  14             'url': 'http://deredactie.be/cm/vrtnieuws/videozone/programmas/journaal/EP_141025_JOL',
  15             'md5': '4cebde1eb60a53782d4f3992cbd46ec8',
  16             'info_dict': {
  17                 'id': 'EP_141025_JOL',
  18                 'title': 'Het journaal L - 25/10/14',
  19                 'ext': 'mp4',
  20                 'duration': 929,
  21             }
  22         },
  23         # sporza.be
  24         {
  25             'url': 'http://sporza.be/cm/sporza/videozone/programmas/extratime/EP_141020_Extra_time',
  26             'md5': '11f53088da9bf8e7cfc42456697953ff',
  27             'info_dict': {
  28                 'id': 'EP_141020_Extra_time',
  29                 'title': 'Bekijk Extra Time van 20 oktober',
  30                 'ext': 'mp4',
  31                 'duration': 3238,
  32             }
  33         },
  34         # cobra.be
  35         {
  36             'url': 'http://cobra.be/cm/cobra/videozone/rubriek/film-videozone/141022-mv-ellis-cafecorsari',
  37             'md5': '78a2b060a5083c4f055449a72477409d',
  38             'info_dict': {
  39                 'id': '141022-mv-ellis-cafecorsari',
  40                 'title': 'Bret Easton Ellis in Café Corsari',
  41                 'ext': 'mp4',
  42                 'duration': 661,
  43             }
  44         },
  45     ]
  46
  47     def _real_extract(self, url):
  48         mobj = re.match(self._VALID_URL, url)
  49         video_id = mobj.group('video_id')
  50
  51         webpage = self._download_webpage(url, video_id)
  52         title = self._og_search_title(webpage)
  53
  54         video_url = self._search_regex(r'data-video-src="(.*?)"', webpage, 'Video url') + '/manifest.f4m'
  55         duration = int_or_none(self._search_regex(r'data-video-sitestat-duration="(.*?)"', webpage, 'Duration'))
  56
  57         return {
  58             'id': video_id,
  59             'title': title,
  60             'url': video_url,
  61             'ext': 'mp4',
  62             'duration': duration,
  63         }