_ Git - youtube-dl/blob - youtube_dl/extractor/swrmediathek.py

   1 # -*- coding: utf-8 -*-
   2 from __future__ import unicode_literals
   3
   4 import re
   5
   6 from .common import InfoExtractor
   7 from ..utils import int_or_none
   8
   9
  10 class SWRMediathekIE(InfoExtractor):
  11     _VALID_URL = r'https?://(?:www\.)?swrmediathek\.de/player\.htm\?show=(?P<videoid>[^?#&]+)'
  12
  13     _TESTS = [{
  14         'url': 'http://swrmediathek.de/player.htm?show=849790d0-dab8-11e3-a953-0026b975f2e6',
  15         'info_dict': {
  16             'id': '849790d0-dab8-11e3-a953-0026b975f2e6',
  17             'ext': 'flv',
  18             'title': 'SWR odysso',
  19             'description': 'md5:2012e31baad36162e97ce9eb3f157b8a',
  20             'thumbnail': 're:^http:.*\.jpg$',
  21         },
  22         'params': {
  23             'skip_download': True,  # requires rtmpdump
  24         },
  25     }, {
  26         'url': 'http://swrmediathek.de/player.htm?show=0e1a8510-ddf2-11e3-9be3-0026b975f2e6',
  27         'info_dict': {
  28             'id': '0e1a8510-ddf2-11e3-9be3-0026b975f2e6',
  29             'ext': 'flv',
  30             'title': 'Nachtcafé - Alltagsdroge Alkohol - zwischen Sektempfang und Komasaufen',
  31             'description': 'md5:e0a3adc17e47db2c23aab9ebc36dbee2',
  32             'thumbnail': 're:http://.*\.jpg',
  33         },
  34         'params': {
  35             'skip_download': True,  # requires rtmpdump
  36         },
  37     }]
  38
  39     def _real_extract(self, url):
  40         mobj = re.match(self._VALID_URL, url)
  41         video_id = mobj.group('videoid')
  42
  43         webpage = self._download_webpage(url, video_id)
  44
  45         smilurl = 'http://swrmediathek.de/rtmpQuals/%s/clips.smil'
  46         smildoc = self._download_xml(smilurl % video_id, video_id, note='Downloading SMIL page')
  47
  48         baseurl = smildoc.find('.//meta').attrib['base']
  49
  50         formats = []
  51         for video in smildoc.findall('.//video'):
  52             vbr = video.attrib.get('system-bitrate')
  53             if vbr:
  54                 vbr = int(vbr) / 1000
  55
  56             formats.append({
  57                 'format_id': video.attrib['height'] + 'p',
  58                 'width': int_or_none(video.attrib['width']),
  59                 'height': int_or_none(video.attrib['height']),
  60                 'vbr': vbr,
  61                 'url': baseurl,
  62                 'play_path': 'mp4:' + video.attrib['src'],
  63                 'ext': 'flv',
  64             })
  65
  66         self._sort_formats(formats)
  67
  68         return {
  69             'id': video_id,
  70             'title': self._html_search_regex(r'<meta name="title" content="(.+)" />', webpage, 'title'),
  71             'thumbnail': self._search_regex(r'<link rel="image_src".+href="(.+)" />', webpage, 'thumbnail'),
  72             'formats': formats,
  73             'description': self._html_search_regex(r'<meta name="description" content="(.+)" />', webpage, 'description'),
  74         }