_ Git - youtube-dl/blob - youtube_dl/extractor/videolecturesnet.py

   1 from __future__ import unicode_literals
   2
   3 import re
   4
   5 from .common import InfoExtractor
   6 from ..utils import (
   7     find_xpath_attr,
   8     int_or_none,
   9     parse_duration,
  10     unified_strdate,
  11 )
  12
  13
  14 class VideoLecturesNetIE(InfoExtractor):
  15     _VALID_URL = r'http://(?:www\.)?videolectures\.net/(?P<id>[^/#?]+)/'
  16     IE_NAME = 'videolectures.net'
  17
  18     _TEST = {
  19         'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/',
  20         'info_dict': {
  21             'id': 'promogram_igor_mekjavic_eng',
  22             'ext': 'mp4',
  23             'title': 'Automatics, robotics and biocybernetics',
  24             'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
  25             'upload_date': '20130627',
  26             'duration': 565,
  27             'thumbnail': 're:http://.*\.jpg',
  28         },
  29     }
  30
  31     def _real_extract(self, url):
  32         mobj = re.match(self._VALID_URL, url)
  33         video_id = mobj.group('id')
  34
  35         smil_url = 'http://videolectures.net/%s/video/1/smil.xml' % video_id
  36         smil = self._download_xml(smil_url, video_id)
  37
  38         title = find_xpath_attr(smil, './/meta', 'name', 'title').attrib['content']
  39         description_el = find_xpath_attr(smil, './/meta', 'name', 'abstract')
  40         description = (
  41             None if description_el is None
  42             else description_el.attrib['content'])
  43         upload_date = unified_strdate(
  44             find_xpath_attr(smil, './/meta', 'name', 'date').attrib['content'])
  45
  46         switch = smil.find('.//switch')
  47         duration = parse_duration(switch.attrib.get('dur'))
  48         thumbnail_el = find_xpath_attr(switch, './image', 'type', 'thumbnail')
  49         thumbnail = (
  50             None if thumbnail_el is None else thumbnail_el.attrib.get('src'))
  51
  52         formats = []
  53         for v in switch.findall('./video'):
  54             proto = v.attrib.get('proto')
  55             if proto not in ['http', 'rtmp']:
  56                 continue
  57             f = {
  58                 'width': int_or_none(v.attrib.get('width')),
  59                 'height': int_or_none(v.attrib.get('height')),
  60                 'filesize': int_or_none(v.attrib.get('size')),
  61                 'tbr': int_or_none(v.attrib.get('systemBitrate')) / 1000.0,
  62                 'ext': v.attrib.get('ext'),
  63             }
  64             src = v.attrib['src']
  65             if proto == 'http':
  66                 if self._is_valid_url(src, video_id):
  67                     f['url'] = src
  68                     formats.append(f)
  69             elif proto == 'rtmp':
  70                 f.update({
  71                     'url': v.attrib['streamer'],
  72                     'play_path': src,
  73                     'rtmp_real_time': True,
  74                 })
  75                 formats.append(f)
  76         self._sort_formats(formats)
  77
  78         return {
  79             'id': video_id,
  80             'title': title,
  81             'description': description,
  82             'upload_date': upload_date,
  83             'duration': duration,
  84             'thumbnail': thumbnail,
  85             'formats': formats,
  86         }