]> git.bitcoin.ninja Git - youtube-dl/blob - youtube_dl/extractor/videolecturesnet.py
[SpankBang] Use python2.6 compatible string formatting spec
[youtube-dl] / youtube_dl / extractor / videolecturesnet.py
1 from __future__ import unicode_literals
2
3 import re
4
5 from .common import InfoExtractor
6 from ..utils import (
7     find_xpath_attr,
8     int_or_none,
9     parse_duration,
10     unified_strdate,
11 )
12
13
class VideoLecturesNetIE(InfoExtractor):
    """Information extractor for lecture pages on videolectures.net.

    The lecture slug is taken from the URL and used to fetch the lecture's
    SMIL manifest, from which the metadata and the available HTTP / RTMP
    formats are read.
    """

    _VALID_URL = r'http://(?:www\.)?videolectures\.net/(?P<id>[^/#?]+)/'
    IE_NAME = 'videolectures.net'

    _TEST = {
        'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/',
        'info_dict': {
            'id': 'promogram_igor_mekjavic_eng',
            'ext': 'mp4',
            'title': 'Automatics, robotics and biocybernetics',
            'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
            'upload_date': '20130627',
            'duration': 565,
            # Raw literal: same runtime value as before, without relying on
            # the invalid '\.' escape being passed through unchanged.
            'thumbnail': r're:http://.*\.jpg',
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the lecture addressed by *url*.

        Downloads ``/<id>/video/1/smil.xml`` and reads the title, optional
        abstract, optional date, duration, thumbnail and one format per
        ``<video>`` element whose ``proto`` is ``http`` or ``rtmp``.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``upload_date``, ``duration``, ``thumbnail`` and ``formats``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        smil_url = 'http://videolectures.net/%s/video/1/smil.xml' % video_id
        smil = self._download_xml(smil_url, video_id)

        # The title is required, so a missing element is allowed to fail.
        title = find_xpath_attr(smil, './/meta', 'name', 'title').attrib['content']

        description_el = find_xpath_attr(smil, './/meta', 'name', 'abstract')
        description = (
            None if description_el is None
            else description_el.attrib['content'])

        # Treat the date like the abstract: a manifest without it should not
        # abort extraction with an AttributeError on None.
        date_el = find_xpath_attr(smil, './/meta', 'name', 'date')
        upload_date = (
            None if date_el is None
            else unified_strdate(date_el.attrib['content']))

        switch = smil.find('.//switch')
        duration = parse_duration(switch.attrib.get('dur'))
        thumbnail_el = find_xpath_attr(switch, './image', 'type', 'thumbnail')
        thumbnail = (
            None if thumbnail_el is None else thumbnail_el.attrib.get('src'))

        formats = []
        for v in switch.findall('./video'):
            proto = v.attrib.get('proto')
            if proto not in ('http', 'rtmp'):
                continue

            # systemBitrate may be absent; int_or_none then yields None and
            # dividing it would raise TypeError, so only scale a real value.
            bitrate = int_or_none(v.attrib.get('systemBitrate'))
            f = {
                'width': int_or_none(v.attrib.get('width')),
                'height': int_or_none(v.attrib.get('height')),
                'filesize': int_or_none(v.attrib.get('size')),
                'tbr': None if bitrate is None else bitrate / 1000.0,
                'ext': v.attrib.get('ext'),
            }
            src = v.attrib['src']
            if proto == 'http':
                # HTTP sources are only kept when _is_valid_url accepts them.
                if self._is_valid_url(src, video_id):
                    f['url'] = src
                    formats.append(f)
            else:
                # proto == 'rtmp': the streamer attribute is the URL and
                # src is the play path on that server.
                f.update({
                    'url': v.attrib['streamer'],
                    'play_path': src,
                    'rtmp_real_time': True,
                })
                formats.append(f)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'upload_date': upload_date,
            'duration': duration,
            'thumbnail': thumbnail,
            'formats': formats,
        }