]> git.bitcoin.ninja Git - youtube-dl/blob - youtube_dl/extractor/videolecturesnet.py
release 2015.01.05
[youtube-dl] / youtube_dl / extractor / videolecturesnet.py
1 from __future__ import unicode_literals
2
3 import re
4
5 from .common import InfoExtractor
6 from ..utils import (
7     find_xpath_attr,
8     int_or_none,
9     parse_duration,
10     unified_strdate,
11 )
12
13
class VideoLecturesNetIE(InfoExtractor):
    """Extractor for lecture pages hosted on videolectures.net.

    The lecture identifier is taken from the first path component of the
    URL.  Metadata and the list of media files are read from the lecture's
    ``smil.xml`` manifest.
    """

    _VALID_URL = r'http://(?:www\.)?videolectures\.net/(?P<id>[^/#?]+)/'
    IE_NAME = 'videolectures.net'

    _TEST = {
        'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/',
        'info_dict': {
            'id': 'promogram_igor_mekjavic_eng',
            'ext': 'mp4',
            'title': 'Automatics, robotics and biocybernetics',
            'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
            'upload_date': '20130627',
            'duration': 565,
            # Raw string: the value is a regex, so keep the backslash
            # literal instead of relying on an unknown string escape.
            'thumbnail': r're:http://.*\.jpg',
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the lecture addressed by *url*.

        Downloads the lecture's SMIL manifest and returns a dict with the
        keys ``id``, ``title``, ``description``, ``upload_date``,
        ``duration``, ``thumbnail`` and ``formats``.  ``description``,
        ``upload_date`` and ``thumbnail`` are ``None`` when the manifest
        does not carry the corresponding element.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        smil_url = 'http://videolectures.net/%s/video/1/smil.xml' % video_id
        smil = self._download_xml(smil_url, video_id)

        # The title is treated as mandatory: a manifest without it fails
        # here rather than producing an entry with no title.
        title = find_xpath_attr(smil, './/meta', 'name', 'title').attrib['content']

        description_el = find_xpath_attr(smil, './/meta', 'name', 'abstract')
        description = (
            None if description_el is None
            else description_el.attrib['content'])

        # Handle a missing date the same way as a missing abstract instead
        # of dereferencing None.
        date_el = find_xpath_attr(smil, './/meta', 'name', 'date')
        upload_date = (
            None if date_el is None
            else unified_strdate(date_el.attrib['content']))

        switch = smil.find('.//switch')
        duration = parse_duration(switch.attrib.get('dur'))
        thumbnail_el = find_xpath_attr(switch, './image', 'type', 'thumbnail')
        thumbnail = (
            None if thumbnail_el is None else thumbnail_el.attrib.get('src'))

        formats = []
        for video in switch.findall('./video'):
            # Only entries explicitly marked as plain HTTP are kept.
            if video.attrib.get('proto') != 'http':
                continue

            # int_or_none may yield None (e.g. attribute absent); dividing
            # that by 1000.0 would raise TypeError and abort extraction,
            # so only convert when a bitrate was actually parsed.
            bitrate = int_or_none(video.attrib.get('systemBitrate'))
            formats.append({
                'url': video.attrib['src'],
                'width': int_or_none(video.attrib.get('width')),
                'height': int_or_none(video.attrib.get('height')),
                'filesize': int_or_none(video.attrib.get('size')),
                'tbr': None if bitrate is None else bitrate / 1000.0,
                'ext': video.attrib.get('ext'),
            })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'upload_date': upload_date,
            'duration': duration,
            'thumbnail': thumbnail,
            'formats': formats,
        }