from __future__ import unicode_literals
-import re
-
from .common import InfoExtractor
from ..utils import (
- find_xpath_attr,
- int_or_none,
parse_duration,
- unified_strdate,
)
class VideoLecturesNetIE(InfoExtractor):
- _VALID_URL = r'http://(?:www\.)?videolectures\.net/(?P<id>[^/#?]+)/'
+ _VALID_URL = r'http://(?:www\.)?videolectures\.net/(?P<id>[^/#?]+)/*(?:[#?].*)?$'
IE_NAME = 'videolectures.net'
_TEST = {
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
smil_url = 'http://videolectures.net/%s/video/1/smil.xml' % video_id
- smil = self._download_xml(smil_url, video_id)
+ smil = self._download_smil(smil_url, video_id)
- title = find_xpath_attr(smil, './/meta', 'name', 'title').attrib['content']
- description_el = find_xpath_attr(smil, './/meta', 'name', 'abstract')
- description = (
- None if description_el is None
- else description_el.attrib['content'])
- upload_date = unified_strdate(
- find_xpath_attr(smil, './/meta', 'name', 'date').attrib['content'])
+ info = self._parse_smil(smil, smil_url, video_id)
- switch = smil.find('.//switch')
- duration = parse_duration(switch.attrib.get('dur'))
- thumbnail_el = find_xpath_attr(switch, './image', 'type', 'thumbnail')
- thumbnail = (
- None if thumbnail_el is None else thumbnail_el.attrib.get('src'))
+ info['id'] = video_id
- formats = [{
- 'url': v.attrib['src'],
- 'width': int_or_none(v.attrib.get('width')),
- 'height': int_or_none(v.attrib.get('height')),
- 'filesize': int_or_none(v.attrib.get('size')),
- 'tbr': int_or_none(v.attrib.get('systemBitrate')) / 1000.0,
- 'ext': v.attrib.get('ext'),
- } for v in switch.findall('./video')
- if v.attrib.get('proto') == 'http']
+ switch = smil.find('.//switch')
+ if switch is not None:
+ info['duration'] = parse_duration(switch.attrib.get('dur'))
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'upload_date': upload_date,
- 'duration': duration,
- 'thumbnail': thumbnail,
- 'formats': formats,
- }
+ return info