[videolecturesnet] Use generic SMIL extraction
[youtube-dl] / youtube_dl / extractor / videolecturesnet.py
index ebd2a3dca3ac0e7bd812226c80c356a19b3677ab..160dbb5900f084bf44e8decde68b9d5b1e27ec2a 100644 (file)
@@ -1,18 +1,13 @@
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 from ..utils import (
-    find_xpath_attr,
-    int_or_none,
     parse_duration,
-    unified_strdate,
 )
 
 
 class VideoLecturesNetIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?videolectures\.net/(?P<id>[^/#?]+)/'
+    _VALID_URL = r'http://(?:www\.)?videolectures\.net/(?P<id>[^/#?]+)/*(?:[#?].*)?$'
     IE_NAME = 'videolectures.net'
 
     _TEST = {
@@ -29,42 +24,17 @@ class VideoLecturesNetIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         smil_url = 'http://videolectures.net/%s/video/1/smil.xml' % video_id
-        smil = self._download_xml(smil_url, video_id)
+        smil = self._download_smil(smil_url, video_id)
 
-        title = find_xpath_attr(smil, './/meta', 'name', 'title').attrib['content']
-        description_el = find_xpath_attr(smil, './/meta', 'name', 'abstract')
-        description = (
-            None if description_el is None
-            else description_el.attrib['content'])
-        upload_date = unified_strdate(
-            find_xpath_attr(smil, './/meta', 'name', 'date').attrib['content'])
+        info = self._parse_smil(smil, smil_url, video_id)
 
-        switch = smil.find('.//switch')
-        duration = parse_duration(switch.attrib.get('dur'))
-        thumbnail_el = find_xpath_attr(switch, './image', 'type', 'thumbnail')
-        thumbnail = (
-            None if thumbnail_el is None else thumbnail_el.attrib.get('src'))
+        info['id'] = video_id
 
-        formats = [{
-            'url': v.attrib['src'],
-            'width': int_or_none(v.attrib.get('width')),
-            'height': int_or_none(v.attrib.get('height')),
-            'filesize': int_or_none(v.attrib.get('size')),
-            'tbr': int_or_none(v.attrib.get('systemBitrate')) / 1000.0,
-            'ext': v.attrib.get('ext'),
-        } for v in switch.findall('./video')
-            if v.attrib.get('proto') == 'http']
+        switch = smil.find('.//switch')
+        if switch is not None:
+            info['duration'] = parse_duration(switch.attrib.get('dur'))
 
-        return {
-            'id': video_id,
-            'title': title,
-            'description': description,
-            'upload_date': upload_date,
-            'duration': duration,
-            'thumbnail': thumbnail,
-            'formats': formats,
-        }
+        return info