[videolectures] Fix _VALID_URL
[youtube-dl] / youtube_dl / extractor / videolecturesnet.py
index f8b946a8839d1bd3278629df0f919f539523c4fd..24584dc80d976cea4a56b1fdc14a447def38e0b3 100644 (file)
@@ -12,7 +12,7 @@ from ..utils import (
 
 
 class VideoLecturesNetIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?videolectures\.net/(?P<id>[^/#?]+)/'
+    _VALID_URL = r'http://(?:www\.)?videolectures\.net/(?P<id>[^/#?]+)(?:/?[#?].*)?$'
     IE_NAME = 'videolectures.net'
 
     _TEST = {
@@ -36,7 +36,10 @@ class VideoLecturesNetIE(InfoExtractor):
         smil = self._download_xml(smil_url, video_id)
 
         title = find_xpath_attr(smil, './/meta', 'name', 'title').attrib['content']
-        description = find_xpath_attr(smil, './/meta', 'name', 'abstract').attrib['content']
+        description_el = find_xpath_attr(smil, './/meta', 'name', 'abstract')
+        description = (
+            None if description_el is None
+            else description_el.attrib['content'])
         upload_date = unified_strdate(
             find_xpath_attr(smil, './/meta', 'name', 'date').attrib['content'])
 
@@ -46,15 +49,31 @@ class VideoLecturesNetIE(InfoExtractor):
         thumbnail = (
             None if thumbnail_el is None else thumbnail_el.attrib.get('src'))
 
-        formats = [{
-            'url': v.attrib['src'],
-            'width': int_or_none(v.attrib.get('width')),
-            'height': int_or_none(v.attrib.get('height')),
-            'filesize': int_or_none(v.attrib.get('size')),
-            'tbr': int_or_none(v.attrib.get('systemBitrate')) / 1000.0,
-            'ext': v.attrib.get('ext'),
-        } for v in switch.findall('./video')
-            if v.attrib.get('proto') == 'http']
+        formats = []
+        for v in switch.findall('./video'):
+            proto = v.attrib.get('proto')
+            if proto not in ['http', 'rtmp']:
+                continue
+            f = {
+                'width': int_or_none(v.attrib.get('width')),
+                'height': int_or_none(v.attrib.get('height')),
+                'filesize': int_or_none(v.attrib.get('size')),
+                'tbr': int_or_none(v.attrib.get('systemBitrate')) / 1000.0,
+                'ext': v.attrib.get('ext'),
+            }
+            src = v.attrib['src']
+            if proto == 'http':
+                if self._is_valid_url(src, video_id):
+                    f['url'] = src
+                    formats.append(f)
+            elif proto == 'rtmp':
+                f.update({
+                    'url': v.attrib['streamer'],
+                    'play_path': src,
+                    'rtmp_real_time': True,
+                })
+                formats.append(f)
+        self._sort_formats(formats)
 
         return {
             'id': video_id,