[videolecturesnet] Add support for playlists (Closes #7031)
[youtube-dl] / youtube_dl / extractor / videolecturesnet.py
index ebd2a3dca3ac0e7bd812226c80c356a19b3677ab..eadff8d180034f407cea190a9e5f51aea72f3c8a 100644 (file)
@@ -3,16 +3,12 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..utils import (
-    find_xpath_attr,
-    int_or_none,
-    parse_duration,
-    unified_strdate,
-)
+from ..compat import compat_urlparse
+from ..utils import parse_duration
 
 
 class VideoLecturesNetIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?videolectures\.net/(?P<id>[^/#?]+)/'
+    _VALID_URL = r'http://(?:www\.)?videolectures\.net/(?P<id>[^/#?]+)/*(?:[#?].*)?$'
     IE_NAME = 'videolectures.net'
 
     _TEST = {
@@ -29,42 +25,27 @@ class VideoLecturesNetIE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
 
         smil_url = 'http://videolectures.net/%s/video/1/smil.xml' % video_id
-        smil = self._download_xml(smil_url, video_id)
+        smil = self._download_smil(smil_url, video_id, fatal=False)
 
-        title = find_xpath_attr(smil, './/meta', 'name', 'title').attrib['content']
-        description_el = find_xpath_attr(smil, './/meta', 'name', 'abstract')
-        description = (
-            None if description_el is None
-            else description_el.attrib['content'])
-        upload_date = unified_strdate(
-            find_xpath_attr(smil, './/meta', 'name', 'date').attrib['content'])
+        # No SMIL could be fetched for this id, so it is probably a playlist page
+        if smil is False:
+            webpage = self._download_webpage(url, video_id)
+            entries = [
+                self.url_result(compat_urlparse.urljoin(url, video_url), 'VideoLecturesNet')
+                for _, video_url in re.findall(r'<a[^>]+href=(["\'])(.+?)\1[^>]+id=["\']lec=\d+', webpage)]
+            playlist_title = self._html_search_meta('title', webpage, 'title', fatal=True)
+            playlist_description = self._html_search_meta('description', webpage, 'description')
+            return self.playlist_result(entries, video_id, playlist_title, playlist_description)
 
-        switch = smil.find('.//switch')
-        duration = parse_duration(switch.attrib.get('dur'))
-        thumbnail_el = find_xpath_attr(switch, './image', 'type', 'thumbnail')
-        thumbnail = (
-            None if thumbnail_el is None else thumbnail_el.attrib.get('src'))
+        info = self._parse_smil(smil, smil_url, video_id)
+
+        info['id'] = video_id
 
-        formats = [{
-            'url': v.attrib['src'],
-            'width': int_or_none(v.attrib.get('width')),
-            'height': int_or_none(v.attrib.get('height')),
-            'filesize': int_or_none(v.attrib.get('size')),
-            'tbr': int_or_none(v.attrib.get('systemBitrate')) / 1000.0,
-            'ext': v.attrib.get('ext'),
-        } for v in switch.findall('./video')
-            if v.attrib.get('proto') == 'http']
+        switch = smil.find('.//switch')
+        if switch is not None:
+            info['duration'] = parse_duration(switch.attrib.get('dur'))
 
-        return {
-            'id': video_id,
-            'title': title,
-            'description': description,
-            'upload_date': upload_date,
-            'duration': duration,
-            'thumbnail': thumbnail,
-            'formats': formats,
-        }
+        return info