[extractor/common] Extract upload date and thumbnails from SMIL
[youtube-dl] / youtube_dl / extractor / common.py
index 1e7db8a9baf2fb48b8cab0697ca0ada46f624aa7..b86d0652338457a65c4fc655d5e0406aa42850b0 100644 (file)
@@ -39,6 +39,7 @@ from ..utils import (
     RegexNotFoundError,
     sanitize_filename,
     unescapeHTML,
+    unified_strdate,
     url_basename,
     xpath_text,
     xpath_with_ns,
@@ -152,6 +153,7 @@ class InfoExtractor(object):
     description:    Full video description.
     uploader:       Full name of the video uploader.
     creator:        The main artist who created the video.
+    release_date:   The date (YYYYMMDD) when the video was released.
     timestamp:      UNIX timestamp of the moment the video became available.
     upload_date:    Video upload date (YYYYMMDD).
                     If not explicitly set, calculated from timestamp.
@@ -1043,6 +1045,7 @@ class InfoExtractor(object):
         video_id = os.path.splitext(url_basename(smil_url))[0]
         title = None
         description = None
+        upload_date = None
         for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
             name = meta.attrib.get('name')
             content = meta.attrib.get('content')
@@ -1052,11 +1055,22 @@ class InfoExtractor(object):
                 title = content
             elif not description and name in ('description', 'abstract'):
                 description = content
+            elif not upload_date and name == 'date':
+                upload_date = unified_strdate(content)
+
+        thumbnails = [{
+            'id': image.get('type'),
+            'url': image.get('src'),
+            'width': int_or_none(image.get('width')),
+            'height': int_or_none(image.get('height')),
+        } for image in smil.findall(self._xpath_ns('.//image', namespace)) if image.get('src')]
 
         return {
             'id': video_id,
             'title': title or video_id,
             'description': description,
+            'upload_date': upload_date,
+            'thumbnails': thumbnails,
             'formats': formats,
             'subtitles': subtitles,
         }
@@ -1083,7 +1097,7 @@ class InfoExtractor(object):
             if not src:
                 continue
 
-            bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
+            bitrate = float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
             filesize = int_or_none(video.get('size') or video.get('fileSize'))
             width = int_or_none(video.get('width'))
             height = int_or_none(video.get('height'))