[extractor/common] Extract more metadata for VideoObject in _json_ld
[youtube-dl] / youtube_dl / extractor / common.py
index b5fce5de21972f75253c0ae4c3352e4f0775da1f..9ffe64d05533ae5c5ac2a8d21bb1223b388e9789 100644 (file)
@@ -44,6 +44,7 @@ from ..utils import (
     sanitized_Request,
     unescapeHTML,
     unified_strdate,
+    unified_timestamp,
     url_basename,
     xpath_element,
     xpath_text,
@@ -161,6 +162,7 @@ class InfoExtractor(object):
                         * "height" (optional, int)
                         * "resolution" (optional, string "{width}x{height"},
                                         deprecated)
+                        * "filesize" (optional, int)
     thumbnail:      Full URL to a video thumbnail image.
     description:    Full video description.
     uploader:       Full name of the video uploader.
@@ -839,10 +841,16 @@ class InfoExtractor(object):
                 })
             elif item_type == 'VideoObject':
                 info.update({
+                    'url': json_ld.get('contentUrl'),
                     'title': unescapeHTML(json_ld.get('name')),
                     'description': unescapeHTML(json_ld.get('description')),
-                    'upload_date': unified_strdate(json_ld.get('upload_date')),
-                    'url': unescapeHTML(json_ld.get('contentUrl')),
+                    'thumbnail': json_ld.get('thumbnailUrl'),
+                    'duration': parse_duration(json_ld.get('duration')),
+                    'timestamp': unified_timestamp(json_ld.get('uploadDate')),
+                    'filesize': float_or_none(json_ld.get('contentSize')),
+                    'tbr': int_or_none(json_ld.get('bitrate')),
+                    'width': int_or_none(json_ld.get('width')),
+                    'height': int_or_none(json_ld.get('height')),
                 })
         return dict((k, v) for k, v in info.items() if v is not None)