[extractor/generic] Detect schema.org/VideoObject embeds
author Sergey M․ <dstftw@gmail.com>
Fri, 8 Jul 2016 20:29:07 +0000 (03:29 +0700)
committer Sergey M․ <dstftw@gmail.com>
Fri, 8 Jul 2016 20:29:07 +0000 (03:29 +0700)
youtube_dl/extractor/generic.py

index 31527d1c6cd6f6aebb468c2d6e9e3af7bcf96076..62da9bbc0b6119414555b7130acd0c7499449662 100644 (file)
@@ -1313,6 +1313,23 @@ class GenericIE(InfoExtractor):
             },
             'add_ie': ['Kaltura'],
         },
+        {
+            # TODO: find another test
+            # http://schema.org/VideoObject
+            # 'url': 'https://flipagram.com/f/nyvTSJMKId',
+            # 'md5': '888dcf08b7ea671381f00fab74692755',
+            # 'info_dict': {
+            #     'id': 'nyvTSJMKId',
+            #     'ext': 'mp4',
+            #     'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
+            #     'description': '#love for cats.',
+            #     'timestamp': 1461244995,
+            #     'upload_date': '20160421',
+            # },
+            # 'params': {
+            #     'force_generic_extractor': True,
+            # },
+        }
     ]
 
     def report_following_redirect(self, new_url):
@@ -2157,6 +2174,19 @@ class GenericIE(InfoExtractor):
         if embed_url:
             return self.url_result(embed_url)
 
+        # Looking for http://schema.org/VideoObject
+        json_ld = self._search_json_ld(
+            webpage, video_id, default=None, expected_type='VideoObject')
+        if json_ld and json_ld.get('url'):
+            info_dict.update({
+                'title': video_title or info_dict['title'],
+                'description': video_description,
+                'thumbnail': video_thumbnail,
+                'age_limit': age_limit
+            })
+            info_dict.update(json_ld)
+            return info_dict
+
         def check_video(vurl):
             if YoutubeIE.suitable(vurl):
                 return True