Improve OpenGraph property matching
[youtube-dl] / youtube_dl / extractor / common.py
index 0f6049cb43306537be089d27404ccb05d69d10a9..52ae98be3be8032b09acc4a058250b29d70af221 100644 (file)
@@ -37,6 +37,8 @@ class InfoExtractor(object):
     The following fields are optional:
 
     format:         The video format, defaults to ext (used for --get-format)
+    thumbnails:     A list of dictionaries (with the entries "resolution" and
+                    "url") for the varying thumbnails
     thumbnail:      Full URL to a video thumbnail image.
     description:    One-line video description.
     uploader:       Full name of the video uploader.
@@ -167,11 +169,6 @@ class InfoExtractor(object):
         self.to_screen(u'Logging in')
 
     #Methods for following #608
-    #They set the correct value of the '_type' key
-    def video_result(self, video_info):
-        """Returns a video"""
-        video_info['_type'] = 'video'
-        return video_info
     def url_result(self, url, ie=None):
         """Returns a url that points to a page that should be processed"""
         #TODO: ie should be the class used for getting the info
@@ -260,6 +257,30 @@ class InfoExtractor(object):
         
         return (username, password)
 
+    # Helper functions for extracting OpenGraph info
+    @staticmethod
+    def _og_regex(prop):
+        return r'<meta.+?property=[\'"]og:%s[\'"].+?content=(?:"(.+?)"|\'(.+?)\')' % re.escape(prop)
+
+    def _og_search_property(self, prop, html, name=None, **kargs):
+        if name is None:
+            name = 'OpenGraph %s' % prop
+        return self._html_search_regex(self._og_regex(prop), html, name, flags=re.DOTALL, **kargs)
+
+    def _og_search_thumbnail(self, html, **kargs):
+        return self._og_search_property('image', html, u'thumbnail url', fatal=False, **kargs)
+
+    def _og_search_description(self, html, **kargs):
+        return self._og_search_property('description', html, fatal=False, **kargs)
+
+    def _og_search_title(self, html, **kargs):
+        return self._og_search_property('title', html, **kargs)
+
+    def _og_search_video_url(self, html, name='video url', **kargs):
+        return self._html_search_regex([self._og_regex('video:secure_url'),
+                                        self._og_regex('video')],
+                                       html, name, **kargs)
+
 class SearchInfoExtractor(InfoExtractor):
     """
     Base class for paged search queries extractors.