[extractor/common] Support multiple properties in _og_search_property
author Sergey M․ <dstftw@gmail.com>
Tue, 2 Aug 2016 15:55:14 +0000 (22:55 +0700)
committer Sergey M․ <dstftw@gmail.com>
Tue, 2 Aug 2016 15:55:14 +0000 (22:55 +0700)
test/test_InfoExtractor.py
youtube_dl/extractor/common.py

index 88e8ff904e26576125910cd2ecd2c3e5662d7b17..a98305c747635c1b1638f761d7bdf9bead353d19 100644 (file)
@@ -48,6 +48,9 @@ class TestInfoExtractor(unittest.TestCase):
         self.assertEqual(ie._og_search_property('foobar', html), 'Foo')
         self.assertEqual(ie._og_search_property('test1', html), 'foo > < bar')
         self.assertEqual(ie._og_search_property('test2', html), 'foo >//< bar')
+        self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar')
+        self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True)
+        self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True)
 
     def test_html_search_meta(self):
         ie = self.ie
index 53c28f016065195b9e26849cbab75825a8cdcaa0..3b6a5491dbd1f17ef0d056b385955354c82df156 100644 (file)
@@ -727,9 +727,14 @@ class InfoExtractor(object):
                     [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop)
 
     def _og_search_property(self, prop, html, name=None, **kargs):
+        if not isinstance(prop, (list, tuple)):
+            prop = [prop]
         if name is None:
-            name = 'OpenGraph %s' % prop
-        escaped = self._search_regex(self._og_regexes(prop), html, name, flags=re.DOTALL, **kargs)
+            name = 'OpenGraph %s' % prop[0]
+        og_regexes = []
+        for p in prop:
+            og_regexes.extend(self._og_regexes(p))
+        escaped = self._search_regex(og_regexes, html, name, flags=re.DOTALL, **kargs)
         if escaped is None:
             return None
         return unescapeHTML(escaped)