[utils] Handle HTMLParseError in extract_attributes (closes #13349)
author Sergey M․ <dstftw@gmail.com>
Sun, 11 Jun 2017 18:52:24 +0000 (01:52 +0700)
committer Sergey M․ <dstftw@gmail.com>
Sun, 11 Jun 2017 18:52:24 +0000 (01:52 +0700)
test/test_utils.py
youtube_dl/utils.py

index 41b094d89f7bf80bdd49d15dcf60f65d3b02ce23..2b93b360477f857b8fa693450d3c5375db5663cc 100644 (file)
@@ -916,6 +916,8 @@ class TestUtil(unittest.TestCase):
             supports_outside_bmp = False
         if supports_outside_bmp:
             self.assertEqual(extract_attributes('<e x="Smile &#128512;!">'), {'x': 'Smile \U0001f600!'})
+        # Malformed HTML should not break attributes extraction on older Python
+        self.assertEqual(extract_attributes('<mal"formed/>'), {})
 
     def test_clean_html(self):
         self.assertEqual(clean_html('a:\nb'), 'a: b')
index 16bf49408c5228202cd43d9f7265d77c1c5122dc..1973bd4836a407d3e66fcc4c3a54d052e958ae19 100644 (file)
@@ -36,6 +36,7 @@ import xml.etree.ElementTree
 import zlib
 
 from .compat import (
+    compat_HTMLParseError,
     compat_HTMLParser,
     compat_basestring,
     compat_chr,
@@ -409,8 +410,12 @@ def extract_attributes(html_element):
     but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
     """
     parser = HTMLAttributeParser()
-    parser.feed(html_element)
-    parser.close()
+    try:
+        parser.feed(html_element)
+        parser.close()
+    # Older Python may throw HTMLParseError in case of malformed HTML
+    except compat_HTMLParseError:
+        pass
     return parser.attrs