[utils] Fix unescapeHTML for misformed string like "&a&quot;" (#13935)
author Yen Chi Hsuan <yan12125@gmail.com>
Sat, 19 Aug 2017 13:40:53 +0000 (21:40 +0800)
committer Yen Chi Hsuan <yan12125@gmail.com>
Sat, 19 Aug 2017 13:40:53 +0000 (21:40 +0800)
ChangeLog
test/test_utils.py
youtube_dl/utils.py

index 9a0fad673d9484769237dfba717f841b2ab5fac7..9eab4d1e7a34b573a7bd3582c2ab53e23fb42b74 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+version <unreleased>
+
+Core
+* [utils] Fix unescapeHTML for misformed string like "&a&quot;" (#13935)
+
+
 version 2017.08.18
 
 Core
 version 2017.08.18
 
 Core
index 2aab16b97c300ee1858cffba0eff3f352009a404..e50f3764e57050c560365eb566979e171538985b 100644 (file)
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -279,6 +279,7 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(unescapeHTML('&#47;'), '/')
         self.assertEqual(unescapeHTML('&eacute;'), 'é')
         self.assertEqual(unescapeHTML('&#2013266066;'), '&#2013266066;')
         self.assertEqual(unescapeHTML('&#47;'), '/')
         self.assertEqual(unescapeHTML('&eacute;'), 'é')
         self.assertEqual(unescapeHTML('&#2013266066;'), '&#2013266066;')
+        self.assertEqual(unescapeHTML('&a&quot;'), '&a"')
         # HTML5 entities
         self.assertEqual(unescapeHTML('&period;&apos;'), '.\'')
 
         # HTML5 entities
         self.assertEqual(unescapeHTML('&period;&apos;'), '.\'')
 
index c9cbd58426e2c63e95d317ef81b01472d2a166c6..2554a2abd7e86eeac1c92dddf6e90a307f8b9c9f 100644 (file)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -596,7 +596,7 @@ def unescapeHTML(s):
     assert type(s) == compat_str
 
     return re.sub(
     assert type(s) == compat_str
 
     return re.sub(
-        r'&([^;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
+        r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
 
 
 def get_subprocess_encoding():
 
 
 def get_subprocess_encoding():