[utils] Skip invalid/non-HTML entities (Closes #7518)

[youtube-dl] / youtube_dl / utils.py
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py

index 65556d056a7edfe12b94e82d0f55baa93360aa70..b7013a6aaef400633d45ac33c4a7fcb949802be7 100644 (file)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -259,15 +259,6 @@ def get_element_by_attribute(attribute, value, html):
      return unescapeHTML(res)
  
  
-def extract_attributes(attributes_str, attributes_regex=r'(?s)\s*([^\s=]+)\s*=\s*["\']([^"\']+)["\']'):
-    attributes = re.findall(attributes_regex, attributes_str)
-    attributes_dict = {}
-    if attributes:
-        for (attribute_name, attribute_value) in attributes:
-            attributes_dict[attribute_name] = attribute_value
-    return attributes_dict
-
-
  def clean_html(html):
      """Clean an HTML snippet into a readable string"""
  
@@ -405,7 +396,11 @@ def _htmlentity_transform(entity):
              numstr = '0%s' % numstr
          else:
              base = 10
-        return compat_chr(int(numstr, base))
+        # See https://github.com/rg3/youtube-dl/issues/7518
+        try:
+            return compat_chr(int(numstr, base))
+        except ValueError:
+            pass
  
      # Unknown entity in name, return its literal representation
      return ('&%s;' % entity)