[compat] add compat_etree_register_namespace

[youtube-dl] / youtube_dl / compat.py
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py

index 0243949a44468582a55fb151ec40ccf3a6850e9a..49e3c90e29852ef849909b5995473bac4d9cbfd8 100644 (file)
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -1,3 +1,4 @@
+# coding: utf-8
  from __future__ import unicode_literals
  
  import binascii
@@ -64,8 +65,8 @@ except ImportError:  # Python 2
      import htmlentitydefs as compat_html_entities
  
  try:  # Python >= 3.3
-    from compat_html_entities import html as compat_html_entities_html5
-except ImportError:
+    compat_html_entities_html5 = compat_html_entities.html5
+except AttributeError:
      # Copied from CPython 3.5.1 html/entities.py
      compat_html_entities_html5 = {
          'Aacute': '\xc1',
@@ -2343,7 +2344,7 @@ try:
      from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
  except ImportError:  # Python 2
      _asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
-                else re.compile('([\x00-\x7f]+)'))
+                else re.compile(r'([\x00-\x7f]+)'))
  
      # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
      # implementations from cpython 3.4.3's stdlib. Python 2's version
@@ -2490,6 +2491,7 @@ class _TreeBuilder(etree.TreeBuilder):
      def doctype(self, name, pubid, system):
          pass
  
+
  if sys.version_info[0] >= 3:
      def compat_etree_fromstring(text):
          return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
@@ -2527,6 +2529,24 @@ else:
                  el.text = el.text.decode('utf-8')
          return doc
  
+if hasattr(etree, 'register_namespace'):  # prefer etree's own function whenever the attribute exists
+    compat_etree_register_namespace = etree.register_namespace
+else:  # fallback with the same (prefix, uri) signature, built on etree._namespace_map
+    def compat_etree_register_namespace(prefix, uri):
+        """Register a namespace prefix.
+        The registry is global, and any existing mapping for either the
+        given prefix or the namespace URI will be removed.
+        *prefix* is the namespace prefix, *uri* is a namespace uri. Tags and
+        attributes in this namespace will be serialized with prefix if possible.
+        ValueError is raised if prefix is reserved or is invalid.
+        """
+        if re.match(r"ns\d+$", prefix):  # a prefix of the form "ns" + digits is rejected
+            raise ValueError("Prefix format reserved for internal use")
+        for k, v in list(etree._namespace_map.items()):  # list() snapshot, because entries are deleted inside the loop
+            if k == uri or v == prefix:  # drop any existing entry that uses this uri or this prefix
+                del etree._namespace_map[k]
+        etree._namespace_map[uri] = prefix  # the map is keyed by uri; the value is the prefix
+
  if sys.version_info < (2, 7):
      # Here comes the crazy part: In 2.6, if the xpath is a unicode,
      # .//node does not match if a node is a direct child of . !
@@ -2594,15 +2614,19 @@ except ImportError:  # Python < 3.3
              return "'" + s.replace("'", "'\"'\"'") + "'"
  
  
-if sys.version_info >= (2, 7, 3):
+try:
+    args = shlex.split('中文')  # probe: split a non-ASCII literal and inspect what comes back
+    assert (isinstance(args, list) and  # NOTE(review): asserts are skipped under `python -O`; then only UnicodeEncodeError selects the fallback
+            isinstance(args[0], compat_str) and
+            args[0] == '中文')
      compat_shlex_split = shlex.split
-else:
+except (AssertionError, UnicodeEncodeError):
      # Working around shlex issue with unicode strings on some python 2
      # versions (see http://bugs.python.org/issue1548891)
      def compat_shlex_split(s, comments=False, posix=True):
          if isinstance(s, compat_str):
              s = s.encode('utf-8')
-        return shlex.split(s, comments, posix)
+        return list(map(lambda s: s.decode('utf-8'), shlex.split(s, comments, posix)))  # every token is decoded from UTF-8 before it is returned
  
  
  def compat_ord(c):
@@ -2782,6 +2806,7 @@ def workaround_optparse_bug9161():
              return real_add_option(self, *bargs, **bkwargs)
          optparse.OptionGroup.add_option = _compat_add_option
  
+
  if hasattr(shutil, 'get_terminal_size'):  # Python >= 3.3
      compat_get_terminal_size = shutil.get_terminal_size
  else: