[downloader/f4m] Extract routine for removing unsupported encrypted media

[youtube-dl] / youtube_dl / compat.py
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py

index 192e1c515e568b73e6bb5a42574fada4af511662..76b6b0e3838c65c2d5814d0206c18dfe713d6435 100644 (file)
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -1,7 +1,10 @@
  from __future__ import unicode_literals
  
+import binascii
  import collections
+import email
  import getpass
+import io
  import optparse
  import os
  import re
@@ -11,6 +14,7 @@ import socket
  import subprocess
  import sys
  import itertools
+import xml.etree.ElementTree
  
  
  try:
@@ -38,6 +42,11 @@ try:
  except ImportError:  # Python 2
      import urlparse as compat_urlparse
  
+try:
+    import urllib.response as compat_urllib_response
+except ImportError:  # Python 2
+    import urllib as compat_urllib_response
+
  try:
      import http.cookiejar as compat_cookiejar
  except ImportError:  # Python 2
@@ -68,6 +77,11 @@ try:
  except ImportError:  # Python 2
      from urllib import urlretrieve as compat_urlretrieve
  
+try:
+    from html.parser import HTMLParser as compat_HTMLParser
+except ImportError:  # Python 2
+    from HTMLParser import HTMLParser as compat_HTMLParser
+
  
  try:
      from subprocess import DEVNULL
@@ -155,6 +169,65 @@ except ImportError:  # Python 2
          string = string.replace('+', ' ')
          return compat_urllib_parse_unquote(string, encoding, errors)
  
+try:
+    from urllib.parse import urlencode as compat_urllib_parse_urlencode
+except ImportError:  # Python 2
+    # Python 2's urlencode chokes on a mixture of byte and unicode strings.
+    # Possible solutions are either to port it from Python 3 with all of
+    # its helpers, or to manually ensure the input query contains only byte
+    # strings. We stick with the latter, recursively encoding the whole query.
+    def compat_urllib_parse_urlencode(query, doseq=0, encoding='utf-8'):
+        def encode_elem(e):
+            if isinstance(e, dict):
+                e = encode_dict(e)
+            elif isinstance(e, (list, tuple,)):
+                e = encode_list(e)
+            elif isinstance(e, compat_str):
+                e = e.encode(encoding)
+            return e
+
+        def encode_dict(d):
+            return dict((encode_elem(k), encode_elem(v)) for k, v in d.items())
+
+        def encode_list(l):
+            return [encode_elem(e) for e in l]
+
+        return compat_urllib_parse.urlencode(encode_elem(query), doseq=doseq)
+
+try:
+    from urllib.request import DataHandler as compat_urllib_request_DataHandler
+except ImportError:  # Python < 3.4
+    # Ported from CPython 98774:1733b3bd46db, Lib/urllib/request.py
+    class compat_urllib_request_DataHandler(compat_urllib_request.BaseHandler):
+        def data_open(self, req):
+            # data URLs as specified in RFC 2397.
+            #
+            # ignores POSTed data
+            #
+            # syntax:
+            # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
+            # mediatype := [ type "/" subtype ] *( ";" parameter )
+            # data      := *urlchar
+            # parameter := attribute "=" value
+            url = req.get_full_url()
+
+            scheme, data = url.split(':', 1)
+            mediatype, data = data.split(',', 1)
+
+            # even base64 encoded data URLs might be quoted so unquote in any case:
+            data = compat_urllib_parse_unquote_to_bytes(data)
+            if mediatype.endswith(';base64'):
+                data = binascii.a2b_base64(data)
+                mediatype = mediatype[:-7]
+
+            if not mediatype:
+                mediatype = 'text/plain;charset=US-ASCII'
+
+            headers = email.message_from_string(
+                'Content-type: %s\nContent-length: %d\n' % (mediatype, len(data)))
+
+            return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
+
  try:
      compat_basestring = basestring  # Python 2
  except NameError:
@@ -170,6 +243,53 @@ try:
  except ImportError:  # Python 2.6
      from xml.parsers.expat import ExpatError as compat_xml_parse_error
  
+if sys.version_info[0] >= 3:
+    compat_etree_fromstring = xml.etree.ElementTree.fromstring
+else:
+    # Python 2.x tries to encode unicode strings as ASCII (see the
+    # XMLParser._fixtext method)
+    etree = xml.etree.ElementTree
+
+    try:
+        _etree_iter = etree.Element.iter
+    except AttributeError:  # Python <=2.6
+        def _etree_iter(root):
+            for el in root.findall('*'):
+                yield el
+                for sub in _etree_iter(el):
+                    yield sub
+
+    # On Python 2.6, etree.XML has no parser argument, so this function is
+    # copied from the CPython 2.7 source
+    def _XML(text, parser=None):
+        if not parser:
+            parser = etree.XMLParser(target=etree.TreeBuilder())
+        parser.feed(text)
+        return parser.close()
+
+    def _element_factory(*args, **kwargs):
+        el = etree.Element(*args, **kwargs)
+        for k, v in el.items():
+            if isinstance(v, bytes):
+                el.set(k, v.decode('utf-8'))
+        return el
+
+    def compat_etree_fromstring(text):
+        doc = _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory)))
+        for el in _etree_iter(doc):
+            if el.text is not None and isinstance(el.text, bytes):
+                el.text = el.text.decode('utf-8')
+        return doc
+
+if sys.version_info < (2, 7):
+    # Here comes the crazy part: in Python 2.6, if the xpath is a unicode
+    # string, './/node' does not match a node that is a direct child of '.'!
+    def compat_xpath(xpath):
+        if isinstance(xpath, compat_str):
+            xpath = xpath.encode('ascii')
+        return xpath
+else:
+    compat_xpath = lambda xpath: xpath
  
  try:
      from urllib.parse import parse_qs as compat_parse_qs
@@ -188,7 +308,7 @@ except ImportError:  # Python 2
              nv = name_value.split('=', 1)
              if len(nv) != 2:
                  if strict_parsing:
-                    raise ValueError("bad query field: %r" % (name_value,))
+                    raise ValueError('bad query field: %r' % (name_value,))
                  # Handle case of a control-name with no equal sign
                  if keep_blank_values:
                      nv.append('')
@@ -246,6 +366,9 @@ def compat_ord(c):
          return ord(c)
  
  
+compat_os_name = os._name if os.name == 'java' else os.name
+
+
  if sys.version_info >= (3, 0):
      compat_getenv = os.getenv
      compat_expanduser = os.path.expanduser
@@ -266,7 +389,7 @@ else:
      # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
      # for different platforms with correct environment variables decoding.
  
-    if os.name == 'posix':
+    if compat_os_name == 'posix':
          def compat_expanduser(path):
              """Expand ~ and ~user constructions.  If user or $HOME is unknown,
              do nothing."""
@@ -290,7 +413,7 @@ else:
                  userhome = pwent.pw_dir
              userhome = userhome.rstrip('/')
              return (userhome + path[i:]) or '/'
-    elif os.name == 'nt' or os.name == 'ce':
+    elif compat_os_name == 'nt' or compat_os_name == 'ce':
          def compat_expanduser(path):
              """Expand ~ and ~user constructs.
  
@@ -353,7 +476,7 @@ if sys.version_info < (3, 0) and sys.platform == 'win32':
  else:
      compat_getpass = getpass.getpass
  
-# Old 2.6 and 2.7 releases require kwargs to be bytes
+# Python < 2.6.5 requires kwargs keys to be bytes
  try:
      def _testfunc(x):
          pass
@@ -386,7 +509,7 @@ if sys.version_info < (2, 7):
          if err is not None:
              raise err
          else:
-            raise socket.error("getaddrinfo returns an empty list")
+            raise socket.error('getaddrinfo returns an empty list')
  else:
      compat_socket_create_connection = socket.create_connection
  
@@ -460,11 +583,13 @@ else:
      from tokenize import generate_tokens as compat_tokenize_tokenize
  
  __all__ = [
+    'compat_HTMLParser',
      'compat_HTTPError',
      'compat_basestring',
      'compat_chr',
      'compat_cookiejar',
      'compat_cookies',
+    'compat_etree_fromstring',
      'compat_expanduser',
      'compat_get_terminal_size',
      'compat_getenv',
@@ -475,6 +600,7 @@ __all__ = [
      'compat_itertools_count',
      'compat_kwargs',
      'compat_ord',
+    'compat_os_name',
      'compat_parse_qs',
      'compat_print',
      'compat_shlex_split',
@@ -487,11 +613,15 @@ __all__ = [
      'compat_urllib_parse_unquote',
      'compat_urllib_parse_unquote_plus',
      'compat_urllib_parse_unquote_to_bytes',
+    'compat_urllib_parse_urlencode',
      'compat_urllib_parse_urlparse',
      'compat_urllib_request',
+    'compat_urllib_request_DataHandler',
+    'compat_urllib_response',
      'compat_urlparse',
      'compat_urlretrieve',
      'compat_xml_parse_error',
+    'compat_xpath',
      'shlex_quote',
      'subprocess_check_output',
      'workaround_optparse_bug9161',