_ Git - youtube-dl/blob - youtube_dl/compat.py

   1 from __future__ import unicode_literals
   2
   3 import collections
   4 import getpass
   5 import optparse
   6 import os
   7 import re
   8 import shutil
   9 import socket
  10 import subprocess
  11 import sys
  12 import itertools
  13
  14
  15 try:
  16     import urllib.request as compat_urllib_request
  17 except ImportError:  # Python 2
  18     import urllib2 as compat_urllib_request
  19
  20 try:
  21     import urllib.error as compat_urllib_error
  22 except ImportError:  # Python 2
  23     import urllib2 as compat_urllib_error
  24
  25 try:
  26     import urllib.parse as compat_urllib_parse
  27 except ImportError:  # Python 2
  28     import urllib as compat_urllib_parse
  29
  30 try:
  31     from urllib.parse import urlparse as compat_urllib_parse_urlparse
  32 except ImportError:  # Python 2
  33     from urlparse import urlparse as compat_urllib_parse_urlparse
  34
  35 try:
  36     import urllib.parse as compat_urlparse
  37 except ImportError:  # Python 2
  38     import urlparse as compat_urlparse
  39
  40 try:
  41     import http.cookiejar as compat_cookiejar
  42 except ImportError:  # Python 2
  43     import cookielib as compat_cookiejar
  44
  45 try:
  46     import html.entities as compat_html_entities
  47 except ImportError:  # Python 2
  48     import htmlentitydefs as compat_html_entities
  49
  50 try:
  51     import http.client as compat_http_client
  52 except ImportError:  # Python 2
  53     import httplib as compat_http_client
  54
  55 try:
  56     from urllib.error import HTTPError as compat_HTTPError
  57 except ImportError:  # Python 2
  58     from urllib2 import HTTPError as compat_HTTPError
  59
  60 try:
  61     from urllib.request import urlretrieve as compat_urlretrieve
  62 except ImportError:  # Python 2
  63     from urllib import urlretrieve as compat_urlretrieve
  64
  65
  66 try:
  67     from subprocess import DEVNULL
  68     compat_subprocess_get_DEVNULL = lambda: DEVNULL
  69 except ImportError:
  70     compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
  71
  72 try:
  73     import http.server as compat_http_server
  74 except ImportError:
  75     import BaseHTTPServer as compat_http_server
  76
  77 try:
  78     from urllib.parse import unquote as compat_urllib_parse_unquote
  79 except ImportError:
  80     def compat_urllib_parse_unquote_to_bytes(string):
  81         """unquote_to_bytes('abc%20def') -> b'abc def'."""
  82         # Note: strings are encoded as UTF-8. This is only an issue if it contains
  83         # unescaped non-ASCII characters, which URIs should not.
  84         if not string:
  85             # Is it a string-like object?
  86             string.split
  87             return b''
  88         if isinstance(string, str):
  89             string = string.encode('utf-8')
  90             # string = encode('utf-8')
  91
  92         # python3 -> 2: must implicitly convert to bits
  93         bits = bytes(string).split(b'%')
  94
  95         if len(bits) == 1:
  96             return string
  97         res = [bits[0]]
  98         append = res.append
  99
 100         for item in bits[1:]:
 101             try:
 102                 append(item[:2].decode('hex'))
 103                 append(item[2:])
 104             except:
 105                 append(b'%')
 106                 append(item)
 107         return b''.join(res)
 108
 109     compat_urllib_parse_asciire = re.compile('([\x00-\x7f]+)')
 110
 111     def new_compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
 112         """Replace %xx escapes by their single-character equivalent. The optional
 113         encoding and errors parameters specify how to decode percent-encoded
 114         sequences into Unicode characters, as accepted by the bytes.decode()
 115         method.
 116         By default, percent-encoded sequences are decoded with UTF-8, and invalid
 117         sequences are replaced by a placeholder character.
 118
 119         unquote('abc%20def') -> 'abc def'.
 120         """
 121
 122         if '%' not in string:
 123             string.split
 124             return string
 125         if encoding is None:
 126             encoding = 'utf-8'
 127         if errors is None:
 128             errors = 'replace'
 129
 130         bits = compat_urllib_parse_asciire.split(string)
 131         res = [bits[0]]
 132         append = res.append
 133         for i in range(1, len(bits), 2):
 134             foo = compat_urllib_parse_unquote_to_bytes(bits[i])
 135             foo = foo.decode(encoding, errors)
 136             append(foo)
 137
 138             if bits[i + 1]:
 139                 bar = bits[i + 1]
 140                 if not isinstance(bar, unicode):
 141                     bar = bar.decode('utf-8')
 142                 append(bar)
 143         return ''.join(res)
 144
 145     def old_compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
 146         if string == '':
 147             return string
 148         res = string.split('%')
 149         if len(res) == 1:
 150             return string
 151         if encoding is None:
 152             encoding = 'utf-8'
 153         if errors is None:
 154             errors = 'replace'
 155         # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
 156         pct_sequence = b''
 157         string = res[0]
 158         for item in res[1:]:
 159             try:
 160                 if not item:
 161                     raise ValueError
 162                 if not re.match('[0-9a-fA-F][0-9a-fA-F]',item[:2]):
 163                     raise ValueError
 164                 pct_sequence += item[:2].decode('hex')
 165                 rest = item[2:]
 166                 if not rest:
 167                     # This segment was just a single percent-encoded character.
 168                     # May be part of a sequence of code units, so delay decoding.
 169                     # (Stored in pct_sequence).
 170                     continue
 171             except ValueError:
 172                 rest = '%' + item
 173             # Encountered non-percent-encoded characters. Flush the current
 174             # pct_sequence.
 175             string += pct_sequence.decode(encoding, errors) + rest
 176             pct_sequence = b''
 177         if pct_sequence:
 178             # Flush the final pct_sequence
 179             string += pct_sequence.decode(encoding, errors)
 180         return string
 181
 182     compat_urllib_parse_unquote = new_compat_urllib_parse_unquote
 183
 184 try:
 185     compat_str = unicode  # Python 2
 186 except NameError:
 187     compat_str = str
 188
 189 try:
 190     compat_basestring = basestring  # Python 2
 191 except NameError:
 192     compat_basestring = str
 193
 194 try:
 195     compat_chr = unichr  # Python 2
 196 except NameError:
 197     compat_chr = chr
 198
 199 try:
 200     from xml.etree.ElementTree import ParseError as compat_xml_parse_error
 201 except ImportError:  # Python 2.6
 202     from xml.parsers.expat import ExpatError as compat_xml_parse_error
 203
 204
 205 try:
 206     from urllib.parse import parse_qs as compat_parse_qs
 207 except ImportError:  # Python 2
 208     # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
 209     # Python 2's version is apparently totally broken
 210
 211     def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
 212                    encoding='utf-8', errors='replace'):
 213         qs, _coerce_result = qs, compat_str
 214         pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
 215         r = []
 216         for name_value in pairs:
 217             if not name_value and not strict_parsing:
 218                 continue
 219             nv = name_value.split('=', 1)
 220             if len(nv) != 2:
 221                 if strict_parsing:
 222                     raise ValueError("bad query field: %r" % (name_value,))
 223                 # Handle case of a control-name with no equal sign
 224                 if keep_blank_values:
 225                     nv.append('')
 226                 else:
 227                     continue
 228             if len(nv[1]) or keep_blank_values:
 229                 name = nv[0].replace('+', ' ')
 230                 name = compat_urllib_parse_unquote(
 231                     name, encoding=encoding, errors=errors)
 232                 name = _coerce_result(name)
 233                 value = nv[1].replace('+', ' ')
 234                 value = compat_urllib_parse_unquote(
 235                     value, encoding=encoding, errors=errors)
 236                 value = _coerce_result(value)
 237                 r.append((name, value))
 238         return r
 239
 240     def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
 241                         encoding='utf-8', errors='replace'):
 242         parsed_result = {}
 243         pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
 244                            encoding=encoding, errors=errors)
 245         for name, value in pairs:
 246             if name in parsed_result:
 247                 parsed_result[name].append(value)
 248             else:
 249                 parsed_result[name] = [value]
 250         return parsed_result
 251
 252 try:
 253     from shlex import quote as shlex_quote
 254 except ImportError:  # Python < 3.3
 255     def shlex_quote(s):
 256         if re.match(r'^[-_\w./]+$', s):
 257             return s
 258         else:
 259             return "'" + s.replace("'", "'\"'\"'") + "'"
 260
 261
 262 def compat_ord(c):
 263     if type(c) is int:
 264         return c
 265     else:
 266         return ord(c)
 267
 268
 269 if sys.version_info >= (3, 0):
 270     compat_getenv = os.getenv
 271     compat_expanduser = os.path.expanduser
 272 else:
 273     # Environment variables should be decoded with filesystem encoding.
 274     # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)
 275
 276     def compat_getenv(key, default=None):
 277         from .utils import get_filesystem_encoding
 278         env = os.getenv(key, default)
 279         if env:
 280             env = env.decode(get_filesystem_encoding())
 281         return env
 282
 283     # HACK: The default implementations of os.path.expanduser from cpython do not decode
 284     # environment variables with filesystem encoding. We will work around this by
 285     # providing adjusted implementations.
 286     # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
 287     # for different platforms with correct environment variables decoding.
 288
 289     if os.name == 'posix':
 290         def compat_expanduser(path):
 291             """Expand ~ and ~user constructions.  If user or $HOME is unknown,
 292             do nothing."""
 293             if not path.startswith('~'):
 294                 return path
 295             i = path.find('/', 1)
 296             if i < 0:
 297                 i = len(path)
 298             if i == 1:
 299                 if 'HOME' not in os.environ:
 300                     import pwd
 301                     userhome = pwd.getpwuid(os.getuid()).pw_dir
 302                 else:
 303                     userhome = compat_getenv('HOME')
 304             else:
 305                 import pwd
 306                 try:
 307                     pwent = pwd.getpwnam(path[1:i])
 308                 except KeyError:
 309                     return path
 310                 userhome = pwent.pw_dir
 311             userhome = userhome.rstrip('/')
 312             return (userhome + path[i:]) or '/'
 313     elif os.name == 'nt' or os.name == 'ce':
 314         def compat_expanduser(path):
 315             """Expand ~ and ~user constructs.
 316
 317             If user or $HOME is unknown, do nothing."""
 318             if path[:1] != '~':
 319                 return path
 320             i, n = 1, len(path)
 321             while i < n and path[i] not in '/\\':
 322                 i = i + 1
 323
 324             if 'HOME' in os.environ:
 325                 userhome = compat_getenv('HOME')
 326             elif 'USERPROFILE' in os.environ:
 327                 userhome = compat_getenv('USERPROFILE')
 328             elif 'HOMEPATH' not in os.environ:
 329                 return path
 330             else:
 331                 try:
 332                     drive = compat_getenv('HOMEDRIVE')
 333                 except KeyError:
 334                     drive = ''
 335                 userhome = os.path.join(drive, compat_getenv('HOMEPATH'))
 336
 337             if i != 1:  # ~user
 338                 userhome = os.path.join(os.path.dirname(userhome), path[1:i])
 339
 340             return userhome + path[i:]
 341     else:
 342         compat_expanduser = os.path.expanduser
 343
 344
 345 if sys.version_info < (3, 0):
 346     def compat_print(s):
 347         from .utils import preferredencoding
 348         print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
 349 else:
 350     def compat_print(s):
 351         assert isinstance(s, compat_str)
 352         print(s)
 353
 354
 355 try:
 356     subprocess_check_output = subprocess.check_output
 357 except AttributeError:
 358     def subprocess_check_output(*args, **kwargs):
 359         assert 'input' not in kwargs
 360         p = subprocess.Popen(*args, stdout=subprocess.PIPE, **kwargs)
 361         output, _ = p.communicate()
 362         ret = p.poll()
 363         if ret:
 364             raise subprocess.CalledProcessError(ret, p.args, output=output)
 365         return output
 366
 367 if sys.version_info < (3, 0) and sys.platform == 'win32':
 368     def compat_getpass(prompt, *args, **kwargs):
 369         if isinstance(prompt, compat_str):
 370             from .utils import preferredencoding
 371             prompt = prompt.encode(preferredencoding())
 372         return getpass.getpass(prompt, *args, **kwargs)
 373 else:
 374     compat_getpass = getpass.getpass
 375
 376 # Old 2.6 and 2.7 releases require kwargs to be bytes
 377 try:
 378     def _testfunc(x):
 379         pass
 380     _testfunc(**{'x': 0})
 381 except TypeError:
 382     def compat_kwargs(kwargs):
 383         return dict((bytes(k), v) for k, v in kwargs.items())
 384 else:
 385     compat_kwargs = lambda kwargs: kwargs
 386
 387
 388 if sys.version_info < (2, 7):
 389     def compat_socket_create_connection(address, timeout, source_address=None):
 390         host, port = address
 391         err = None
 392         for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
 393             af, socktype, proto, canonname, sa = res
 394             sock = None
 395             try:
 396                 sock = socket.socket(af, socktype, proto)
 397                 sock.settimeout(timeout)
 398                 if source_address:
 399                     sock.bind(source_address)
 400                 sock.connect(sa)
 401                 return sock
 402             except socket.error as _:
 403                 err = _
 404                 if sock is not None:
 405                     sock.close()
 406         if err is not None:
 407             raise err
 408         else:
 409             raise socket.error("getaddrinfo returns an empty list")
 410 else:
 411     compat_socket_create_connection = socket.create_connection
 412
 413
 414 # Fix https://github.com/rg3/youtube-dl/issues/4223
 415 # See http://bugs.python.org/issue9161 for what is broken
 416 def workaround_optparse_bug9161():
 417     op = optparse.OptionParser()
 418     og = optparse.OptionGroup(op, 'foo')
 419     try:
 420         og.add_option('-t')
 421     except TypeError:
 422         real_add_option = optparse.OptionGroup.add_option
 423
 424         def _compat_add_option(self, *args, **kwargs):
 425             enc = lambda v: (
 426                 v.encode('ascii', 'replace') if isinstance(v, compat_str)
 427                 else v)
 428             bargs = [enc(a) for a in args]
 429             bkwargs = dict(
 430                 (k, enc(v)) for k, v in kwargs.items())
 431             return real_add_option(self, *bargs, **bkwargs)
 432         optparse.OptionGroup.add_option = _compat_add_option
 433
 434 if hasattr(shutil, 'get_terminal_size'):  # Python >= 3.3
 435     compat_get_terminal_size = shutil.get_terminal_size
 436 else:
 437     _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])
 438
 439     def compat_get_terminal_size():
 440         columns = compat_getenv('COLUMNS', None)
 441         if columns:
 442             columns = int(columns)
 443         else:
 444             columns = None
 445         lines = compat_getenv('LINES', None)
 446         if lines:
 447             lines = int(lines)
 448         else:
 449             lines = None
 450
 451         try:
 452             sp = subprocess.Popen(
 453                 ['stty', 'size'],
 454                 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 455             out, err = sp.communicate()
 456             lines, columns = map(int, out.split())
 457         except Exception:
 458             pass
 459         return _terminal_size(columns, lines)
 460
 461 try:
 462     itertools.count(start=0, step=1)
 463     compat_itertools_count = itertools.count
 464 except TypeError:  # Python 2.6
 465     def compat_itertools_count(start=0, step=1):
 466         n = start
 467         while True:
 468             yield n
 469             n += step
 470
# Names exported by `from <this module> import *`.
__all__ = [
    'compat_HTTPError',
    'compat_basestring',
    'compat_chr',
    'compat_cookiejar',
    'compat_expanduser',
    'compat_get_terminal_size',
    'compat_getenv',
    'compat_getpass',
    'compat_html_entities',
    'compat_http_client',
    'compat_http_server',
    'compat_itertools_count',
    'compat_kwargs',
    'compat_ord',
    'compat_parse_qs',
    'compat_print',
    'compat_socket_create_connection',
    'compat_str',
    'compat_subprocess_get_DEVNULL',
    'compat_urllib_error',
    'compat_urllib_parse',
    'compat_urllib_parse_unquote',
    'compat_urllib_parse_urlparse',
    'compat_urllib_request',
    'compat_urlparse',
    'compat_urlretrieve',
    'compat_xml_parse_error',
    'shlex_quote',
    'subprocess_check_output',
    'workaround_optparse_bug9161',
]