_ Git - youtube-dl/blob - youtube_dl/compat.py

   1 from __future__ import unicode_literals
   2
   3 import collections
   4 import getpass
   5 import optparse
   6 import os
   7 import re
   8 import shutil
   9 import socket
  10 import subprocess
  11 import sys
  12 import itertools
  13
  14
  15 try:
  16     import urllib.request as compat_urllib_request
  17 except ImportError:  # Python 2
  18     import urllib2 as compat_urllib_request
  19
  20 try:
  21     import urllib.error as compat_urllib_error
  22 except ImportError:  # Python 2
  23     import urllib2 as compat_urllib_error
  24
  25 try:
  26     import urllib.parse as compat_urllib_parse
  27 except ImportError:  # Python 2
  28     import urllib as compat_urllib_parse
  29
  30 try:
  31     from urllib.parse import urlparse as compat_urllib_parse_urlparse
  32 except ImportError:  # Python 2
  33     from urlparse import urlparse as compat_urllib_parse_urlparse
  34
  35 try:
  36     import urllib.parse as compat_urlparse
  37 except ImportError:  # Python 2
  38     import urlparse as compat_urlparse
  39
  40 try:
  41     import http.cookiejar as compat_cookiejar
  42 except ImportError:  # Python 2
  43     import cookielib as compat_cookiejar
  44
  45 try:
  46     import html.entities as compat_html_entities
  47 except ImportError:  # Python 2
  48     import htmlentitydefs as compat_html_entities
  49
  50 try:
  51     import http.client as compat_http_client
  52 except ImportError:  # Python 2
  53     import httplib as compat_http_client
  54
  55 try:
  56     from urllib.error import HTTPError as compat_HTTPError
  57 except ImportError:  # Python 2
  58     from urllib2 import HTTPError as compat_HTTPError
  59
  60 try:
  61     from urllib.request import urlretrieve as compat_urlretrieve
  62 except ImportError:  # Python 2
  63     from urllib import urlretrieve as compat_urlretrieve
  64
  65
  66 try:
  67     from subprocess import DEVNULL
  68     compat_subprocess_get_DEVNULL = lambda: DEVNULL
  69 except ImportError:
  70     compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
  71
  72 try:
  73     import http.server as compat_http_server
  74 except ImportError:
  75     import BaseHTTPServer as compat_http_server
  76
  77 from pprint import (pprint, pformat)
  78
  79
  80 def dprint(fmt):
  81     sys.stderr.write(pformat(fmt) + "\n")
  82
  83 try:
  84     from urllib.parse import unquote as compat_urllib_parse_unquote
  85 except ImportError:
  86     def compat_urllib_parse_unquote_to_bytes(string):
  87         """unquote_to_bytes('abc%20def') -> b'abc def'."""
  88         # Note: strings are encoded as UTF-8. This is only an issue if it contains
  89         # unescaped non-ASCII characters, which URIs should not.
  90         if not string:
  91             # Is it a string-like object?
  92             string.split
  93             return b''
  94         if isinstance(string, str):
  95             string = string.encode('utf-8')
  96             # string = encode('utf-8')
  97
  98         # python3 -> 2: must implicitly convert to bits
  99         bits = bytes(string).split(b'%')
 100
 101         if len(bits) == 1:
 102             return string
 103         res = [bits[0]]
 104         append = res.append
 105
 106         for item in bits[1:]:
 107             try:
 108                 append(item[:2].decode('hex'))
 109                 append(item[2:])
 110             except:
 111                 append(b'%')
 112                 append(item)
 113         return b''.join(res)
 114
 115     compat_urllib_parse_asciire = re.compile('([\x00-\x7f]+)')
 116
 117     def new_compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
 118         """Replace %xx escapes by their single-character equivalent. The optional
 119         encoding and errors parameters specify how to decode percent-encoded
 120         sequences into Unicode characters, as accepted by the bytes.decode()
 121         method.
 122         By default, percent-encoded sequences are decoded with UTF-8, and invalid
 123         sequences are replaced by a placeholder character.
 124
 125         unquote('abc%20def') -> 'abc def'.
 126         """
 127
 128         if '%' not in string:
 129             string.split
 130             return string
 131         if encoding is None:
 132             encoding = 'utf-8'
 133         if errors is None:
 134             errors = 'replace'
 135
 136         bits = compat_urllib_parse_asciire.split(string)
 137         res = [bits[0]]
 138         append = res.append
 139         for i in range(1, len(bits), 2):
 140             foo = compat_urllib_parse_unquote_to_bytes(bits[i])
 141             foo = foo.decode(encoding, errors)
 142             append(foo)
 143
 144             if bits[i + 1]:
 145                 bar = bits[i + 1]
 146                 if not isinstance(bar, unicode):
 147                     bar = bar.decode('utf-8')
 148                 append(bar)
 149         return ''.join(res)
 150
 151     def old_compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
 152         if string == '':
 153             return string
 154         res = string.split('%')
 155         if len(res) == 1:
 156             return string
 157         if encoding is None:
 158             encoding = 'utf-8'
 159         if errors is None:
 160             errors = 'replace'
 161         # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
 162         pct_sequence = b''
 163         string = res[0]
 164         for item in res[1:]:
 165             try:
 166                 if not item:
 167                     raise ValueError
 168                 if not re.match('[0-9a-fA-F][0-9a-fA-F]',item[:2]):
 169                     raise ValueError
 170                 pct_sequence += item[:2].decode('hex')
 171                 rest = item[2:]
 172                 if not rest:
 173                     # This segment was just a single percent-encoded character.
 174                     # May be part of a sequence of code units, so delay decoding.
 175                     # (Stored in pct_sequence).
 176                     continue
 177             except ValueError:
 178                 rest = '%' + item
 179             # Encountered non-percent-encoded characters. Flush the current
 180             # pct_sequence.
 181             string += pct_sequence.decode(encoding, errors) + rest
 182             pct_sequence = b''
 183         if pct_sequence:
 184             # Flush the final pct_sequence
 185             string += pct_sequence.decode(encoding, errors)
 186         return string
 187
 188     compat_urllib_parse_unquote = new_compat_urllib_parse_unquote
 189
 190 try:
 191     compat_str = unicode  # Python 2
 192 except NameError:
 193     compat_str = str
 194
 195 try:
 196     compat_basestring = basestring  # Python 2
 197 except NameError:
 198     compat_basestring = str
 199
 200 try:
 201     compat_chr = unichr  # Python 2
 202 except NameError:
 203     compat_chr = chr
 204
 205 try:
 206     from xml.etree.ElementTree import ParseError as compat_xml_parse_error
 207 except ImportError:  # Python 2.6
 208     from xml.parsers.expat import ExpatError as compat_xml_parse_error
 209
 210
 211 try:
 212     from urllib.parse import parse_qs as compat_parse_qs
 213 except ImportError:  # Python 2
 214     # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
 215     # Python 2's version is apparently totally broken
 216
 217     def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
 218                    encoding='utf-8', errors='replace'):
 219         qs, _coerce_result = qs, compat_str
 220         pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
 221         r = []
 222         for name_value in pairs:
 223             if not name_value and not strict_parsing:
 224                 continue
 225             nv = name_value.split('=', 1)
 226             if len(nv) != 2:
 227                 if strict_parsing:
 228                     raise ValueError("bad query field: %r" % (name_value,))
 229                 # Handle case of a control-name with no equal sign
 230                 if keep_blank_values:
 231                     nv.append('')
 232                 else:
 233                     continue
 234             if len(nv[1]) or keep_blank_values:
 235                 name = nv[0].replace('+', ' ')
 236                 name = compat_urllib_parse_unquote(
 237                     name, encoding=encoding, errors=errors)
 238                 name = _coerce_result(name)
 239                 value = nv[1].replace('+', ' ')
 240                 value = compat_urllib_parse_unquote(
 241                     value, encoding=encoding, errors=errors)
 242                 value = _coerce_result(value)
 243                 r.append((name, value))
 244         return r
 245
 246     def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
 247                         encoding='utf-8', errors='replace'):
 248         parsed_result = {}
 249         pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
 250                            encoding=encoding, errors=errors)
 251         for name, value in pairs:
 252             if name in parsed_result:
 253                 parsed_result[name].append(value)
 254             else:
 255                 parsed_result[name] = [value]
 256         return parsed_result
 257
 258 try:
 259     from shlex import quote as shlex_quote
 260 except ImportError:  # Python < 3.3
 261     def shlex_quote(s):
 262         if re.match(r'^[-_\w./]+$', s):
 263             return s
 264         else:
 265             return "'" + s.replace("'", "'\"'\"'") + "'"
 266
 267
 268 def compat_ord(c):
 269     if type(c) is int:
 270         return c
 271     else:
 272         return ord(c)
 273
 274
 275 if sys.version_info >= (3, 0):
 276     compat_getenv = os.getenv
 277     compat_expanduser = os.path.expanduser
 278 else:
 279     # Environment variables should be decoded with filesystem encoding.
 280     # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)
 281
 282     def compat_getenv(key, default=None):
 283         from .utils import get_filesystem_encoding
 284         env = os.getenv(key, default)
 285         if env:
 286             env = env.decode(get_filesystem_encoding())
 287         return env
 288
 289     # HACK: The default implementations of os.path.expanduser from cpython do not decode
 290     # environment variables with filesystem encoding. We will work around this by
 291     # providing adjusted implementations.
 292     # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
 293     # for different platforms with correct environment variables decoding.
 294
 295     if os.name == 'posix':
 296         def compat_expanduser(path):
 297             """Expand ~ and ~user constructions.  If user or $HOME is unknown,
 298             do nothing."""
 299             if not path.startswith('~'):
 300                 return path
 301             i = path.find('/', 1)
 302             if i < 0:
 303                 i = len(path)
 304             if i == 1:
 305                 if 'HOME' not in os.environ:
 306                     import pwd
 307                     userhome = pwd.getpwuid(os.getuid()).pw_dir
 308                 else:
 309                     userhome = compat_getenv('HOME')
 310             else:
 311                 import pwd
 312                 try:
 313                     pwent = pwd.getpwnam(path[1:i])
 314                 except KeyError:
 315                     return path
 316                 userhome = pwent.pw_dir
 317             userhome = userhome.rstrip('/')
 318             return (userhome + path[i:]) or '/'
 319     elif os.name == 'nt' or os.name == 'ce':
 320         def compat_expanduser(path):
 321             """Expand ~ and ~user constructs.
 322
 323             If user or $HOME is unknown, do nothing."""
 324             if path[:1] != '~':
 325                 return path
 326             i, n = 1, len(path)
 327             while i < n and path[i] not in '/\\':
 328                 i = i + 1
 329
 330             if 'HOME' in os.environ:
 331                 userhome = compat_getenv('HOME')
 332             elif 'USERPROFILE' in os.environ:
 333                 userhome = compat_getenv('USERPROFILE')
 334             elif 'HOMEPATH' not in os.environ:
 335                 return path
 336             else:
 337                 try:
 338                     drive = compat_getenv('HOMEDRIVE')
 339                 except KeyError:
 340                     drive = ''
 341                 userhome = os.path.join(drive, compat_getenv('HOMEPATH'))
 342
 343             if i != 1:  # ~user
 344                 userhome = os.path.join(os.path.dirname(userhome), path[1:i])
 345
 346             return userhome + path[i:]
 347     else:
 348         compat_expanduser = os.path.expanduser
 349
 350
 351 if sys.version_info < (3, 0):
 352     def compat_print(s):
 353         from .utils import preferredencoding
 354         print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
 355 else:
 356     def compat_print(s):
 357         assert isinstance(s, compat_str)
 358         print(s)
 359
 360
 361 try:
 362     subprocess_check_output = subprocess.check_output
 363 except AttributeError:
 364     def subprocess_check_output(*args, **kwargs):
 365         assert 'input' not in kwargs
 366         p = subprocess.Popen(*args, stdout=subprocess.PIPE, **kwargs)
 367         output, _ = p.communicate()
 368         ret = p.poll()
 369         if ret:
 370             raise subprocess.CalledProcessError(ret, p.args, output=output)
 371         return output
 372
 373 if sys.version_info < (3, 0) and sys.platform == 'win32':
 374     def compat_getpass(prompt, *args, **kwargs):
 375         if isinstance(prompt, compat_str):
 376             from .utils import preferredencoding
 377             prompt = prompt.encode(preferredencoding())
 378         return getpass.getpass(prompt, *args, **kwargs)
 379 else:
 380     compat_getpass = getpass.getpass
 381
 382 # Old 2.6 and 2.7 releases require kwargs to be bytes
 383 try:
 384     def _testfunc(x):
 385         pass
 386     _testfunc(**{'x': 0})
 387 except TypeError:
 388     def compat_kwargs(kwargs):
 389         return dict((bytes(k), v) for k, v in kwargs.items())
 390 else:
 391     compat_kwargs = lambda kwargs: kwargs
 392
 393
 394 if sys.version_info < (2, 7):
 395     def compat_socket_create_connection(address, timeout, source_address=None):
 396         host, port = address
 397         err = None
 398         for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
 399             af, socktype, proto, canonname, sa = res
 400             sock = None
 401             try:
 402                 sock = socket.socket(af, socktype, proto)
 403                 sock.settimeout(timeout)
 404                 if source_address:
 405                     sock.bind(source_address)
 406                 sock.connect(sa)
 407                 return sock
 408             except socket.error as _:
 409                 err = _
 410                 if sock is not None:
 411                     sock.close()
 412         if err is not None:
 413             raise err
 414         else:
 415             raise socket.error("getaddrinfo returns an empty list")
 416 else:
 417     compat_socket_create_connection = socket.create_connection
 418
 419
 420 # Fix https://github.com/rg3/youtube-dl/issues/4223
 421 # See http://bugs.python.org/issue9161 for what is broken
 422 def workaround_optparse_bug9161():
 423     op = optparse.OptionParser()
 424     og = optparse.OptionGroup(op, 'foo')
 425     try:
 426         og.add_option('-t')
 427     except TypeError:
 428         real_add_option = optparse.OptionGroup.add_option
 429
 430         def _compat_add_option(self, *args, **kwargs):
 431             enc = lambda v: (
 432                 v.encode('ascii', 'replace') if isinstance(v, compat_str)
 433                 else v)
 434             bargs = [enc(a) for a in args]
 435             bkwargs = dict(
 436                 (k, enc(v)) for k, v in kwargs.items())
 437             return real_add_option(self, *bargs, **bkwargs)
 438         optparse.OptionGroup.add_option = _compat_add_option
 439
 440 if hasattr(shutil, 'get_terminal_size'):  # Python >= 3.3
 441     compat_get_terminal_size = shutil.get_terminal_size
 442 else:
 443     _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])
 444
 445     def compat_get_terminal_size():
 446         columns = compat_getenv('COLUMNS', None)
 447         if columns:
 448             columns = int(columns)
 449         else:
 450             columns = None
 451         lines = compat_getenv('LINES', None)
 452         if lines:
 453             lines = int(lines)
 454         else:
 455             lines = None
 456
 457         try:
 458             sp = subprocess.Popen(
 459                 ['stty', 'size'],
 460                 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 461             out, err = sp.communicate()
 462             lines, columns = map(int, out.split())
 463         except Exception:
 464             pass
 465         return _terminal_size(columns, lines)
 466
 467 try:
 468     itertools.count(start=0, step=1)
 469     compat_itertools_count = itertools.count
 470 except TypeError:  # Python 2.6
 471     def compat_itertools_count(start=0, step=1):
 472         n = start
 473         while True:
 474             yield n
 475             n += step
 476
# Names exported by ``from ... import *``.
__all__ = [
    'compat_HTTPError',
    'compat_basestring',
    'compat_chr',
    'compat_cookiejar',
    'compat_expanduser',
    'compat_get_terminal_size',
    'compat_getenv',
    'compat_getpass',
    'compat_html_entities',
    'compat_http_client',
    'compat_http_server',
    'compat_itertools_count',
    'compat_kwargs',
    'compat_ord',
    'compat_parse_qs',
    'compat_print',
    'compat_socket_create_connection',
    'compat_str',
    'compat_subprocess_get_DEVNULL',
    'compat_urllib_error',
    'compat_urllib_parse',
    'compat_urllib_parse_unquote',
    'compat_urllib_parse_urlparse',
    'compat_urllib_request',
    'compat_urlparse',
    'compat_urlretrieve',
    'compat_xml_parse_error',
    'shlex_quote',
    'subprocess_check_output',
    'workaround_optparse_bug9161',
]