_ Git - youtube-dl/blob - youtube_dl/compat.py

   1 from __future__ import unicode_literals
   2
   3 import binascii
   4 import collections
   5 import email
   6 import getpass
   7 import io
   8 import optparse
   9 import os
  10 import re
  11 import shlex
  12 import shutil
  13 import socket
  14 import subprocess
  15 import sys
  16 import itertools
  17
  18
  19 try:
  20     import urllib.request as compat_urllib_request
  21 except ImportError:  # Python 2
  22     import urllib2 as compat_urllib_request
  23
  24 try:
  25     import urllib.error as compat_urllib_error
  26 except ImportError:  # Python 2
  27     import urllib2 as compat_urllib_error
  28
  29 try:
  30     import urllib.parse as compat_urllib_parse
  31 except ImportError:  # Python 2
  32     import urllib as compat_urllib_parse
  33
  34 try:
  35     from urllib.parse import urlparse as compat_urllib_parse_urlparse
  36 except ImportError:  # Python 2
  37     from urlparse import urlparse as compat_urllib_parse_urlparse
  38
  39 try:
  40     import urllib.parse as compat_urlparse
  41 except ImportError:  # Python 2
  42     import urlparse as compat_urlparse
  43
  44 try:
  45     import urllib.response as compat_urllib_response
  46 except ImportError:  # Python 2
  47     import urllib as compat_urllib_response
  48
  49 try:
  50     import http.cookiejar as compat_cookiejar
  51 except ImportError:  # Python 2
  52     import cookielib as compat_cookiejar
  53
  54 try:
  55     import http.cookies as compat_cookies
  56 except ImportError:  # Python 2
  57     import Cookie as compat_cookies
  58
  59 try:
  60     import html.entities as compat_html_entities
  61 except ImportError:  # Python 2
  62     import htmlentitydefs as compat_html_entities
  63
  64 try:
  65     import http.client as compat_http_client
  66 except ImportError:  # Python 2
  67     import httplib as compat_http_client
  68
  69 try:
  70     from urllib.error import HTTPError as compat_HTTPError
  71 except ImportError:  # Python 2
  72     from urllib2 import HTTPError as compat_HTTPError
  73
  74 try:
  75     from urllib.request import urlretrieve as compat_urlretrieve
  76 except ImportError:  # Python 2
  77     from urllib import urlretrieve as compat_urlretrieve
  78
  79
  80 try:
  81     from subprocess import DEVNULL
  82     compat_subprocess_get_DEVNULL = lambda: DEVNULL
  83 except ImportError:
  84     compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
  85
  86 try:
  87     import http.server as compat_http_server
  88 except ImportError:
  89     import BaseHTTPServer as compat_http_server
  90
  91 try:
  92     compat_str = unicode  # Python 2
  93 except NameError:
  94     compat_str = str
  95
  96 try:
  97     from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
  98     from urllib.parse import unquote as compat_urllib_parse_unquote
  99     from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
 100 except ImportError:  # Python 2
 101     _asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
 102                 else re.compile('([\x00-\x7f]+)'))
 103
 104     # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
 105     # implementations from cpython 3.4.3's stdlib. Python 2's version
 106     # is apparently broken (see https://github.com/rg3/youtube-dl/pull/6244)
 107
 108     def compat_urllib_parse_unquote_to_bytes(string):
 109         """unquote_to_bytes('abc%20def') -> b'abc def'."""
 110         # Note: strings are encoded as UTF-8. This is only an issue if it contains
 111         # unescaped non-ASCII characters, which URIs should not.
 112         if not string:
 113             # Is it a string-like object?
 114             string.split
 115             return b''
 116         if isinstance(string, compat_str):
 117             string = string.encode('utf-8')
 118         bits = string.split(b'%')
 119         if len(bits) == 1:
 120             return string
 121         res = [bits[0]]
 122         append = res.append
 123         for item in bits[1:]:
 124             try:
 125                 append(compat_urllib_parse._hextochr[item[:2]])
 126                 append(item[2:])
 127             except KeyError:
 128                 append(b'%')
 129                 append(item)
 130         return b''.join(res)
 131
 132     def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
 133         """Replace %xx escapes by their single-character equivalent. The optional
 134         encoding and errors parameters specify how to decode percent-encoded
 135         sequences into Unicode characters, as accepted by the bytes.decode()
 136         method.
 137         By default, percent-encoded sequences are decoded with UTF-8, and invalid
 138         sequences are replaced by a placeholder character.
 139
 140         unquote('abc%20def') -> 'abc def'.
 141         """
 142         if '%' not in string:
 143             string.split
 144             return string
 145         if encoding is None:
 146             encoding = 'utf-8'
 147         if errors is None:
 148             errors = 'replace'
 149         bits = _asciire.split(string)
 150         res = [bits[0]]
 151         append = res.append
 152         for i in range(1, len(bits), 2):
 153             append(compat_urllib_parse_unquote_to_bytes(bits[i]).decode(encoding, errors))
 154             append(bits[i + 1])
 155         return ''.join(res)
 156
 157     def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'):
 158         """Like unquote(), but also replace plus signs by spaces, as required for
 159         unquoting HTML form values.
 160
 161         unquote_plus('%7e/abc+def') -> '~/abc def'
 162         """
 163         string = string.replace('+', ' ')
 164         return compat_urllib_parse_unquote(string, encoding, errors)
 165
 166 try:
 167     from urllib.request import DataHandler as compat_urllib_request_DataHandler
 168 except ImportError:  # Python < 3.4
 169     # Ported from CPython 98774:1733b3bd46db, Lib/urllib/request.py
 170     class compat_urllib_request_DataHandler(compat_urllib_request.BaseHandler):
 171         def data_open(self, req):
 172             # data URLs as specified in RFC 2397.
 173             #
 174             # ignores POSTed data
 175             #
 176             # syntax:
 177             # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
 178             # mediatype := [ type "/" subtype ] *( ";" parameter )
 179             # data      := *urlchar
 180             # parameter := attribute "=" value
 181             url = req.get_full_url()
 182
 183             scheme, data = url.split(":", 1)
 184             mediatype, data = data.split(",", 1)
 185
 186             # even base64 encoded data URLs might be quoted so unquote in any case:
 187             data = compat_urllib_parse_unquote_to_bytes(data)
 188             if mediatype.endswith(";base64"):
 189                 data = binascii.a2b_base64(data)
 190                 mediatype = mediatype[:-7]
 191
 192             if not mediatype:
 193                 mediatype = "text/plain;charset=US-ASCII"
 194
 195             headers = email.message_from_string(
 196                 "Content-type: %s\nContent-length: %d\n" % (mediatype, len(data)))
 197
 198             return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
 199
 200 try:
 201     compat_basestring = basestring  # Python 2
 202 except NameError:
 203     compat_basestring = str
 204
 205 try:
 206     compat_chr = unichr  # Python 2
 207 except NameError:
 208     compat_chr = chr
 209
 210 try:
 211     from xml.etree.ElementTree import ParseError as compat_xml_parse_error
 212 except ImportError:  # Python 2.6
 213     from xml.parsers.expat import ExpatError as compat_xml_parse_error
 214
 215
 216 try:
 217     from urllib.parse import parse_qs as compat_parse_qs
 218 except ImportError:  # Python 2
 219     # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
 220     # Python 2's version is apparently totally broken
 221
 222     def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
 223                    encoding='utf-8', errors='replace'):
 224         qs, _coerce_result = qs, compat_str
 225         pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
 226         r = []
 227         for name_value in pairs:
 228             if not name_value and not strict_parsing:
 229                 continue
 230             nv = name_value.split('=', 1)
 231             if len(nv) != 2:
 232                 if strict_parsing:
 233                     raise ValueError("bad query field: %r" % (name_value,))
 234                 # Handle case of a control-name with no equal sign
 235                 if keep_blank_values:
 236                     nv.append('')
 237                 else:
 238                     continue
 239             if len(nv[1]) or keep_blank_values:
 240                 name = nv[0].replace('+', ' ')
 241                 name = compat_urllib_parse_unquote(
 242                     name, encoding=encoding, errors=errors)
 243                 name = _coerce_result(name)
 244                 value = nv[1].replace('+', ' ')
 245                 value = compat_urllib_parse_unquote(
 246                     value, encoding=encoding, errors=errors)
 247                 value = _coerce_result(value)
 248                 r.append((name, value))
 249         return r
 250
 251     def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
 252                         encoding='utf-8', errors='replace'):
 253         parsed_result = {}
 254         pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
 255                            encoding=encoding, errors=errors)
 256         for name, value in pairs:
 257             if name in parsed_result:
 258                 parsed_result[name].append(value)
 259             else:
 260                 parsed_result[name] = [value]
 261         return parsed_result
 262
 263 try:
 264     from shlex import quote as shlex_quote
 265 except ImportError:  # Python < 3.3
 266     def shlex_quote(s):
 267         if re.match(r'^[-_\w./]+$', s):
 268             return s
 269         else:
 270             return "'" + s.replace("'", "'\"'\"'") + "'"
 271
 272
 273 if sys.version_info >= (2, 7, 3):
 274     compat_shlex_split = shlex.split
 275 else:
 276     # Working around shlex issue with unicode strings on some python 2
 277     # versions (see http://bugs.python.org/issue1548891)
 278     def compat_shlex_split(s, comments=False, posix=True):
 279         if isinstance(s, compat_str):
 280             s = s.encode('utf-8')
 281         return shlex.split(s, comments, posix)
 282
 283
 284 def compat_ord(c):
 285     if type(c) is int:
 286         return c
 287     else:
 288         return ord(c)
 289
 290
 291 if sys.version_info >= (3, 0):
 292     compat_getenv = os.getenv
 293     compat_expanduser = os.path.expanduser
 294 else:
 295     # Environment variables should be decoded with filesystem encoding.
 296     # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)
 297
 298     def compat_getenv(key, default=None):
 299         from .utils import get_filesystem_encoding
 300         env = os.getenv(key, default)
 301         if env:
 302             env = env.decode(get_filesystem_encoding())
 303         return env
 304
 305     # HACK: The default implementations of os.path.expanduser from cpython do not decode
 306     # environment variables with filesystem encoding. We will work around this by
 307     # providing adjusted implementations.
 308     # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
 309     # for different platforms with correct environment variables decoding.
 310
 311     if os.name == 'posix':
 312         def compat_expanduser(path):
 313             """Expand ~ and ~user constructions.  If user or $HOME is unknown,
 314             do nothing."""
 315             if not path.startswith('~'):
 316                 return path
 317             i = path.find('/', 1)
 318             if i < 0:
 319                 i = len(path)
 320             if i == 1:
 321                 if 'HOME' not in os.environ:
 322                     import pwd
 323                     userhome = pwd.getpwuid(os.getuid()).pw_dir
 324                 else:
 325                     userhome = compat_getenv('HOME')
 326             else:
 327                 import pwd
 328                 try:
 329                     pwent = pwd.getpwnam(path[1:i])
 330                 except KeyError:
 331                     return path
 332                 userhome = pwent.pw_dir
 333             userhome = userhome.rstrip('/')
 334             return (userhome + path[i:]) or '/'
 335     elif os.name == 'nt' or os.name == 'ce':
 336         def compat_expanduser(path):
 337             """Expand ~ and ~user constructs.
 338
 339             If user or $HOME is unknown, do nothing."""
 340             if path[:1] != '~':
 341                 return path
 342             i, n = 1, len(path)
 343             while i < n and path[i] not in '/\\':
 344                 i = i + 1
 345
 346             if 'HOME' in os.environ:
 347                 userhome = compat_getenv('HOME')
 348             elif 'USERPROFILE' in os.environ:
 349                 userhome = compat_getenv('USERPROFILE')
 350             elif 'HOMEPATH' not in os.environ:
 351                 return path
 352             else:
 353                 try:
 354                     drive = compat_getenv('HOMEDRIVE')
 355                 except KeyError:
 356                     drive = ''
 357                 userhome = os.path.join(drive, compat_getenv('HOMEPATH'))
 358
 359             if i != 1:  # ~user
 360                 userhome = os.path.join(os.path.dirname(userhome), path[1:i])
 361
 362             return userhome + path[i:]
 363     else:
 364         compat_expanduser = os.path.expanduser
 365
 366
 367 if sys.version_info < (3, 0):
 368     def compat_print(s):
 369         from .utils import preferredencoding
 370         print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
 371 else:
 372     def compat_print(s):
 373         assert isinstance(s, compat_str)
 374         print(s)
 375
 376
 377 try:
 378     subprocess_check_output = subprocess.check_output
 379 except AttributeError:
 380     def subprocess_check_output(*args, **kwargs):
 381         assert 'input' not in kwargs
 382         p = subprocess.Popen(*args, stdout=subprocess.PIPE, **kwargs)
 383         output, _ = p.communicate()
 384         ret = p.poll()
 385         if ret:
 386             raise subprocess.CalledProcessError(ret, p.args, output=output)
 387         return output
 388
 389 if sys.version_info < (3, 0) and sys.platform == 'win32':
 390     def compat_getpass(prompt, *args, **kwargs):
 391         if isinstance(prompt, compat_str):
 392             from .utils import preferredencoding
 393             prompt = prompt.encode(preferredencoding())
 394         return getpass.getpass(prompt, *args, **kwargs)
 395 else:
 396     compat_getpass = getpass.getpass
 397
 398 # Old 2.6 and 2.7 releases require kwargs to be bytes
 399 try:
 400     def _testfunc(x):
 401         pass
 402     _testfunc(**{'x': 0})
 403 except TypeError:
 404     def compat_kwargs(kwargs):
 405         return dict((bytes(k), v) for k, v in kwargs.items())
 406 else:
 407     compat_kwargs = lambda kwargs: kwargs
 408
 409
 410 if sys.version_info < (2, 7):
 411     def compat_socket_create_connection(address, timeout, source_address=None):
 412         host, port = address
 413         err = None
 414         for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
 415             af, socktype, proto, canonname, sa = res
 416             sock = None
 417             try:
 418                 sock = socket.socket(af, socktype, proto)
 419                 sock.settimeout(timeout)
 420                 if source_address:
 421                     sock.bind(source_address)
 422                 sock.connect(sa)
 423                 return sock
 424             except socket.error as _:
 425                 err = _
 426                 if sock is not None:
 427                     sock.close()
 428         if err is not None:
 429             raise err
 430         else:
 431             raise socket.error("getaddrinfo returns an empty list")
 432 else:
 433     compat_socket_create_connection = socket.create_connection
 434
 435
 436 # Fix https://github.com/rg3/youtube-dl/issues/4223
 437 # See http://bugs.python.org/issue9161 for what is broken
 438 def workaround_optparse_bug9161():
 439     op = optparse.OptionParser()
 440     og = optparse.OptionGroup(op, 'foo')
 441     try:
 442         og.add_option('-t')
 443     except TypeError:
 444         real_add_option = optparse.OptionGroup.add_option
 445
 446         def _compat_add_option(self, *args, **kwargs):
 447             enc = lambda v: (
 448                 v.encode('ascii', 'replace') if isinstance(v, compat_str)
 449                 else v)
 450             bargs = [enc(a) for a in args]
 451             bkwargs = dict(
 452                 (k, enc(v)) for k, v in kwargs.items())
 453             return real_add_option(self, *bargs, **bkwargs)
 454         optparse.OptionGroup.add_option = _compat_add_option
 455
 456 if hasattr(shutil, 'get_terminal_size'):  # Python >= 3.3
 457     compat_get_terminal_size = shutil.get_terminal_size
 458 else:
 459     _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])
 460
 461     def compat_get_terminal_size(fallback=(80, 24)):
 462         columns = compat_getenv('COLUMNS')
 463         if columns:
 464             columns = int(columns)
 465         else:
 466             columns = None
 467         lines = compat_getenv('LINES')
 468         if lines:
 469             lines = int(lines)
 470         else:
 471             lines = None
 472
 473         if columns is None or lines is None or columns <= 0 or lines <= 0:
 474             try:
 475                 sp = subprocess.Popen(
 476                     ['stty', 'size'],
 477                     stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 478                 out, err = sp.communicate()
 479                 _lines, _columns = map(int, out.split())
 480             except Exception:
 481                 _columns, _lines = _terminal_size(*fallback)
 482
 483             if columns is None or columns <= 0:
 484                 columns = _columns
 485             if lines is None or lines <= 0:
 486                 lines = _lines
 487         return _terminal_size(columns, lines)
 488
 489 try:
 490     itertools.count(start=0, step=1)
 491     compat_itertools_count = itertools.count
 492 except TypeError:  # Python 2.6
 493     def compat_itertools_count(start=0, step=1):
 494         n = start
 495         while True:
 496             yield n
 497             n += step
 498
 499 if sys.version_info >= (3, 0):
 500     from tokenize import tokenize as compat_tokenize_tokenize
 501 else:
 502     from tokenize import generate_tokens as compat_tokenize_tokenize
 503
# Names exported by a star-import of this module.
__all__ = [
    'compat_HTTPError',
    'compat_basestring',
    'compat_chr',
    'compat_cookiejar',
    'compat_cookies',
    'compat_expanduser',
    'compat_get_terminal_size',
    'compat_getenv',
    'compat_getpass',
    'compat_html_entities',
    'compat_http_client',
    'compat_http_server',
    'compat_itertools_count',
    'compat_kwargs',
    'compat_ord',
    'compat_parse_qs',
    'compat_print',
    'compat_shlex_split',
    'compat_socket_create_connection',
    'compat_str',
    'compat_subprocess_get_DEVNULL',
    'compat_tokenize_tokenize',
    'compat_urllib_error',
    'compat_urllib_parse',
    'compat_urllib_parse_unquote',
    'compat_urllib_parse_unquote_plus',
    'compat_urllib_parse_unquote_to_bytes',
    'compat_urllib_parse_urlparse',
    'compat_urllib_request',
    'compat_urllib_request_DataHandler',
    'compat_urllib_response',
    'compat_urlparse',
    'compat_urlretrieve',
    'compat_xml_parse_error',
    'shlex_quote',
    'subprocess_check_output',
    'workaround_optparse_bug9161',
]