_ Git - youtube-dl/blob - youtube_dl/compat.py

   1 from __future__ import unicode_literals
   2
   3 import binascii
   4 import collections
   5 import email
   6 import getpass
   7 import io
   8 import optparse
   9 import os
  10 import re
  11 import shlex
  12 import shutil
  13 import socket
  14 import struct
  15 import subprocess
  16 import sys
  17 import itertools
  18 import xml.etree.ElementTree
  19
  20
  21 try:
  22     import urllib.request as compat_urllib_request
  23 except ImportError:  # Python 2
  24     import urllib2 as compat_urllib_request
  25
  26 try:
  27     import urllib.error as compat_urllib_error
  28 except ImportError:  # Python 2
  29     import urllib2 as compat_urllib_error
  30
  31 try:
  32     import urllib.parse as compat_urllib_parse
  33 except ImportError:  # Python 2
  34     import urllib as compat_urllib_parse
  35
  36 try:
  37     from urllib.parse import urlparse as compat_urllib_parse_urlparse
  38 except ImportError:  # Python 2
  39     from urlparse import urlparse as compat_urllib_parse_urlparse
  40
  41 try:
  42     import urllib.parse as compat_urlparse
  43 except ImportError:  # Python 2
  44     import urlparse as compat_urlparse
  45
  46 try:
  47     import urllib.response as compat_urllib_response
  48 except ImportError:  # Python 2
  49     import urllib as compat_urllib_response
  50
  51 try:
  52     import http.cookiejar as compat_cookiejar
  53 except ImportError:  # Python 2
  54     import cookielib as compat_cookiejar
  55
  56 try:
  57     import http.cookies as compat_cookies
  58 except ImportError:  # Python 2
  59     import Cookie as compat_cookies
  60
  61 try:
  62     import html.entities as compat_html_entities
  63 except ImportError:  # Python 2
  64     import htmlentitydefs as compat_html_entities
  65
  66 try:
  67     import http.client as compat_http_client
  68 except ImportError:  # Python 2
  69     import httplib as compat_http_client
  70
  71 try:
  72     from urllib.error import HTTPError as compat_HTTPError
  73 except ImportError:  # Python 2
  74     from urllib2 import HTTPError as compat_HTTPError
  75
  76 try:
  77     from urllib.request import urlretrieve as compat_urlretrieve
  78 except ImportError:  # Python 2
  79     from urllib import urlretrieve as compat_urlretrieve
  80
  81 try:
  82     from html.parser import HTMLParser as compat_HTMLParser
  83 except ImportError:  # Python 2
  84     from HTMLParser import HTMLParser as compat_HTMLParser
  85
  86
  87 try:
  88     from subprocess import DEVNULL
  89     compat_subprocess_get_DEVNULL = lambda: DEVNULL
  90 except ImportError:
  91     compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
  92
  93 try:
  94     import http.server as compat_http_server
  95 except ImportError:
  96     import BaseHTTPServer as compat_http_server
  97
# Text string type: ``unicode`` where that builtin exists (Python 2),
# otherwise ``str``.
try:
    compat_str = unicode  # Python 2
except NameError:
    compat_str = str
 102
 103 try:
 104     from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
 105     from urllib.parse import unquote as compat_urllib_parse_unquote
 106     from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
 107 except ImportError:  # Python 2
 108     _asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
 109                 else re.compile('([\x00-\x7f]+)'))
 110
 111     # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
 112     # implementations from cpython 3.4.3's stdlib. Python 2's version
 113     # is apparently broken (see https://github.com/rg3/youtube-dl/pull/6244)
 114
 115     def compat_urllib_parse_unquote_to_bytes(string):
 116         """unquote_to_bytes('abc%20def') -> b'abc def'."""
 117         # Note: strings are encoded as UTF-8. This is only an issue if it contains
 118         # unescaped non-ASCII characters, which URIs should not.
 119         if not string:
 120             # Is it a string-like object?
 121             string.split
 122             return b''
 123         if isinstance(string, compat_str):
 124             string = string.encode('utf-8')
 125         bits = string.split(b'%')
 126         if len(bits) == 1:
 127             return string
 128         res = [bits[0]]
 129         append = res.append
 130         for item in bits[1:]:
 131             try:
 132                 append(compat_urllib_parse._hextochr[item[:2]])
 133                 append(item[2:])
 134             except KeyError:
 135                 append(b'%')
 136                 append(item)
 137         return b''.join(res)
 138
 139     def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
 140         """Replace %xx escapes by their single-character equivalent. The optional
 141         encoding and errors parameters specify how to decode percent-encoded
 142         sequences into Unicode characters, as accepted by the bytes.decode()
 143         method.
 144         By default, percent-encoded sequences are decoded with UTF-8, and invalid
 145         sequences are replaced by a placeholder character.
 146
 147         unquote('abc%20def') -> 'abc def'.
 148         """
 149         if '%' not in string:
 150             string.split
 151             return string
 152         if encoding is None:
 153             encoding = 'utf-8'
 154         if errors is None:
 155             errors = 'replace'
 156         bits = _asciire.split(string)
 157         res = [bits[0]]
 158         append = res.append
 159         for i in range(1, len(bits), 2):
 160             append(compat_urllib_parse_unquote_to_bytes(bits[i]).decode(encoding, errors))
 161             append(bits[i + 1])
 162         return ''.join(res)
 163
 164     def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'):
 165         """Like unquote(), but also replace plus signs by spaces, as required for
 166         unquoting HTML form values.
 167
 168         unquote_plus('%7e/abc+def') -> '~/abc def'
 169         """
 170         string = string.replace('+', ' ')
 171         return compat_urllib_parse_unquote(string, encoding, errors)
 172
 173 try:
 174     from urllib.parse import urlencode as compat_urllib_parse_urlencode
 175 except ImportError:  # Python 2
 176     # Python 2 will choke in urlencode on mixture of byte and unicode strings.
 177     # Possible solutions are to either port it from python 3 with all
 178     # the friends or manually ensure input query contains only byte strings.
 179     # We will stick with latter thus recursively encoding the whole query.
 180     def compat_urllib_parse_urlencode(query, doseq=0, encoding='utf-8'):
 181         def encode_elem(e):
 182             if isinstance(e, dict):
 183                 e = encode_dict(e)
 184             elif isinstance(e, (list, tuple,)):
 185                 list_e = encode_list(e)
 186                 e = tuple(list_e) if isinstance(e, tuple) else list_e
 187             elif isinstance(e, compat_str):
 188                 e = e.encode(encoding)
 189             return e
 190
 191         def encode_dict(d):
 192             return dict((encode_elem(k), encode_elem(v)) for k, v in d.items())
 193
 194         def encode_list(l):
 195             return [encode_elem(e) for e in l]
 196
 197         return compat_urllib_parse.urlencode(encode_elem(query), doseq=doseq)
 198
 199 try:
 200     from urllib.request import DataHandler as compat_urllib_request_DataHandler
 201 except ImportError:  # Python < 3.4
 202     # Ported from CPython 98774:1733b3bd46db, Lib/urllib/request.py
 203     class compat_urllib_request_DataHandler(compat_urllib_request.BaseHandler):
 204         def data_open(self, req):
 205             # data URLs as specified in RFC 2397.
 206             #
 207             # ignores POSTed data
 208             #
 209             # syntax:
 210             # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
 211             # mediatype := [ type "/" subtype ] *( ";" parameter )
 212             # data      := *urlchar
 213             # parameter := attribute "=" value
 214             url = req.get_full_url()
 215
 216             scheme, data = url.split(':', 1)
 217             mediatype, data = data.split(',', 1)
 218
 219             # even base64 encoded data URLs might be quoted so unquote in any case:
 220             data = compat_urllib_parse_unquote_to_bytes(data)
 221             if mediatype.endswith(';base64'):
 222                 data = binascii.a2b_base64(data)
 223                 mediatype = mediatype[:-7]
 224
 225             if not mediatype:
 226                 mediatype = 'text/plain;charset=US-ASCII'
 227
 228             headers = email.message_from_string(
 229                 'Content-type: %s\nContent-length: %d\n' % (mediatype, len(data)))
 230
 231             return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
 232
# Type to use in isinstance() checks for "any string": ``basestring`` where
# that builtin exists (Python 2), otherwise ``str``.
try:
    compat_basestring = basestring  # Python 2
except NameError:
    compat_basestring = str

# Code point -> one-character text string: ``unichr`` where that builtin
# exists (Python 2), otherwise ``chr``.
try:
    compat_chr = unichr  # Python 2
except NameError:
    compat_chr = chr
 242
 243 try:
 244     from xml.etree.ElementTree import ParseError as compat_xml_parse_error
 245 except ImportError:  # Python 2.6
 246     from xml.parsers.expat import ExpatError as compat_xml_parse_error
 247
 248
 249 etree = xml.etree.ElementTree
 250
 251
 252 class _TreeBuilder(etree.TreeBuilder):
 253     def doctype(self, name, pubid, system):
 254         pass
 255
if sys.version_info[0] >= 3:
    def compat_etree_fromstring(text):
        """Parse the XML document *text* and return its root element.

        The parser builds the tree with _TreeBuilder.
        """
        return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
else:
    # python 2.x tries to encode unicode strings with ascii (see the
    # XMLParser._fixtext method)
    try:
        _etree_iter = etree.Element.iter
    except AttributeError:  # Python <=2.6
        def _etree_iter(root):
            """Yield every descendant of *root* (but not *root* itself),
            each element before its own children."""
            for el in root.findall('*'):
                yield el
                for sub in _etree_iter(el):
                    yield sub

    # on 2.6 XML doesn't have a parser argument, function copied from CPython
    # 2.7 source
    def _XML(text, parser=None):
        """Feed *text* to *parser* and return what ``parser.close()`` returns.

        A falsy *parser* is replaced by an XMLParser targeting _TreeBuilder.
        """
        if not parser:
            parser = etree.XMLParser(target=_TreeBuilder())
        parser.feed(text)
        return parser.close()

    def _element_factory(*args, **kwargs):
        """Create an Element and decode its byte-string attribute values
        from UTF-8."""
        el = etree.Element(*args, **kwargs)
        for k, v in el.items():
            if isinstance(v, bytes):
                el.set(k, v.decode('utf-8'))
        return el

    def compat_etree_fromstring(text):
        """Parse the XML document *text* and return its root element.

        Attribute values are decoded by _element_factory; afterwards every
        element yielded by _etree_iter has a byte-string ``text`` decoded
        from UTF-8.
        """
        doc = _XML(text, parser=etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory)))
        for el in _etree_iter(doc):
            if el.text is not None and isinstance(el.text, bytes):
                el.text = el.text.decode('utf-8')
        return doc
 292
 293 if sys.version_info < (2, 7):
 294     # Here comes the crazy part: In 2.6, if the xpath is a unicode,
 295     # .//node does not match if a node is a direct child of . !
 296     def compat_xpath(xpath):
 297         if isinstance(xpath, compat_str):
 298             xpath = xpath.encode('ascii')
 299         return xpath
 300 else:
 301     compat_xpath = lambda xpath: xpath
 302
 303 try:
 304     from urllib.parse import parse_qs as compat_parse_qs
 305 except ImportError:  # Python 2
 306     # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
 307     # Python 2's version is apparently totally broken
 308
 309     def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
 310                    encoding='utf-8', errors='replace'):
 311         qs, _coerce_result = qs, compat_str
 312         pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
 313         r = []
 314         for name_value in pairs:
 315             if not name_value and not strict_parsing:
 316                 continue
 317             nv = name_value.split('=', 1)
 318             if len(nv) != 2:
 319                 if strict_parsing:
 320                     raise ValueError('bad query field: %r' % (name_value,))
 321                 # Handle case of a control-name with no equal sign
 322                 if keep_blank_values:
 323                     nv.append('')
 324                 else:
 325                     continue
 326             if len(nv[1]) or keep_blank_values:
 327                 name = nv[0].replace('+', ' ')
 328                 name = compat_urllib_parse_unquote(
 329                     name, encoding=encoding, errors=errors)
 330                 name = _coerce_result(name)
 331                 value = nv[1].replace('+', ' ')
 332                 value = compat_urllib_parse_unquote(
 333                     value, encoding=encoding, errors=errors)
 334                 value = _coerce_result(value)
 335                 r.append((name, value))
 336         return r
 337
 338     def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
 339                         encoding='utf-8', errors='replace'):
 340         parsed_result = {}
 341         pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
 342                            encoding=encoding, errors=errors)
 343         for name, value in pairs:
 344             if name in parsed_result:
 345                 parsed_result[name].append(value)
 346             else:
 347                 parsed_result[name] = [value]
 348         return parsed_result
 349
 350 try:
 351     from shlex import quote as compat_shlex_quote
 352 except ImportError:  # Python < 3.3
 353     def compat_shlex_quote(s):
 354         if re.match(r'^[-_\w./]+$', s):
 355             return s
 356         else:
 357             return "'" + s.replace("'", "'\"'\"'") + "'"
 358
 359
 360 if sys.version_info >= (2, 7, 3):
 361     compat_shlex_split = shlex.split
 362 else:
 363     # Working around shlex issue with unicode strings on some python 2
 364     # versions (see http://bugs.python.org/issue1548891)
 365     def compat_shlex_split(s, comments=False, posix=True):
 366         if isinstance(s, compat_str):
 367             s = s.encode('utf-8')
 368         return shlex.split(s, comments, posix)
 369
 370
 371 def compat_ord(c):
 372     if type(c) is int:
 373         return c
 374     else:
 375         return ord(c)
 376
 377
# os.name, except that when os.name is 'java' the value of os._name is used
# instead (presumably the underlying platform name on Jython - confirm).
compat_os_name = os._name if os.name == 'java' else os.name
 379
 380
 381 if sys.version_info >= (3, 0):
 382     compat_getenv = os.getenv
 383     compat_expanduser = os.path.expanduser
 384
    def compat_setenv(key, value, env=os.environ):
        """Set ``env[key]`` to *value*; *env* defaults to ``os.environ``."""
        env[key] = value
 387 else:
 388     # Environment variables should be decoded with filesystem encoding.
 389     # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)
 390
 391     def compat_getenv(key, default=None):
 392         from .utils import get_filesystem_encoding
 393         env = os.getenv(key, default)
 394         if env:
 395             env = env.decode(get_filesystem_encoding())
 396         return env
 397
 398     def compat_setenv(key, value, env=os.environ):
 399         def encode(v):
 400             from .utils import get_filesystem_encoding
 401             return v.encode(get_filesystem_encoding()) if isinstance(v, compat_str) else v
 402         env[encode(key)] = encode(value)
 403
 404     # HACK: The default implementations of os.path.expanduser from cpython do not decode
 405     # environment variables with filesystem encoding. We will work around this by
 406     # providing adjusted implementations.
 407     # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
 408     # for different platforms with correct environment variables decoding.
 409
 410     if compat_os_name == 'posix':
 411         def compat_expanduser(path):
 412             """Expand ~ and ~user constructions.  If user or $HOME is unknown,
 413             do nothing."""
 414             if not path.startswith('~'):
 415                 return path
 416             i = path.find('/', 1)
 417             if i < 0:
 418                 i = len(path)
 419             if i == 1:
 420                 if 'HOME' not in os.environ:
 421                     import pwd
 422                     userhome = pwd.getpwuid(os.getuid()).pw_dir
 423                 else:
 424                     userhome = compat_getenv('HOME')
 425             else:
 426                 import pwd
 427                 try:
 428                     pwent = pwd.getpwnam(path[1:i])
 429                 except KeyError:
 430                     return path
 431                 userhome = pwent.pw_dir
 432             userhome = userhome.rstrip('/')
 433             return (userhome + path[i:]) or '/'
 434     elif compat_os_name == 'nt' or compat_os_name == 'ce':
 435         def compat_expanduser(path):
 436             """Expand ~ and ~user constructs.
 437
 438             If user or $HOME is unknown, do nothing."""
 439             if path[:1] != '~':
 440                 return path
 441             i, n = 1, len(path)
 442             while i < n and path[i] not in '/\\':
 443                 i = i + 1
 444
 445             if 'HOME' in os.environ:
 446                 userhome = compat_getenv('HOME')
 447             elif 'USERPROFILE' in os.environ:
 448                 userhome = compat_getenv('USERPROFILE')
 449             elif 'HOMEPATH' not in os.environ:
 450                 return path
 451             else:
 452                 try:
 453                     drive = compat_getenv('HOMEDRIVE')
 454                 except KeyError:
 455                     drive = ''
 456                 userhome = os.path.join(drive, compat_getenv('HOMEPATH'))
 457
 458             if i != 1:  # ~user
 459                 userhome = os.path.join(os.path.dirname(userhome), path[1:i])
 460
 461             return userhome + path[i:]
 462     else:
 463         compat_expanduser = os.path.expanduser
 464
 465
 466 if sys.version_info < (3, 0):
 467     def compat_print(s):
 468         from .utils import preferredencoding
 469         print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
 470 else:
 471     def compat_print(s):
 472         assert isinstance(s, compat_str)
 473         print(s)
 474
 475
 476 if sys.version_info < (3, 0) and sys.platform == 'win32':
 477     def compat_getpass(prompt, *args, **kwargs):
 478         if isinstance(prompt, compat_str):
 479             from .utils import preferredencoding
 480             prompt = prompt.encode(preferredencoding())
 481         return getpass.getpass(prompt, *args, **kwargs)
 482 else:
 483     compat_getpass = getpass.getpass
 484
 485 # Python < 2.6.5 require kwargs to be bytes
 486 try:
 487     def _testfunc(x):
 488         pass
 489     _testfunc(**{'x': 0})
 490 except TypeError:
 491     def compat_kwargs(kwargs):
 492         return dict((bytes(k), v) for k, v in kwargs.items())
 493 else:
 494     compat_kwargs = lambda kwargs: kwargs
 495
 496
 497 if sys.version_info < (2, 7):
 498     def compat_socket_create_connection(address, timeout, source_address=None):
 499         host, port = address
 500         err = None
 501         for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
 502             af, socktype, proto, canonname, sa = res
 503             sock = None
 504             try:
 505                 sock = socket.socket(af, socktype, proto)
 506                 sock.settimeout(timeout)
 507                 if source_address:
 508                     sock.bind(source_address)
 509                 sock.connect(sa)
 510                 return sock
 511             except socket.error as _:
 512                 err = _
 513                 if sock is not None:
 514                     sock.close()
 515         if err is not None:
 516             raise err
 517         else:
 518             raise socket.error('getaddrinfo returns an empty list')
 519 else:
 520     compat_socket_create_connection = socket.create_connection
 521
 522
 523 # Fix https://github.com/rg3/youtube-dl/issues/4223
 524 # See http://bugs.python.org/issue9161 for what is broken
 525 def workaround_optparse_bug9161():
 526     op = optparse.OptionParser()
 527     og = optparse.OptionGroup(op, 'foo')
 528     try:
 529         og.add_option('-t')
 530     except TypeError:
 531         real_add_option = optparse.OptionGroup.add_option
 532
 533         def _compat_add_option(self, *args, **kwargs):
 534             enc = lambda v: (
 535                 v.encode('ascii', 'replace') if isinstance(v, compat_str)
 536                 else v)
 537             bargs = [enc(a) for a in args]
 538             bkwargs = dict(
 539                 (k, enc(v)) for k, v in kwargs.items())
 540             return real_add_option(self, *bargs, **bkwargs)
 541         optparse.OptionGroup.add_option = _compat_add_option
 542
 543 if hasattr(shutil, 'get_terminal_size'):  # Python >= 3.3
 544     compat_get_terminal_size = shutil.get_terminal_size
 545 else:
 546     _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])
 547
 548     def compat_get_terminal_size(fallback=(80, 24)):
 549         columns = compat_getenv('COLUMNS')
 550         if columns:
 551             columns = int(columns)
 552         else:
 553             columns = None
 554         lines = compat_getenv('LINES')
 555         if lines:
 556             lines = int(lines)
 557         else:
 558             lines = None
 559
 560         if columns is None or lines is None or columns <= 0 or lines <= 0:
 561             try:
 562                 sp = subprocess.Popen(
 563                     ['stty', 'size'],
 564                     stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 565                 out, err = sp.communicate()
 566                 _lines, _columns = map(int, out.split())
 567             except Exception:
 568                 _columns, _lines = _terminal_size(*fallback)
 569
 570             if columns is None or columns <= 0:
 571                 columns = _columns
 572             if lines is None or lines <= 0:
 573                 lines = _lines
 574         return _terminal_size(columns, lines)
 575
 576 try:
 577     itertools.count(start=0, step=1)
 578     compat_itertools_count = itertools.count
 579 except TypeError:  # Python 2.6
 580     def compat_itertools_count(start=0, step=1):
 581         n = start
 582         while True:
 583             yield n
 584             n += step
 585
 586 if sys.version_info >= (3, 0):
 587     from tokenize import tokenize as compat_tokenize_tokenize
 588 else:
 589     from tokenize import generate_tokens as compat_tokenize_tokenize
 590
 591
 592 try:
 593     struct.pack('!I', 0)
 594 except TypeError:
 595     # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument
 596     # See https://bugs.python.org/issue19099
 597     def compat_struct_pack(spec, *args):
 598         if isinstance(spec, compat_str):
 599             spec = spec.encode('ascii')
 600         return struct.pack(spec, *args)
 601
 602     def compat_struct_unpack(spec, *args):
 603         if isinstance(spec, compat_str):
 604             spec = spec.encode('ascii')
 605         return struct.unpack(spec, *args)
 606 else:
 607     compat_struct_pack = struct.pack
 608     compat_struct_unpack = struct.unpack
 609
 610
# Public names of this module (what ``from ... import *`` exports).
# Underscore-prefixed helpers defined in this file are intentionally absent.
__all__ = [
    'compat_HTMLParser',
    'compat_HTTPError',
    'compat_basestring',
    'compat_chr',
    'compat_cookiejar',
    'compat_cookies',
    'compat_etree_fromstring',
    'compat_expanduser',
    'compat_get_terminal_size',
    'compat_getenv',
    'compat_getpass',
    'compat_html_entities',
    'compat_http_client',
    'compat_http_server',
    'compat_itertools_count',
    'compat_kwargs',
    'compat_ord',
    'compat_os_name',
    'compat_parse_qs',
    'compat_print',
    'compat_setenv',
    'compat_shlex_quote',
    'compat_shlex_split',
    'compat_socket_create_connection',
    'compat_str',
    'compat_struct_pack',
    'compat_struct_unpack',
    'compat_subprocess_get_DEVNULL',
    'compat_tokenize_tokenize',
    'compat_urllib_error',
    'compat_urllib_parse',
    'compat_urllib_parse_unquote',
    'compat_urllib_parse_unquote_plus',
    'compat_urllib_parse_unquote_to_bytes',
    'compat_urllib_parse_urlencode',
    'compat_urllib_parse_urlparse',
    'compat_urllib_request',
    'compat_urllib_request_DataHandler',
    'compat_urllib_response',
    'compat_urlparse',
    'compat_urlretrieve',
    'compat_xml_parse_error',
    'compat_xpath',
    'workaround_optparse_bug9161',
]