Merge remote-tracking branch 'Boris-de/wdrmaus_fix#8562'

[youtube-dl] / youtube_dl / utils.py
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py

index b2e4a2dfb7060b83fb16e499e63b10ffacb30a10..89234b39df9cdfe3edbce0420fee5ec5b67c4575 100644 (file)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -42,17 +42,18 @@ from .compat import (
      compat_http_client,
      compat_kwargs,
      compat_parse_qs,
+    compat_shlex_quote,
      compat_socket_create_connection,
      compat_str,
+    compat_struct_pack,
      compat_urllib_error,
      compat_urllib_parse,
      compat_urllib_parse_urlencode,
      compat_urllib_parse_urlparse,
+    compat_urllib_parse_unquote_plus,
      compat_urllib_request,
      compat_urlparse,
      compat_xpath,
-    shlex_quote,
-    struct_pack,
  )
  
  from .socks import (
@@ -61,6 +62,15 @@ from .socks import (
  )
  
  
+def register_socks_protocols():
+    # "Register" SOCKS protocols
+    # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
+    # URLs with protocols not in urlparse.uses_netloc are not handled correctly
+    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
+        if scheme not in compat_urlparse.uses_netloc:
+            compat_urlparse.uses_netloc.append(scheme)
+
+
  # This is not clearly defined otherwise
  compiled_regex_type = type(re.compile(''))
  
@@ -95,9 +105,9 @@ KNOWN_EXTENSIONS = (
      'f4f', 'f4m', 'm3u8', 'smil')
  
  # needed for sanitizing filenames in restricted mode
-ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ',
-                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOUUUUYP', ['ss'],
-                                        'aaaaaa', ['ae'], 'ceeeeiiiionoooooouuuuypy')))
+ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
+                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUYP', ['ss'],
+                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuypy')))
  
  
  def preferredencoding():
@@ -851,9 +861,13 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
                  # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                  if sys.version_info >= (3, 0):
                      location = location.encode('iso-8859-1').decode('utf-8')
+                else:
+                    location = location.decode('utf-8')
                  location_escaped = escape_url(location)
                  if location != location_escaped:
                      del resp.headers['Location']
+                    if sys.version_info < (3, 0):
+                        location_escaped = location_escaped.encode('utf-8')
                      resp.headers['Location'] = location_escaped
          return resp
  
@@ -870,12 +884,20 @@ def make_socks_conn_class(base_class, socks_proxy):
          socks_type = ProxyType.SOCKS5
      elif url_components.scheme.lower() in ('socks', 'socks4'):
          socks_type = ProxyType.SOCKS4
+    elif url_components.scheme.lower() == 'socks4a':
+        socks_type = ProxyType.SOCKS4A
+
+    def unquote_if_non_empty(s):
+        if not s:
+            return s
+        return compat_urllib_parse_unquote_plus(s)
  
      proxy_args = (
          socks_type,
          url_components.hostname, url_components.port or 1080,
          True,  # Remote DNS
-        url_components.username, url_components.password
+        unquote_if_non_empty(url_components.username),
+        unquote_if_non_empty(url_components.password),
      )
  
      class SocksConnection(base_class):
@@ -1017,6 +1039,7 @@ def unified_strdate(date_str, day_first=True):
          format_expressions.extend([
              '%d-%m-%Y',
              '%d.%m.%Y',
+            '%d.%m.%y',
              '%d/%m/%Y',
              '%d/%m/%y',
              '%d/%m/%Y %H:%M:%S',
@@ -1037,7 +1060,10 @@ def unified_strdate(date_str, day_first=True):
      if upload_date is None:
          timetuple = email.utils.parsedate_tz(date_str)
          if timetuple:
-            upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
+            try:
+                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
+            except ValueError:
+                pass
      if upload_date is not None:
          return compat_str(upload_date)
  
@@ -1248,7 +1274,7 @@ def bytes_to_intlist(bs):
  def intlist_to_bytes(xs):
      if not xs:
          return b''
-    return struct_pack('%dB' % len(xs), *xs)
+    return compat_struct_pack('%dB' % len(xs), *xs)
  
  
  # Cross-platform file locking
@@ -1531,15 +1557,11 @@ def setproctitle(title):
  
  
  def remove_start(s, start):
-    if s.startswith(start):
-        return s[len(start):]
-    return s
+    return s[len(start):] if s is not None and s.startswith(start) else s
  
  
  def remove_end(s, end):
-    if s.endswith(end):
-        return s[:-len(end)]
-    return s
+    return s[:-len(end)] if s is not None and s.endswith(end) else s
  
  
  def remove_quotes(s):
@@ -1893,7 +1915,7 @@ def parse_age_limit(s):
  
  def strip_jsonp(code):
      return re.sub(
-        r'(?s)^[a-zA-Z0-9_.]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)
+        r'(?s)^[a-zA-Z0-9_.$]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)
  
  
  def js_to_json(code):
@@ -1901,24 +1923,38 @@ def js_to_json(code):
          v = m.group(0)
          if v in ('true', 'false', 'null'):
              return v
-        if v.startswith('"'):
-            v = re.sub(r"\\'", "'", v[1:-1])
-        elif v.startswith("'"):
-            v = v[1:-1]
-            v = re.sub(r"\\\\|\\'|\"", lambda m: {
-                '\\\\': '\\\\',
-                "\\'": "'",
+        elif v.startswith('/*') or v == ',':
+            return ""
+
+        if v[0] in ("'", '"'):
+            v = re.sub(r'(?s)\\.|"', lambda m: {
                  '"': '\\"',
-            }[m.group(0)], v)
+                "\\'": "'",
+                '\\\n': '',
+                '\\x': '\\u00',
+            }.get(m.group(0), m.group(0)), v[1:-1])
+
+        INTEGER_TABLE = (
+            (r'^0[xX][0-9a-fA-F]+', 16),
+            (r'^0+[0-7]+', 8),
+        )
+
+        for regex, base in INTEGER_TABLE:
+            im = re.match(regex, v)
+            if im:
+                i = int(im.group(0), base)
+                return '"%d":' % i if v.endswith(':') else '%d' % i
+
          return '"%s"' % v
  
-    res = re.sub(r'''(?x)
-        "(?:[^"\\]*(?:\\\\|\\['"nu]))*[^"\\]*"|
-        '(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'|
-        [a-zA-Z_][.a-zA-Z_0-9]*
+    return re.sub(r'''(?sx)
+        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
+        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
+        /\*.*?\*/|,(?=\s*[\]}])|
+        [a-zA-Z_][.a-zA-Z_0-9]*|
+        (?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:\s*:)?|
+        [0-9]+(?=\s*:)
          ''', fix_kv, code)
-    res = re.sub(r',(\s*[\]}])', lambda m: m.group(1), res)
-    return res
  
  
  def qualities(quality_ids):
@@ -1966,7 +2002,7 @@ def ytdl_is_updateable():
  
  def args_to_str(args):
      # Get a short string representation for a subprocess command
-    return ' '.join(shlex_quote(a) for a in args)
+    return ' '.join(compat_shlex_quote(a) for a in args)
  
  
  def error_to_compat_str(err):
@@ -2004,11 +2040,7 @@ def mimetype2ext(mt):
  
  
  def urlhandle_detect_ext(url_handle):
-    try:
-        url_handle.headers
-        getheader = lambda h: url_handle.headers[h]
-    except AttributeError:  # Python < 3
-        getheader = url_handle.info().getheader
+    getheader = url_handle.headers.get
  
      cd = getheader('Content-Disposition')
      if cd:
@@ -2738,7 +2770,7 @@ class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
  
          if proxy == '__noproxy__':
              return None  # No Proxy
-        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks5'):
+        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
              req.add_header('Ytdl-socks-proxy', proxy)
              # youtube-dl's http/https handlers do wrapping the socket with socks
              return None