import errno
import functools
import gzip
-import itertools
import io
+import itertools
import json
import locale
import math
import pipes
import platform
import re
-import ssl
import socket
-import struct
+import ssl
import subprocess
import sys
import tempfile
compat_urlparse,
compat_xpath,
shlex_quote,
+ struct_pack,
)
'wav',
'f4f', 'f4m', 'm3u8', 'smil')
+# needed for sanitizing filenames in restricted mode
+ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ',
+ itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOUUUUYP', ['ss'],
+ 'aaaaaa', ['ae'], 'ceeeeiiiionoooooouuuuypy')))
+
def preferredencoding():
"""Get preferred encoding.
m = re.search(r'''(?xs)
<([a-zA-Z0-9:._-]+)
- (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
+ (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'))*?
\s+%s=['"]?%s['"]?
- (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
+ (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'))*?
\s*>
(?P<content>.*?)
</\1>
Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
"""
def replace_insane(char):
+ if restricted and char in ACCENT_CHARS:
+ return ACCENT_CHARS[char]
if char == '?' or ord(char) < 32 or ord(char) == 127:
return ''
elif char == '"':
fragment=escape_rfc3986(url_parsed.fragment)
).geturl()
-try:
- struct.pack('!I', 0)
-except TypeError:
- # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument
- # See https://bugs.python.org/issue19099
- def struct_pack(spec, *args):
- if isinstance(spec, compat_str):
- spec = spec.encode('ascii')
- return struct.pack(spec, *args)
-
- def struct_unpack(spec, *args):
- if isinstance(spec, compat_str):
- spec = spec.encode('ascii')
- return struct.unpack(spec, *args)
-else:
- struct_pack = struct.pack
- struct_unpack = struct.unpack
-
def read_batch_urls(batch_fd):
def fixup(url):
def update_url_query(url, query):
+ if not query:
+ return url
parsed_url = compat_urlparse.urlparse(url)
qs = compat_parse_qs(parsed_url.query)
qs.update(query)
def mimetype2ext(mt):
+ if mt is None:
+ return None
+
ext = {
'audio/mp4': 'm4a',
}.get(mt)