X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Futils.py;h=a1f586b801a5aaa254ec63525f26ba32388b9474;hb=28cc2241e44ff0c0704cfffaca6d47d377041aa5;hp=62e769fd5188a532b710a1bddbce9af8eef660ee;hpb=1bab3437046646da4ebe2b8e0c7fdc25aa1072ce;p=youtube-dl
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 62e769fd5..a1f586b80 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -125,8 +125,8 @@ KNOWN_EXTENSIONS = (
# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
- itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUYP', ['ss'],
- 'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuypy')))
+ itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
+ 'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
DATE_FORMATS = (
'%d %B %Y',
@@ -184,7 +184,7 @@ DATE_FORMATS_MONTH_FIRST.extend([
])
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
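-JSON_LD_RE = r'(?is)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
+JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'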
def preferredencoding():
@@ -546,7 +546,7 @@ def sanitize_url(url):
return 'http:%s' % url
# Fix some common typos seen so far
COMMON_TYPOS = (
- # https://github.com/rg3/youtube-dl/issues/15649
+ # https://github.com/ytdl-org/youtube-dl/issues/15649
(r'^httpss://', r'https://'),
# https://bx1.be/lives/direct-tv/
(r'^rmtp([es]?)://', r'rtmp\1://'),
@@ -596,7 +596,7 @@ def _htmlentity_transform(entity_with_semicolon):
numstr = '0%s' % numstr
else:
base = 10
- # See https://github.com/rg3/youtube-dl/issues/7518
+ # See https://github.com/ytdl-org/youtube-dl/issues/7518
try:
return compat_chr(int(numstr, base))
except ValueError:
@@ -861,8 +861,8 @@ class XAttrMetadataError(YoutubeDLError):
self.msg = msg
# Parsing code and msg
- if (self.code in (errno.ENOSPC, errno.EDQUOT) or
- 'No space left' in self.msg or 'Disk quota excedded' in self.msg):
+ if (self.code in (errno.ENOSPC, errno.EDQUOT)
+ or 'No space left' in self.msg or 'Disk quota excedded' in self.msg):
self.reason = 'NO_SPACE'
elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
self.reason = 'VALUE_TOO_LONG'
@@ -877,7 +877,7 @@ class XAttrUnavailableError(YoutubeDLError):
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
# Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
# expected HTTP responses to meet HTTP/1.0 or later (see also
- # https://github.com/rg3/youtube-dl/issues/6727)
+ # https://github.com/ytdl-org/youtube-dl/issues/6727)
if sys.version_info < (3, 0):
kwargs['strict'] = True
hc = http_class(*args, **compat_kwargs(kwargs))
@@ -1051,7 +1051,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
resp.msg = old_resp.msg
del resp.headers['Content-encoding']
# Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
- # https://github.com/rg3/youtube-dl/issues/6457).
+ # https://github.com/ytdl-org/youtube-dl/issues/6457).
if 300 <= resp.code < 400:
location = resp.headers.get('Location')
if location:
@@ -1141,6 +1141,8 @@ class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
+ _HTTPONLY_PREFIX = '#HttpOnly_'
+
def save(self, filename=None, ignore_discard=False, ignore_expires=False):
# Store session cookies with `expires` set to 0 instead of an empty
# string
@@ -1150,7 +1152,21 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
compat_cookiejar.MozillaCookieJar.save(self, filename, ignore_discard, ignore_expires)
def load(self, filename=None, ignore_discard=False, ignore_expires=False):
- compat_cookiejar.MozillaCookieJar.load(self, filename, ignore_discard, ignore_expires)
+ """Load cookies from a file."""
+ if filename is None:
+ if self.filename is not None:
+ filename = self.filename
+ else:
+ raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
+
+ cf = io.StringIO()
+ with open(filename) as f:
+ for line in f:
+ if line.startswith(self._HTTPONLY_PREFIX):
+ line = line[len(self._HTTPONLY_PREFIX):]
+ cf.write(compat_str(line))
+ cf.seek(0)
+ self._really_load(cf, filename, ignore_discard, ignore_expires)
# Session cookies are denoted by either `expires` field set to
# an empty string or 0. MozillaCookieJar only recognizes the former
# (see [1]). So we need force the latter to be recognized as session
@@ -1174,7 +1190,7 @@ class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
def http_response(self, request, response):
# Python 2 will choke on next HTTP request in row if there are non-ASCII
# characters in Set-Cookie HTTP header of last response (see
- # https://github.com/rg3/youtube-dl/issues/6769).
+ # https://github.com/ytdl-org/youtube-dl/issues/6769).
# In order to at least prevent crashing we will percent encode Set-Cookie
# header before HTTPCookieProcessor starts processing it.
# if sys.version_info < (3, 0) and response.headers:
@@ -1437,8 +1453,8 @@ def _windows_write_string(s, out):
def not_a_console(handle):
if handle == INVALID_HANDLE_VALUE or handle is None:
return True
- return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or
- GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
+ return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
+ or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
if not_a_console(h):
return False
@@ -1474,8 +1490,8 @@ def write_string(s, out=None, encoding=None):
if _windows_write_string(s, out):
return
- if ('b' in getattr(out, 'mode', '') or
- sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
+ if ('b' in getattr(out, 'mode', '')
+ or sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
byt = s.encode(encoding or preferredencoding(), 'ignore')
out.write(byt)
elif hasattr(out, 'buffer'):
@@ -1782,6 +1798,14 @@ def parse_resolution(s):
return {}
+def parse_bitrate(s):
+ if not isinstance(s, compat_str):
+ return
+ mobj = re.search(r'\b(\d+)\s*kbps', s)
+ if mobj:
+ return int(mobj.group(1))
+
+
def month_by_name(name, lang='en'):
""" Return the number of a month by (locale-independently) English name """
@@ -1868,7 +1892,7 @@ def urljoin(base, path):
path = path.decode('utf-8')
if not isinstance(path, compat_str) or not path:
return None
- if re.match(r'^(?:https?:)?//', path):
+ if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
return path
if isinstance(base, bytes):
base = base.decode('utf-8')
@@ -1898,7 +1922,7 @@ def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
return default
try:
return int(v) * invscale // scale
- except ValueError:
+ except (ValueError, TypeError):
return default
@@ -1919,7 +1943,7 @@ def float_or_none(v, scale=1, invscale=1, default=None):
return default
try:
return float(v) * invscale / scale
- except ValueError:
+ except (ValueError, TypeError):
return default
@@ -1927,8 +1951,8 @@ def bool_or_none(v, default=None):
return v if isinstance(v, bool) else default
-def strip_or_none(v):
- return None if v is None else v.strip()
+def strip_or_none(v, default=None):
+ return v.strip() if isinstance(v, compat_str) else default
def url_or_none(url):
@@ -2028,7 +2052,7 @@ def get_exe_version(exe, args=['--version'],
try:
# STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
# SIGTTOU if youtube-dl is run in the background.
- # See https://github.com/rg3/youtube-dl/issues/955#issuecomment-209789656
+ # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
out, _ = subprocess.Popen(
[encodeArgument(exe)] + args,
stdin=subprocess.PIPE,
@@ -2304,10 +2328,10 @@ def merge_dicts(*dicts):
for k, v in a_dict.items():
if v is None:
continue
- if (k not in merged or
- (isinstance(v, compat_str) and v and
- isinstance(merged[k], compat_str) and
- not merged[k])):
+ if (k not in merged
+ or (isinstance(v, compat_str) and v
+ and isinstance(merged[k], compat_str)
+ and not merged[k])):
merged[k] = v
return merged
@@ -2505,7 +2529,7 @@ def parse_codecs(codecs_str):
vcodec, acodec = None, None
for full_codec in splited_codecs:
codec = full_codec.split('.')[0]
- if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01'):
+ if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
if not vcodec:
vcodec = full_codec
elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
@@ -2516,13 +2540,8 @@ def parse_codecs(codecs_str):
if not vcodec and not acodec:
if len(splited_codecs) == 2:
return {
- 'vcodec': vcodec,
- 'acodec': acodec,
- }
- elif len(splited_codecs) == 1:
- return {
- 'vcodec': 'none',
- 'acodec': vcodec,
+ 'vcodec': splited_codecs[0],
+ 'acodec': splited_codecs[1],
}
else:
return {
@@ -2633,14 +2652,14 @@ def _match_one(filter_part, dct):
if m:
op = COMPARISON_OPERATORS[m.group('op')]
actual_value = dct.get(m.group('key'))
- if (m.group('quotedstrval') is not None or
- m.group('strval') is not None or
+ if (m.group('quotedstrval') is not None
+ or m.group('strval') is not None
# If the original field is a string and matching comparisonvalue is
# a number we should respect the origin of the original field
# and process comparison value as a string (see
- # https://github.com/rg3/youtube-dl/issues/11082).
- actual_value is not None and m.group('intval') is not None and
- isinstance(actual_value, compat_str)):
+ # https://github.com/ytdl-org/youtube-dl/issues/11082).
+ or actual_value is not None and m.group('intval') is not None
+ and isinstance(actual_value, compat_str)):
if m.group('op') not in ('=', '!='):
raise ValueError(
'Operator %s does not support string values!' % m.group('op'))
@@ -2968,6 +2987,7 @@ class ISO639Utils(object):
'gv': 'glv',
'ha': 'hau',
'he': 'heb',
+ 'iw': 'heb', # Replaced by he in 1989 revision
'hi': 'hin',
'ho': 'hmo',
'hr': 'hrv',
@@ -2977,6 +2997,7 @@ class ISO639Utils(object):
'hz': 'her',
'ia': 'ina',
'id': 'ind',
+ 'in': 'ind', # Replaced by id in 1989 revision
'ie': 'ile',
'ig': 'ibo',
'ii': 'iii',
@@ -3091,6 +3112,7 @@ class ISO639Utils(object):
'wo': 'wol',
'xh': 'xho',
'yi': 'yid',
+ 'ji': 'yid', # Replaced by yi in 1989 revision
'yo': 'yor',
'za': 'zha',
'zh': 'zho',
@@ -3785,7 +3807,7 @@ def urshift(val, n):
# Based on png2str() written by @gdkchan and improved by @yokrysty
-# Originally posted at https://github.com/rg3/youtube-dl/issues/9706
+# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
# Reference: https://www.w3.org/TR/PNG/
header = png_data[8:]
@@ -3900,7 +3922,7 @@ def write_xattr(path, key, value):
if hasattr(xattr, 'set'): # pyxattr
# Unicode arguments are not supported in python-pyxattr until
# version 0.5.0
- # See https://github.com/rg3/youtube-dl/issues/5498
+ # See https://github.com/ytdl-org/youtube-dl/issues/5498
pyxattr_required_version = '0.5.0'
if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
# TODO: fallback to CLI tools
@@ -3946,9 +3968,9 @@ def write_xattr(path, key, value):
executable = 'xattr'
opts = ['-w', key, value]
- cmd = ([encodeFilename(executable, True)] +
- [encodeArgument(o) for o in opts] +
- [encodeFilename(path, True)])
+ cmd = ([encodeFilename(executable, True)]
+ + [encodeArgument(o) for o in opts]
+ + [encodeFilename(path, True)])
try:
p = subprocess.Popen(