# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter
def xpath_with_ns(path, ns_map):
    """Expand ``prefix:tag`` steps of *path* into ``{uri}tag`` steps.

    *path* is split on '/'.  A step without a colon is passed through
    unchanged; a step of the form ``prefix:tag`` is rewritten using the
    URI stored under ``prefix`` in *ns_map*.  The rewritten steps are
    joined with '/' again.

    Raises KeyError when a prefix is missing from *ns_map* and ValueError
    when a step contains more than one colon.
    """
    def expand(step):
        pieces = step.split(':')
        if len(pieces) == 1:
            return pieces[0]
        prefix, tag = pieces
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join([expand(step) for step in path.split('/')])
def write_string(s, out=None):
    """Write the text string *s* to *out* and flush it.

    *out* defaults to sys.stderr.  The string is encoded with
    preferredencoding() (unencodable characters are dropped) when the
    stream's ``mode`` attribute contains 'b', and always on Python 2.
    """
    if out is None:
        out = sys.stderr
    assert isinstance(s, type(u''))

    if ('b' in getattr(out, 'mode', '') or
            sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        s = s.encode(preferredencoding(), 'ignore')
    out.write(s)
    out.flush()


def get_cachedir(params=None):
    """Return the cache directory to use.

    ``params['cachedir']`` is returned when that key is present.
    Otherwise the result is ``<cache root>/youtube-dl``, where the cache
    root is $XDG_CACHE_HOME, or ``~/.cache`` when that variable is unset
    or empty.
    """
    if params is None:
        params = {}
    # An empty XDG_CACHE_HOME counts as "not set"; using it verbatim would
    # produce a cache directory relative to the current working directory.
    cache_root = (os.environ.get('XDG_CACHE_HOME') or
                  os.path.expanduser('~/.cache'))
    return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))


# Cross-platform file locking
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        """ctypes layout of the structure passed to (Un)LockFileEx."""
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low/high halves of the byte count handed to (Un)LockFileEx.
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        """Lock the open file *f*; raise OSError when LockFileEx fails."""
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Kept on the file object so _unlock_file can pass the same
        # structure back to UnlockFileEx.
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # 0x2 is the exclusive-lock flag; 0x0 requests a shared lock.
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """Release the lock taken by _lock_file; raise OSError on failure."""
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    import fcntl

    def _lock_file(f, exclusive):
        """Take an exclusive or shared fcntl lock on the open file *f*."""
        fcntl.lockf(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

    def _unlock_file(f):
        """Release the fcntl lock held on the open file *f*."""
        fcntl.lockf(f, fcntl.LOCK_UN)


class locked_file(object):
    """Context manager around a text file that is locked while in use.

    The file is opened in the constructor.  Entering the context takes a
    shared lock for mode 'r' and an exclusive lock for 'a' and 'w';
    leaving it releases the lock and closes the file.
    """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, exclusive)
        except (IOError, OSError):
            # The win32 implementation raises OSError, which is not an
            # IOError on Python 2; catch both so the handle never leaks.
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)


try:
    from shlex import quote as _shell_quote_arg
except ImportError:  # Python 2
    from pipes import quote as _shell_quote_arg


def shell_quote(args):
    """Return *args* as one space-separated, shell-quoted text string.

    Byte-string arguments are decoded with the filesystem encoding
    (falling back to UTF-8 when none is reported) before quoting.
    """
    quoted_args = []
    encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    for a in args:
        if isinstance(a, bytes):
            # We may get a filename encoded with 'encodeFilename'
            a = a.decode(encoding)
        quoted_args.append(_shell_quote_arg(a))
    return u' '.join(quoted_args)


def takewhile_inclusive(pred, seq):
    """ Like itertools.takewhile, but include the latest evaluated element
        (the first element e for which pred(e) is false) """
    for e in seq:
        yield e
        if not pred(e):
            return


def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use.

    *data* is JSON-encoded and appended to *url* as a URL-encoded
    ``__youtubedl_smuggle`` parameter behind a '#'.
    """

    sdata = compat_urllib_parse.urlencode(
        {u'__youtubedl_smuggle': json.dumps(data)})
    return url + u'#' + sdata


def unsmuggle_url(smug_url):
    """ Split a URL produced by smuggle_url into (url, data).

    Returns (smug_url, None) when the URL carries no smuggled data.
    """
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, None
    url, _, sdata = smug_url.rpartition(u'#')
    jsond = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0]
    data = json.loads(jsond)
    return url, data