X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Futils.py;h=9ad0952d5a0bd279fc23bb7585fefb72f341021b;hb=8bcc875676b56c062a4fdd81763a6adb0fb1390c;hp=67be4a9ae43710fb86bf2087ab3bae4421688f3d;hpb=ec5f601670dfb6c39d3a4669898284bb2782dd0c;p=youtube-dl diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 67be4a9ae..9ad0952d5 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -41,6 +41,7 @@ from .compat import ( compat_urllib_parse_urlparse, compat_urllib_request, compat_urlparse, + shlex_quote, ) @@ -55,6 +56,7 @@ std_headers = { 'Accept-Language': 'en-us,en;q=0.5', } + def preferredencoding(): """Get preferred encoding. @@ -63,7 +65,7 @@ def preferredencoding(): """ try: pref = locale.getpreferredencoding() - u'TEST'.encode(pref) + 'TEST'.encode(pref) except: pref = 'UTF-8' @@ -71,9 +73,10 @@ def preferredencoding(): def write_json_file(obj, fn): - """ Encode obj as JSON and write it to fn, atomically """ + """ Encode obj as JSON and write it to fn, atomically if possible """ - if sys.version_info < (3, 0): + fn = encodeFilename(fn) + if sys.version_info < (3, 0) and sys.platform != 'win32': encoding = get_filesystem_encoding() # os.path.basename returns a bytes object, but NamedTemporaryFile # will fail if the filename contains non ascii characters unless we @@ -107,6 +110,13 @@ def write_json_file(obj, fn): try: with tf: json.dump(obj, tf) + if sys.platform == 'win32': + # Need to remove existing file on Windows, else os.rename raises + # WindowsError or FileExistsError. + try: + os.unlink(fn) + except OSError: + pass os.rename(tf.name, fn) except: try: @@ -137,6 +147,8 @@ else: # On python2.6 the xml.etree.ElementTree.Element methods don't support # the namespace parameter + + def xpath_with_ns(path, ns_map): components = [c.split(':') for c in path.split('/')] replaced = [] @@ -215,7 +227,7 @@ def sanitize_open(filename, open_mode): It returns the tuple (stream, definitive_file_name). 
""" try: - if filename == u'-': + if filename == '-': if sys.platform == 'win32': import msvcrt msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY) @@ -228,7 +240,7 @@ def sanitize_open(filename, open_mode): # In case of error, try to remove win32 forbidden chars alt_filename = os.path.join( - re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', path_part) + re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part) for path_part in os.path.split(filename) ) if alt_filename == filename: @@ -247,6 +259,7 @@ def timeconvert(timestr): timestamp = email.utils.mktime_tz(timetuple) return timestamp + def sanitize_filename(s, restricted=False, is_id=False): """Sanitizes a string so it could be used as part of a filename. If restricted is set, use a stricter subset of allowed characters. @@ -267,7 +280,7 @@ def sanitize_filename(s, restricted=False, is_id=False): return '_' return char - result = u''.join(map(replace_insane, s)) + result = ''.join(map(replace_insane, s)) if not is_id: while '__' in result: result = result.replace('__', '_') @@ -279,6 +292,7 @@ def sanitize_filename(s, restricted=False, is_id=False): result = '_' return result + def orderedSet(iterable): """ Remove all duplicates from the input iterable """ res = [] @@ -297,15 +311,15 @@ def _htmlentity_transform(entity): mobj = re.match(r'#(x?[0-9]+)', entity) if mobj is not None: numstr = mobj.group(1) - if numstr.startswith(u'x'): + if numstr.startswith('x'): base = 16 - numstr = u'0%s' % numstr + numstr = '0%s' % numstr else: base = 10 return compat_chr(int(numstr, base)) # Unknown entity in name, return its literal representation - return (u'&%s;' % entity) + return ('&%s;' % entity) def unescapeHTML(s): @@ -329,7 +343,7 @@ def encodeFilename(s, for_subprocess=False): return s if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5: - # Pass u'' directly to use Unicode APIs on Windows 2000 and up + # Pass '' directly to use Unicode APIs on Windows 2000 and up # (Detecting Windows NT 4 is tricky because 'major >= 4' would # match 
Windows 9x series as well. Besides, NT 4 is obsolete.) if not for_subprocess: @@ -363,6 +377,7 @@ def decodeOption(optval): assert isinstance(optval, compat_str) return optval + def formatSeconds(secs): if secs > 3600: return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60) @@ -412,8 +427,10 @@ def make_HTTPS_handler(opts_no_check_certificate, **kwargs): pass # Python < 3.4 return compat_urllib_request.HTTPSHandler(context=context, **kwargs) + class ExtractorError(Exception): """Error during info extraction.""" + def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None): """ tb, if given, is the original traceback (so that it can be printed out). If expected is set, this is a normal error message and most likely not a bug in youtube-dl. @@ -424,9 +441,15 @@ class ExtractorError(Exception): if video_id is not None: msg = video_id + ': ' + msg if cause: - msg += u' (caused by %r)' % cause + msg += ' (caused by %r)' % cause if not expected: - msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.' + if ytdl_is_updateable(): + update_cmd = 'type youtube-dl -U to update' + else: + update_cmd = 'see https://yt-dl.org/update on how to update' + msg += '; please report this issue on https://yt-dl.org/bug .' + msg += ' Make sure you are using the latest version; %s.' % update_cmd + msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.' 
super(ExtractorError, self).__init__(msg) self.traceback = tb @@ -437,7 +460,7 @@ class ExtractorError(Exception): def format_traceback(self): if self.traceback is None: return None - return u''.join(traceback.format_tb(self.traceback)) + return ''.join(traceback.format_tb(self.traceback)) class RegexNotFoundError(ExtractorError): @@ -452,6 +475,7 @@ class DownloadError(Exception): configured to continue on errors. They will contain the appropriate error message. """ + def __init__(self, msg, exc_info=None): """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """ super(DownloadError, self).__init__(msg) @@ -473,9 +497,11 @@ class PostProcessingError(Exception): This exception may be raised by PostProcessor's .run() method to indicate an error in the postprocessing task. """ + def __init__(self, msg): self.msg = msg + class MaxDownloadsReached(Exception): """ --max-downloads limit has been reached. """ pass @@ -505,6 +531,7 @@ class ContentTooShortError(Exception): self.downloaded = downloaded self.expected = expected + class YoutubeDLHandler(compat_urllib_request.HTTPHandler): """Handler for HTTP requests and responses. 
@@ -624,7 +651,7 @@ def unified_strdate(date_str): return None upload_date = None - #Replace commas + # Replace commas date_str = date_str.replace(',', ' ') # %z (UTC offset) is only supported in python>=3.2 date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str) @@ -665,17 +692,20 @@ def unified_strdate(date_str): upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d') return upload_date -def determine_ext(url, default_ext=u'unknown_video'): + +def determine_ext(url, default_ext='unknown_video'): if url is None: return default_ext - guess = url.partition(u'?')[0].rpartition(u'.')[2] + guess = url.partition('?')[0].rpartition('.')[2] if re.match(r'^[A-Za-z0-9]+$', guess): return guess else: return default_ext + def subtitles_filename(filename, sub_lang, sub_format): - return filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format + return filename.rsplit('.', 1)[0] + '.' + sub_lang + '.' + sub_format + def date_from_str(date_str): """ @@ -691,7 +721,7 @@ def date_from_str(date_str): if sign == '-': time = -time unit = match.group('unit') - #A bad aproximation? + # A bad aproximation? 
if unit == 'month': unit = 'day' time *= 30 @@ -702,7 +732,8 @@ def date_from_str(date_str): delta = datetime.timedelta(**{unit: time}) return today + delta return datetime.datetime.strptime(date_str, "%Y%m%d").date() - + + def hyphenate_date(date_str): """ Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format""" @@ -712,8 +743,10 @@ def hyphenate_date(date_str): else: return date_str + class DateRange(object): """Represents a time interval between two dates""" + def __init__(self, start=None, end=None): """start and end must be strings in the format accepted by date""" if start is not None: @@ -726,17 +759,20 @@ class DateRange(object): self.end = datetime.datetime.max.date() if self.start > self.end: raise ValueError('Date range: "%s" , the start date must be before the end date' % self) + @classmethod def day(cls, day): """Returns a range that only contains the given day""" - return cls(day,day) + return cls(day, day) + def __contains__(self, date): """Check if the date is in the range""" if not isinstance(date, datetime.date): date = date_from_str(date) return self.start <= date <= self.end + def __str__(self): - return '%s - %s' % ( self.start.isoformat(), self.end.isoformat()) + return '%s - %s' % (self.start.isoformat(), self.end.isoformat()) def platform_name(): @@ -967,7 +1003,7 @@ def shell_quote(args): # We may get a filename encoded with 'encodeFilename' a = a.decode(encoding) quoted_args.append(pipes.quote(a)) - return u' '.join(quoted_args) + return ' '.join(quoted_args) def takewhile_inclusive(pred, seq): @@ -983,31 +1019,31 @@ def smuggle_url(url, data): """ Pass additional data in a URL for internal use. 
""" sdata = compat_urllib_parse.urlencode( - {u'__youtubedl_smuggle': json.dumps(data)}) - return url + u'#' + sdata + {'__youtubedl_smuggle': json.dumps(data)}) + return url + '#' + sdata def unsmuggle_url(smug_url, default=None): if not '#__youtubedl_smuggle' in smug_url: return smug_url, default - url, _, sdata = smug_url.rpartition(u'#') - jsond = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0] + url, _, sdata = smug_url.rpartition('#') + jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0] data = json.loads(jsond) return url, data def format_bytes(bytes): if bytes is None: - return u'N/A' + return 'N/A' if type(bytes) is str: bytes = float(bytes) if bytes == 0.0: exponent = 0 else: exponent = int(math.log(bytes, 1024.0)) - suffix = [u'B', u'KiB', u'MiB', u'GiB', u'TiB', u'PiB', u'EiB', u'ZiB', u'YiB'][exponent] + suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent] converted = float(bytes) / float(1024 ** exponent) - return u'%.2f%s' % (converted, suffix) + return '%.2f%s' % (converted, suffix) def get_term_width(): @@ -1030,8 +1066,8 @@ def month_by_name(name): """ Return the number of a month by (locale-independently) English name """ ENGLISH_NAMES = [ - u'January', u'February', u'March', u'April', u'May', u'June', - u'July', u'August', u'September', u'October', u'November', u'December'] + 'January', 'February', 'March', 'April', 'May', 'June', + 'July', 'August', 'September', 'October', 'November', 'December'] try: return ENGLISH_NAMES.index(name) + 1 except ValueError: @@ -1042,7 +1078,7 @@ def fix_xml_ampersands(xml_str): """Replace all the '&' by '&' in XML""" return re.sub( r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)', - u'&', + '&', xml_str) @@ -1075,7 +1111,7 @@ def remove_end(s, end): def url_basename(url): path = compat_urlparse.urlparse(url).path - return path.strip(u'/').split(u'/')[-1] + return path.strip('/').split('/')[-1] class HEADRequest(compat_urllib_request.Request): @@ -1100,7 +1136,7 
@@ def str_to_int(int_str): """ A more relaxed version of int_or_none """ if int_str is None: return None - int_str = re.sub(r'[,\.\+]', u'', int_str) + int_str = re.sub(r'[,\.\+]', '', int_str) return int(int_str) @@ -1115,7 +1151,12 @@ def parse_duration(s): s = s.strip() m = re.match( - r'(?i)(?:(?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?(?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?$', s) + r'''(?ix)T? + (?: + (?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)? + (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s* + )? + (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?$''', s) if not m: return None res = int(m.group('secs')) @@ -1129,8 +1170,8 @@ def parse_duration(s): def prepend_extension(filename, ext): - name, real_ext = os.path.splitext(filename) - return u'{0}.{1}{2}'.format(name, ext, real_ext) + name, real_ext = os.path.splitext(filename) + return '{0}.{1}{2}'.format(name, ext, real_ext) def check_executable(exe, args=[]): @@ -1145,7 +1186,7 @@ def check_executable(exe, args=[]): def get_exe_version(exe, args=['--version'], version_re=r'version\s+([0-9._-a-zA-Z]+)', - unrecognized=u'present'): + unrecognized='present'): """ Returns the version of the specified executable, or False if the executable is not present """ try: @@ -1266,7 +1307,7 @@ def escape_url(url): ).geturl() try: - struct.pack(u'!I', 0) + struct.pack('!I', 0) except TypeError: # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument def struct_pack(spec, *args): @@ -1287,7 +1328,7 @@ def read_batch_urls(batch_fd): def fixup(url): if not isinstance(url, compat_str): url = url.decode('utf-8', 'replace') - BOM_UTF8 = u'\xef\xbb\xbf' + BOM_UTF8 = '\xef\xbb\xbf' if url.startswith(BOM_UTF8): url = url[len(BOM_UTF8):] url = url.strip() @@ -1406,3 +1447,15 @@ def is_outdated_version(version, limit, assume_new=True): return version_tuple(version) < version_tuple(limit) except ValueError: return not assume_new + + +def ytdl_is_updateable(): + """ Returns if youtube-dl can 
be updated with -U """ + from zipimport import zipimporter + + return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen') + + +def args_to_str(args): + # Get a short string representation for a subprocess command + return ' '.join(shlex_quote(a) for a in args)