_ Git - youtube-dl/blob - youtube_dl/utils.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import unicode_literals
   5
   6 import calendar
   7 import codecs
   8 import contextlib
   9 import ctypes
  10 import datetime
  11 import email.utils
  12 import errno
  13 import gzip
  14 import itertools
  15 import io
  16 import json
  17 import locale
  18 import math
  19 import os
  20 import pipes
  21 import platform
  22 import re
  23 import ssl
  24 import socket
  25 import struct
  26 import subprocess
  27 import sys
  28 import tempfile
  29 import traceback
  30 import xml.etree.ElementTree
  31 import zlib
  32
  33 from .compat import (
  34     compat_chr,
  35     compat_getenv,
  36     compat_html_entities,
  37     compat_parse_qs,
  38     compat_str,
  39     compat_urllib_error,
  40     compat_urllib_parse,
  41     compat_urllib_parse_urlparse,
  42     compat_urllib_request,
  43     compat_urlparse,
  44 )
  45
  46
# The type of a compiled regular expression is not clearly defined under a
# public name, so take it from an actual compiled pattern.
compiled_regex_type = type(re.compile(''))

# Default request headers; YoutubeDLHandler.http_request adds each of them to
# outgoing requests.
std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
  57
  58 def preferredencoding():
  59     """Get preferred encoding.
  60
  61     Returns the best encoding scheme for the system, based on
  62     locale.getpreferredencoding() and some further tweaks.
  63     """
  64     try:
  65         pref = locale.getpreferredencoding()
  66         u'TEST'.encode(pref)
  67     except:
  68         pref = 'UTF-8'
  69
  70     return pref
  71
  72
  73 def write_json_file(obj, fn):
  74     """ Encode obj as JSON and write it to fn, atomically """
  75
  76     args = {
  77         'suffix': '.tmp',
  78         'prefix': os.path.basename(fn) + '.',
  79         'dir': os.path.dirname(fn),
  80         'delete': False,
  81     }
  82
  83     # In Python 2.x, json.dump expects a bytestream.
  84     # In Python 3.x, it writes to a character stream
  85     if sys.version_info < (3, 0):
  86         args['mode'] = 'wb'
  87     else:
  88         args.update({
  89             'mode': 'w',
  90             'encoding': 'utf-8',
  91         })
  92
  93     tf = tempfile.NamedTemporaryFile(**args)
  94
  95     try:
  96         with tf:
  97             json.dump(obj, tf)
  98         os.rename(tf.name, fn)
  99     except:
 100         try:
 101             os.remove(tf.name)
 102         except OSError:
 103             pass
 104         raise
 105
 106
 107 if sys.version_info >= (2, 7):
 108     def find_xpath_attr(node, xpath, key, val):
 109         """ Find the xpath xpath[@key=val] """
 110         assert re.match(r'^[a-zA-Z-]+$', key)
 111         assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
 112         expr = xpath + u"[@%s='%s']" % (key, val)
 113         return node.find(expr)
 114 else:
 115     def find_xpath_attr(node, xpath, key, val):
 116         # Here comes the crazy part: In 2.6, if the xpath is a unicode,
 117         # .//node does not match if a node is a direct child of . !
 118         if isinstance(xpath, unicode):
 119             xpath = xpath.encode('ascii')
 120
 121         for f in node.findall(xpath):
 122             if f.attrib.get(key) == val:
 123                 return f
 124         return None
 125
 126 # On python2.6 the xml.etree.ElementTree.Element methods don't support
 127 # the namespace parameter
 128 def xpath_with_ns(path, ns_map):
 129     components = [c.split(':') for c in path.split('/')]
 130     replaced = []
 131     for c in components:
 132         if len(c) == 1:
 133             replaced.append(c[0])
 134         else:
 135             ns, tag = c
 136             replaced.append('{%s}%s' % (ns_map[ns], tag))
 137     return '/'.join(replaced)
 138
 139
 140 def xpath_text(node, xpath, name=None, fatal=False):
 141     if sys.version_info < (2, 7):  # Crazy 2.6
 142         xpath = xpath.encode('ascii')
 143
 144     n = node.find(xpath)
 145     if n is None:
 146         if fatal:
 147             name = xpath if name is None else name
 148             raise ExtractorError('Could not find XML element %s' % name)
 149         else:
 150             return None
 151     return n.text
 152
 153
 154 def get_element_by_id(id, html):
 155     """Return the content of the tag with the specified ID in the passed HTML document"""
 156     return get_element_by_attribute("id", id, html)
 157
 158
 159 def get_element_by_attribute(attribute, value, html):
 160     """Return the content of the tag with the specified attribute in the passed HTML document"""
 161
 162     m = re.search(r'''(?xs)
 163         <([a-zA-Z0-9:._-]+)
 164          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
 165          \s+%s=['"]?%s['"]?
 166          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
 167         \s*>
 168         (?P<content>.*?)
 169         </\1>
 170     ''' % (re.escape(attribute), re.escape(value)), html)
 171
 172     if not m:
 173         return None
 174     res = m.group('content')
 175
 176     if res.startswith('"') or res.startswith("'"):
 177         res = res[1:-1]
 178
 179     return unescapeHTML(res)
 180
 181
 182 def clean_html(html):
 183     """Clean an HTML snippet into a readable string"""
 184     # Newline vs <br />
 185     html = html.replace('\n', ' ')
 186     html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
 187     html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
 188     # Strip html tags
 189     html = re.sub('<.*?>', '', html)
 190     # Replace html entities
 191     html = unescapeHTML(html)
 192     return html.strip()
 193
 194
 195 def sanitize_open(filename, open_mode):
 196     """Try to open the given filename, and slightly tweak it if this fails.
 197
 198     Attempts to open the given filename. If this fails, it tries to change
 199     the filename slightly, step by step, until it's either able to open it
 200     or it fails and raises a final exception, like the standard open()
 201     function.
 202
 203     It returns the tuple (stream, definitive_file_name).
 204     """
 205     try:
 206         if filename == u'-':
 207             if sys.platform == 'win32':
 208                 import msvcrt
 209                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
 210             return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
 211         stream = open(encodeFilename(filename), open_mode)
 212         return (stream, filename)
 213     except (IOError, OSError) as err:
 214         if err.errno in (errno.EACCES,):
 215             raise
 216
 217         # In case of error, try to remove win32 forbidden chars
 218         alt_filename = os.path.join(
 219                         re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', path_part)
 220                         for path_part in os.path.split(filename)
 221                        )
 222         if alt_filename == filename:
 223             raise
 224         else:
 225             # An exception here should be caught in the caller
 226             stream = open(encodeFilename(filename), open_mode)
 227             return (stream, alt_filename)
 228
 229
 230 def timeconvert(timestr):
 231     """Convert RFC 2822 defined time string into system timestamp"""
 232     timestamp = None
 233     timetuple = email.utils.parsedate_tz(timestr)
 234     if timetuple is not None:
 235         timestamp = email.utils.mktime_tz(timetuple)
 236     return timestamp
 237
 238 def sanitize_filename(s, restricted=False, is_id=False):
 239     """Sanitizes a string so it could be used as part of a filename.
 240     If restricted is set, use a stricter subset of allowed characters.
 241     Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
 242     """
 243     def replace_insane(char):
 244         if char == '?' or ord(char) < 32 or ord(char) == 127:
 245             return ''
 246         elif char == '"':
 247             return '' if restricted else '\''
 248         elif char == ':':
 249             return '_-' if restricted else ' -'
 250         elif char in '\\/|*<>':
 251             return '_'
 252         if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
 253             return '_'
 254         if restricted and ord(char) > 127:
 255             return '_'
 256         return char
 257
 258     result = u''.join(map(replace_insane, s))
 259     if not is_id:
 260         while '__' in result:
 261             result = result.replace('__', '_')
 262         result = result.strip('_')
 263         # Common case of "Foreign band name - English song title"
 264         if restricted and result.startswith('-_'):
 265             result = result[2:]
 266         if not result:
 267             result = '_'
 268     return result
 269
 270 def orderedSet(iterable):
 271     """ Remove all duplicates from the input iterable """
 272     res = []
 273     for el in iterable:
 274         if el not in res:
 275             res.append(el)
 276     return res
 277
 278
 279 def _htmlentity_transform(entity):
 280     """Transforms an HTML entity to a character."""
 281     # Known non-numeric HTML entity
 282     if entity in compat_html_entities.name2codepoint:
 283         return compat_chr(compat_html_entities.name2codepoint[entity])
 284
 285     mobj = re.match(r'#(x?[0-9]+)', entity)
 286     if mobj is not None:
 287         numstr = mobj.group(1)
 288         if numstr.startswith(u'x'):
 289             base = 16
 290             numstr = u'0%s' % numstr
 291         else:
 292             base = 10
 293         return compat_chr(int(numstr, base))
 294
 295     # Unknown entity in name, return its literal representation
 296     return (u'&%s;' % entity)
 297
 298
 299 def unescapeHTML(s):
 300     if s is None:
 301         return None
 302     assert type(s) == compat_str
 303
 304     return re.sub(
 305         r'&([^;]+);', lambda m: _htmlentity_transform(m.group(1)), s)
 306
 307
 308 def encodeFilename(s, for_subprocess=False):
 309     """
 310     @param s The name of the file
 311     """
 312
 313     assert type(s) == compat_str
 314
 315     # Python 3 has a Unicode API
 316     if sys.version_info >= (3, 0):
 317         return s
 318
 319     if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
 320         # Pass u'' directly to use Unicode APIs on Windows 2000 and up
 321         # (Detecting Windows NT 4 is tricky because 'major >= 4' would
 322         # match Windows 9x series as well. Besides, NT 4 is obsolete.)
 323         if not for_subprocess:
 324             return s
 325         else:
 326             # For subprocess calls, encode with locale encoding
 327             # Refer to http://stackoverflow.com/a/9951851/35070
 328             encoding = preferredencoding()
 329     else:
 330         encoding = sys.getfilesystemencoding()
 331     if encoding is None:
 332         encoding = 'utf-8'
 333     return s.encode(encoding, 'ignore')
 334
 335
 336 def encodeArgument(s):
 337     if not isinstance(s, compat_str):
 338         # Legacy code that uses byte strings
 339         # Uncomment the following line after fixing all post processors
 340         #assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
 341         s = s.decode('ascii')
 342     return encodeFilename(s, True)
 343
 344
 345 def decodeOption(optval):
 346     if optval is None:
 347         return optval
 348     if isinstance(optval, bytes):
 349         optval = optval.decode(preferredencoding())
 350
 351     assert isinstance(optval, compat_str)
 352     return optval
 353
 354 def formatSeconds(secs):
 355     if secs > 3600:
 356         return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
 357     elif secs > 60:
 358         return '%d:%02d' % (secs // 60, secs % 60)
 359     else:
 360         return '%d' % secs
 361
 362
 363 def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
 364     if sys.version_info < (3, 2):
 365         import httplib
 366
 367         class HTTPSConnectionV3(httplib.HTTPSConnection):
 368             def __init__(self, *args, **kwargs):
 369                 httplib.HTTPSConnection.__init__(self, *args, **kwargs)
 370
 371             def connect(self):
 372                 sock = socket.create_connection((self.host, self.port), self.timeout)
 373                 if getattr(self, '_tunnel_host', False):
 374                     self.sock = sock
 375                     self._tunnel()
 376                 try:
 377                     self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_TLSv1)
 378                 except ssl.SSLError:
 379                     self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
 380
 381         class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
 382             def https_open(self, req):
 383                 return self.do_open(HTTPSConnectionV3, req)
 384         return HTTPSHandlerV3(**kwargs)
 385     elif hasattr(ssl, 'create_default_context'):  # Python >= 3.4
 386         context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
 387         context.options &= ~ssl.OP_NO_SSLv3  # Allow older, not-as-secure SSLv3
 388         if opts_no_check_certificate:
 389             context.verify_mode = ssl.CERT_NONE
 390         return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
 391     else:  # Python < 3.4
 392         context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
 393         context.verify_mode = (ssl.CERT_NONE
 394                                if opts_no_check_certificate
 395                                else ssl.CERT_REQUIRED)
 396         context.set_default_verify_paths()
 397         try:
 398             context.load_default_certs()
 399         except AttributeError:
 400             pass  # Python < 3.4
 401         return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
 402
 403 class ExtractorError(Exception):
 404     """Error during info extraction."""
 405     def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
 406         """ tb, if given, is the original traceback (so that it can be printed out).
 407         If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
 408         """
 409
 410         if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
 411             expected = True
 412         if video_id is not None:
 413             msg = video_id + ': ' + msg
 414         if cause:
 415             msg += u' (caused by %r)' % cause
 416         if not expected:
 417             msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type  youtube-dl -U  to update.'
 418         super(ExtractorError, self).__init__(msg)
 419
 420         self.traceback = tb
 421         self.exc_info = sys.exc_info()  # preserve original exception
 422         self.cause = cause
 423         self.video_id = video_id
 424
 425     def format_traceback(self):
 426         if self.traceback is None:
 427             return None
 428         return u''.join(traceback.format_tb(self.traceback))
 429
 430
 431 class RegexNotFoundError(ExtractorError):
 432     """Error when a regex didn't match"""
 433     pass
 434
 435
 436 class DownloadError(Exception):
 437     """Download Error exception.
 438
 439     This exception may be thrown by FileDownloader objects if they are not
 440     configured to continue on errors. They will contain the appropriate
 441     error message.
 442     """
 443     def __init__(self, msg, exc_info=None):
 444         """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
 445         super(DownloadError, self).__init__(msg)
 446         self.exc_info = exc_info
 447
 448
 449 class SameFileError(Exception):
 450     """Same File exception.
 451
 452     This exception will be thrown by FileDownloader objects if they detect
 453     multiple files would have to be downloaded to the same file on disk.
 454     """
 455     pass
 456
 457
 458 class PostProcessingError(Exception):
 459     """Post Processing exception.
 460
 461     This exception may be raised by PostProcessor's .run() method to
 462     indicate an error in the postprocessing task.
 463     """
 464     def __init__(self, msg):
 465         self.msg = msg
 466
 467 class MaxDownloadsReached(Exception):
 468     """ --max-downloads limit has been reached. """
 469     pass
 470
 471
 472 class UnavailableVideoError(Exception):
 473     """Unavailable Format exception.
 474
 475     This exception will be thrown when a video is requested
 476     in a format that is not available for that video.
 477     """
 478     pass
 479
 480
 481 class ContentTooShortError(Exception):
 482     """Content Too Short exception.
 483
 484     This exception may be raised by FileDownloader objects when a file they
 485     download is too small for what the server announced first, indicating
 486     the connection was probably interrupted.
 487     """
 488     # Both in bytes
 489     downloaded = None
 490     expected = None
 491
 492     def __init__(self, downloaded, expected):
 493         self.downloaded = downloaded
 494         self.expected = expected
 495
 496 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
 497     """Handler for HTTP requests and responses.
 498
 499     This class, when installed with an OpenerDirector, automatically adds
 500     the standard headers to every HTTP request and handles gzipped and
 501     deflated responses from web servers. If compression is to be avoided in
 502     a particular request, the original request in the program code only has
 503     to include the HTTP header "Youtubedl-No-Compression", which will be
 504     removed before making the real request.
 505
 506     Part of this code was copied from:
 507
 508     http://techknack.net/python-urllib2-handlers/
 509
 510     Andrew Rowls, the author of that code, agreed to release it to the
 511     public domain.
 512     """
 513
 514     @staticmethod
 515     def deflate(data):
 516         try:
 517             return zlib.decompress(data, -zlib.MAX_WBITS)
 518         except zlib.error:
 519             return zlib.decompress(data)
 520
 521     @staticmethod
 522     def addinfourl_wrapper(stream, headers, url, code):
 523         if hasattr(compat_urllib_request.addinfourl, 'getcode'):
 524             return compat_urllib_request.addinfourl(stream, headers, url, code)
 525         ret = compat_urllib_request.addinfourl(stream, headers, url)
 526         ret.code = code
 527         return ret
 528
 529     def http_request(self, req):
 530         for h, v in std_headers.items():
 531             if h not in req.headers:
 532                 req.add_header(h, v)
 533         if 'Youtubedl-no-compression' in req.headers:
 534             if 'Accept-encoding' in req.headers:
 535                 del req.headers['Accept-encoding']
 536             del req.headers['Youtubedl-no-compression']
 537         if 'Youtubedl-user-agent' in req.headers:
 538             if 'User-agent' in req.headers:
 539                 del req.headers['User-agent']
 540             req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
 541             del req.headers['Youtubedl-user-agent']
 542
 543         if sys.version_info < (2, 7) and '#' in req.get_full_url():
 544             # Python 2.6 is brain-dead when it comes to fragments
 545             req._Request__original = req._Request__original.partition('#')[0]
 546             req._Request__r_type = req._Request__r_type.partition('#')[0]
 547
 548         return req
 549
 550     def http_response(self, req, resp):
 551         old_resp = resp
 552         # gzip
 553         if resp.headers.get('Content-encoding', '') == 'gzip':
 554             content = resp.read()
 555             gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
 556             try:
 557                 uncompressed = io.BytesIO(gz.read())
 558             except IOError as original_ioerror:
 559                 # There may be junk add the end of the file
 560                 # See http://stackoverflow.com/q/4928560/35070 for details
 561                 for i in range(1, 1024):
 562                     try:
 563                         gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
 564                         uncompressed = io.BytesIO(gz.read())
 565                     except IOError:
 566                         continue
 567                     break
 568                 else:
 569                     raise original_ioerror
 570             resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
 571             resp.msg = old_resp.msg
 572         # deflate
 573         if resp.headers.get('Content-encoding', '') == 'deflate':
 574             gz = io.BytesIO(self.deflate(resp.read()))
 575             resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
 576             resp.msg = old_resp.msg
 577         return resp
 578
 579     https_request = http_request
 580     https_response = http_response
 581
 582
 583 def parse_iso8601(date_str, delimiter='T'):
 584     """ Return a UNIX timestamp from the given date """
 585
 586     if date_str is None:
 587         return None
 588
 589     m = re.search(
 590         r'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
 591         date_str)
 592     if not m:
 593         timezone = datetime.timedelta()
 594     else:
 595         date_str = date_str[:-len(m.group(0))]
 596         if not m.group('sign'):
 597             timezone = datetime.timedelta()
 598         else:
 599             sign = 1 if m.group('sign') == '+' else -1
 600             timezone = datetime.timedelta(
 601                 hours=sign * int(m.group('hours')),
 602                 minutes=sign * int(m.group('minutes')))
 603     date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
 604     dt = datetime.datetime.strptime(date_str, date_format) - timezone
 605     return calendar.timegm(dt.timetuple())
 606
 607
 608 def unified_strdate(date_str):
 609     """Return a string with the date in the format YYYYMMDD"""
 610
 611     if date_str is None:
 612         return None
 613
 614     upload_date = None
 615     #Replace commas
 616     date_str = date_str.replace(',', ' ')
 617     # %z (UTC offset) is only supported in python>=3.2
 618     date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
 619     format_expressions = [
 620         '%d %B %Y',
 621         '%d %b %Y',
 622         '%B %d %Y',
 623         '%b %d %Y',
 624         '%b %dst %Y %I:%M%p',
 625         '%b %dnd %Y %I:%M%p',
 626         '%b %dth %Y %I:%M%p',
 627         '%Y-%m-%d',
 628         '%Y/%m/%d',
 629         '%d.%m.%Y',
 630         '%d/%m/%Y',
 631         '%d/%m/%y',
 632         '%Y/%m/%d %H:%M:%S',
 633         '%d/%m/%Y %H:%M:%S',
 634         '%Y-%m-%d %H:%M:%S',
 635         '%Y-%m-%d %H:%M:%S.%f',
 636         '%d.%m.%Y %H:%M',
 637         '%d.%m.%Y %H.%M',
 638         '%Y-%m-%dT%H:%M:%SZ',
 639         '%Y-%m-%dT%H:%M:%S.%fZ',
 640         '%Y-%m-%dT%H:%M:%S.%f0Z',
 641         '%Y-%m-%dT%H:%M:%S',
 642         '%Y-%m-%dT%H:%M:%S.%f',
 643         '%Y-%m-%dT%H:%M',
 644     ]
 645     for expression in format_expressions:
 646         try:
 647             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
 648         except ValueError:
 649             pass
 650     if upload_date is None:
 651         timetuple = email.utils.parsedate_tz(date_str)
 652         if timetuple:
 653             upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
 654     return upload_date
 655
 656 def determine_ext(url, default_ext=u'unknown_video'):
 657     if url is None:
 658         return default_ext
 659     guess = url.partition(u'?')[0].rpartition(u'.')[2]
 660     if re.match(r'^[A-Za-z0-9]+$', guess):
 661         return guess
 662     else:
 663         return default_ext
 664
 665 def subtitles_filename(filename, sub_lang, sub_format):
 666     return filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
 667
 668 def date_from_str(date_str):
 669     """
 670     Return a datetime object from a string in the format YYYYMMDD or
 671     (now|today)[+-][0-9](day|week|month|year)(s)?"""
 672     today = datetime.date.today()
 673     if date_str == 'now'or date_str == 'today':
 674         return today
 675     match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
 676     if match is not None:
 677         sign = match.group('sign')
 678         time = int(match.group('time'))
 679         if sign == '-':
 680             time = -time
 681         unit = match.group('unit')
 682         #A bad aproximation?
 683         if unit == 'month':
 684             unit = 'day'
 685             time *= 30
 686         elif unit == 'year':
 687             unit = 'day'
 688             time *= 365
 689         unit += 's'
 690         delta = datetime.timedelta(**{unit: time})
 691         return today + delta
 692     return datetime.datetime.strptime(date_str, "%Y%m%d").date()
 693
 694 def hyphenate_date(date_str):
 695     """
 696     Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
 697     match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
 698     if match is not None:
 699         return '-'.join(match.groups())
 700     else:
 701         return date_str
 702
 703 class DateRange(object):
 704     """Represents a time interval between two dates"""
 705     def __init__(self, start=None, end=None):
 706         """start and end must be strings in the format accepted by date"""
 707         if start is not None:
 708             self.start = date_from_str(start)
 709         else:
 710             self.start = datetime.datetime.min.date()
 711         if end is not None:
 712             self.end = date_from_str(end)
 713         else:
 714             self.end = datetime.datetime.max.date()
 715         if self.start > self.end:
 716             raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
 717     @classmethod
 718     def day(cls, day):
 719         """Returns a range that only contains the given day"""
 720         return cls(day,day)
 721     def __contains__(self, date):
 722         """Check if the date is in the range"""
 723         if not isinstance(date, datetime.date):
 724             date = date_from_str(date)
 725         return self.start <= date <= self.end
 726     def __str__(self):
 727         return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
 728
 729
 730 def platform_name():
 731     """ Returns the platform name as a compat_str """
 732     res = platform.platform()
 733     if isinstance(res, bytes):
 734         res = res.decode(preferredencoding())
 735
 736     assert isinstance(res, compat_str)
 737     return res
 738
 739
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out.

    The string is written with the WriteConsoleW API when out is backed by
    file descriptor 1 or 2 and that descriptor is a console. Raises OSError
    if WriteConsoleW reports a failure."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # File descriptor -> argument for GetStdHandle
    # (-11 is STD_OUTPUT_HANDLE, -12 is STD_ERROR_HANDLE)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = ctypes.WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ("GetStdHandle", ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(("WriteConsoleW", ctypes.windll.kernel32))
    # Receives the number of characters written by each WriteConsoleW call
    written = ctypes.wintypes.DWORD(0)

    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(("GetFileType", ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ("GetConsoleMode", ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # True for an invalid handle, for anything that is not a character
        # device (ignoring the FILE_TYPE_REMOTE bit), and when
        # GetConsoleMode fails for the handle.
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character above U+FFFF, or len(s) if none
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        # Write at most 1024 characters, stopping before the next non-BMP
        # character; count is 0 when s starts with such a character.
        count = min(next_nonbmp_pos(s), 1024)

        # A leading non-BMP character is written with a length of 2
        # (presumably its two UTF-16 code units - TODO confirm)
        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            # Continue after whatever part was actually written
            s = s[written.value:]
    return True
 810
 811
 812 def write_string(s, out=None, encoding=None):
 813     if out is None:
 814         out = sys.stderr
 815     assert type(s) == compat_str
 816
 817     if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
 818         if _windows_write_string(s, out):
 819             return
 820
 821     if ('b' in getattr(out, 'mode', '') or
 822             sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
 823         byt = s.encode(encoding or preferredencoding(), 'ignore')
 824         out.write(byt)
 825     elif hasattr(out, 'buffer'):
 826         enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
 827         byt = s.encode(enc, 'ignore')
 828         out.buffer.write(byt)
 829     else:
 830         out.write(s)
 831     out.flush()
 832
 833
 834 def bytes_to_intlist(bs):
 835     if not bs:
 836         return []
 837     if isinstance(bs[0], int):  # Python 3
 838         return list(bs)
 839     else:
 840         return [ord(c) for c in bs]
 841
 842
 843 def intlist_to_bytes(xs):
 844     if not xs:
 845         return b''
 846     if isinstance(chr(0), bytes):  # Python 2
 847         return ''.join([chr(x) for x in xs])
 848     else:
 849         return bytes(xs)
 850
 851
# Cross-platform file locking
# Both branches define the same two helpers:
#   _lock_file(f, exclusive)  - lock the open file f (exclusive or shared)
#   _unlock_file(f)           - release that lock again
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        """ctypes layout of the structure passed as the last argument of
        LockFileEx / UnlockFileEx (see the argtypes below)."""
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    # Declare the prototypes explicitly so ctypes converts arguments and the
    # BOOL result correctly.
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low and high DWORD of the byte count handed to LockFileEx/UnlockFileEx;
    # presumably chosen large enough to cover any file - TODO confirm.
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        """Lock the open file f via LockFileEx, starting at offset 0.

        Raises OSError when the call reports failure.
        """
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Stored on the file object because _unlock_file passes the same
        # structure back to UnlockFileEx.
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # dwFlags: 0x2 is LOCKFILE_EXCLUSIVE_LOCK in the Win32 API, 0x0 asks
        # for a shared lock.
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """Release the lock taken by _lock_file on f.

        Raises OSError when UnlockFileEx reports failure.
        """
        # Only valid after _lock_file has attached the OVERLAPPED pointer
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    import fcntl

    def _lock_file(f, exclusive):
        """Lock the open file f with flock (LOCK_EX if exclusive, else LOCK_SH)."""
        fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

    def _unlock_file(f):
        """Release the flock lock held on f."""
        fcntl.flock(f, fcntl.LOCK_UN)
 915
 916
 917 class locked_file(object):
 918     def __init__(self, filename, mode, encoding=None):
 919         assert mode in ['r', 'a', 'w']
 920         self.f = io.open(filename, mode, encoding=encoding)
 921         self.mode = mode
 922
 923     def __enter__(self):
 924         exclusive = self.mode != 'r'
 925         try:
 926             _lock_file(self.f, exclusive)
 927         except IOError:
 928             self.f.close()
 929             raise
 930         return self
 931
 932     def __exit__(self, etype, value, traceback):
 933         try:
 934             _unlock_file(self.f)
 935         finally:
 936             self.f.close()
 937
 938     def __iter__(self):
 939         return iter(self.f)
 940
 941     def write(self, *args):
 942         return self.f.write(*args)
 943
 944     def read(self, *args):
 945         return self.f.read(*args)
 946
 947
 948 def get_filesystem_encoding():
 949     encoding = sys.getfilesystemencoding()
 950     return encoding if encoding is not None else 'utf-8'
 951
 952
 953 def shell_quote(args):
 954     quoted_args = []
 955     encoding = get_filesystem_encoding()
 956     for a in args:
 957         if isinstance(a, bytes):
 958             # We may get a filename encoded with 'encodeFilename'
 959             a = a.decode(encoding)
 960         quoted_args.append(pipes.quote(a))
 961     return u' '.join(quoted_args)
 962
 963
 964 def takewhile_inclusive(pred, seq):
 965     """ Like itertools.takewhile, but include the latest evaluated element
 966         (the first element so that Not pred(e)) """
 967     for e in seq:
 968         yield e
 969         if not pred(e):
 970             return
 971
 972
 973 def smuggle_url(url, data):
 974     """ Pass additional data in a URL for internal use. """
 975
 976     sdata = compat_urllib_parse.urlencode(
 977         {u'__youtubedl_smuggle': json.dumps(data)})
 978     return url + u'#' + sdata
 979
 980
 981 def unsmuggle_url(smug_url, default=None):
 982     if not '#__youtubedl_smuggle' in smug_url:
 983         return smug_url, default
 984     url, _, sdata = smug_url.rpartition(u'#')
 985     jsond = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0]
 986     data = json.loads(jsond)
 987     return url, data
 988
 989
 990 def format_bytes(bytes):
 991     if bytes is None:
 992         return u'N/A'
 993     if type(bytes) is str:
 994         bytes = float(bytes)
 995     if bytes == 0.0:
 996         exponent = 0
 997     else:
 998         exponent = int(math.log(bytes, 1024.0))
 999     suffix = [u'B', u'KiB', u'MiB', u'GiB', u'TiB', u'PiB', u'EiB', u'ZiB', u'YiB'][exponent]
1000     converted = float(bytes) / float(1024 ** exponent)
1001     return u'%.2f%s' % (converted, suffix)
1002
1003
def get_term_width():
    """ Return the terminal width in columns, or None if it is unknown.

    The COLUMNS environment variable is used when it holds an integer.
    Otherwise the second field printed by `stty size` is used.
    """
    columns = compat_getenv('COLUMNS', None)
    if columns:
        try:
            return int(columns)
        except ValueError:
            # Garbage in COLUMNS: fall through and ask the terminal instead
            pass

    try:
        sp = subprocess.Popen(
            ['stty', 'size'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, err = sp.communicate()
        return int(out.split()[1])
    except Exception:
        # Best effort only (no stty, no terminal, unexpected output, ...).
        # Unlike a bare except this lets KeyboardInterrupt and SystemExit
        # propagate.
        pass
    return None
1018
1019
def month_by_name(name):
    """ Return the number (1-12) of the month with the given English name,
    independent of the locale, or None if the name is not a month """

    ENGLISH_NAMES = [
        u'January', u'February', u'March', u'April', u'May', u'June',
        u'July', u'August', u'September', u'October', u'November', u'December']
    if name in ENGLISH_NAMES:
        return 1 + ENGLISH_NAMES.index(name)
    return None
1030
1031
def fix_xml_ampersands(xml_str):
    """Replace every '&' that does not start an entity by '&amp;' in XML

    The named entities amp, lt, gt, apos and quot as well as short numeric
    character references are left untouched.
    """
    bare_ampersand = re.compile(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)')
    return bare_ampersand.sub(u'&amp;', xml_str)
1038
1039
def setproctitle(title):
    """ Set the name of the current process via prctl, on a best-effort basis.

    title must be a text string; it is passed to libc UTF-8 encoded. Nothing
    happens when libc.so.6 cannot be loaded or offers no prctl.
    """
    assert isinstance(title, compat_str)
    try:
        libc = ctypes.cdll.LoadLibrary("libc.so.6")
    except OSError:
        return
    title_bytes = title.encode('utf-8')
    # Initialising the buffer from the bytes makes it one byte longer than
    # the title and NUL-terminated. A buffer of exactly len(title_bytes)
    # carries no terminator, yet prctl reads a C string.
    buf = ctypes.create_string_buffer(title_bytes)
    try:
        libc.prctl(15, buf, 0, 0, 0)  # 15 is PR_SET_NAME
    except AttributeError:
        return  # Strange libc, just skip this
1053
1054
def remove_start(s, start):
    """ Return s without the prefix start; s is returned as is when it does
    not begin with start """
    return s[len(start):] if s.startswith(start) else s
1059
1060
def remove_end(s, end):
    """ Return s without the suffix end; s is returned as is when it does
    not finish with end (or when end is empty) """
    # An empty suffix must be skipped explicitly: every string "ends with"
    # '' and s[:-0] is s[:0], which would wipe out the whole string.
    if end and s.endswith(end):
        return s[:-len(end)]
    return s
1065
1066
def url_basename(url):
    """ Return the last segment of the path of url, ignoring slashes at
    either end of the path """
    trimmed = compat_urlparse.urlparse(url).path.strip(u'/')
    return trimmed.rpartition(u'/')[2]
1070
1071
class HEADRequest(compat_urllib_request.Request):
    """ Request whose HTTP method is always HEAD """

    def get_method(self):
        return 'HEAD'
1075
1076
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """ Return int(v) * invscale // scale, or default if there is no value.

    When get_attr is given, the value is first read from that attribute of
    v (a missing attribute counts as no value). None and the empty string
    both count as no value.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v is None or v == '':
        return default
    return int(v) * invscale // scale
1084
1085
def str_or_none(v, default=None):
    """ Return v converted with compat_str, or default when v is None """
    if v is None:
        return default
    return compat_str(v)
1088
1089
def str_to_int(int_str):
    """ A more relaxed version of int_or_none: commas, dots and plus signs
    are dropped before the conversion. None is passed through. """
    if int_str is None:
        return None
    return int(re.sub(r'[,\.\+]', u'', int_str))
1096
1097
def float_or_none(v, scale=1, invscale=1, default=None):
    """ Return float(v) * invscale / scale, or default when v is None """
    if v is None:
        return default
    return float(v) * invscale / scale
1100
1101
def parse_duration(s):
    """ Parse a duration such as '1:02:03', '2m 3s' or '5.5s' into seconds.

    Returns None for None and for strings that do not look like a duration.
    The result is an int unless the input has fractional seconds, in which
    case it is a float.
    """
    if s is None:
        return None

    match = re.match(
        r'(?i)(?:(?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?(?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?$',
        s.strip())
    if not match:
        return None

    hours, mins, secs, fraction = match.group('hours', 'mins', 'secs', 'ms')
    total = int(secs)
    if mins:
        total += 60 * int(mins)
        if hours:
            total += 60 * 60 * int(hours)
    if fraction:
        total += float(fraction)
    return total
1120
1121
def prepend_extension(filename, ext):
    """ Insert ext in front of the existing extension of filename,
    e.g. ('video.mp4', 'temp') -> 'video.temp.mp4' """
    parts = os.path.splitext(filename)
    return u'{0}.{1}{2}'.format(parts[0], ext, parts[1])
1125
1126
def check_executable(exe, args=[]):
    """ Checks if the given binary can be started (i.e. is installed
    somewhere in PATH), and returns its name; returns False otherwise.
    args can be a list of arguments for a short output (like -version) """
    command = [exe] + args
    try:
        process = subprocess.Popen(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process.communicate()
    except OSError:
        return False
    return exe
1135
1136
def get_exe_version(exe, args=['--version'],
                    version_re=r'version\s+([-0-9._a-zA-Z]+)',
                    unrecognized=u'present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present.

    The executable is run with args and version_re is searched in the first
    line of its combined stdout/stderr output; the first group of the match
    is the version. If nothing matches, unrecognized is returned. """
    # NOTE: the hyphen sits first in the character class of the default
    # version_re so that it is a literal; written between '_' and 'a' it
    # forms a range and versions such as '2.4.3-1ubuntu1' get cut off at
    # the hyphen.
    try:
        out, err = subprocess.Popen(
            [exe] + args,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
    except OSError:
        return False
    firstline = out.partition(b'\n')[0].decode('ascii', 'ignore')
    m = re.search(version_re, firstline)
    if m:
        return m.group(1)
    return unrecognized
1154
1155
class PagedList(object):
    """ Base class of the paged lists; the length is computed by calling
    self.getslice(), which this class itself does not define """

    def __len__(self):
        # This is only useful for tests
        entries = self.getslice()
        return len(entries)
1160
1161
class OnDemandPagedList(PagedList):
    """ Paged list that fetches pages one by one until the requested slice
    is complete or a page comes back short.

    pagefunc(pagenum) returns an iterable with the entries of the given
    zero-based page; pagesize is the number of entries on a full page.
    """

    def __init__(self, pagefunc, pagesize):
        self._pagefunc = pagefunc
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """ Return the entries from index start up to (excluding) end as a
        list; end=None means up to the last entry """
        pagesize = self._pagesize
        collected = []
        for pagenum in itertools.count(start // pagesize):
            page_first = pagenum * pagesize
            page_limit = page_first + pagesize
            if start >= page_limit:
                continue

            entries = list(self._pagefunc(pagenum))

            # Entries to drop at the front (only on the page holding start)
            if page_first <= start < page_limit:
                skip = start % pagesize
            else:
                skip = 0

            # Cut-off position inside the page (only when end lies on it)
            if end is not None and page_first <= end <= page_limit:
                stop = ((end - 1) % pagesize) + 1
            else:
                stop = None

            if skip != 0 or stop is not None:
                entries = entries[skip:stop]
            collected.extend(entries)

            # A page that is not "full" (fewer than pagesize entries) is
            # taken to be the last one, so nothing further is queried.
            # Likewise stop after a whole page when the next page is not
            # interesting because the slice ends exactly here.
            if len(entries) + skip < pagesize or end == page_limit:
                break
        return collected
1203
1204
class InAdvancePagedList(PagedList):
    """ Paged list for which the number of pages is known in advance.

    pagefunc(pagenum) returns an iterable with the entries of the given
    zero-based page, pagecount is the number of pages that exist and
    pagesize the number of entries on a full page.
    """

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """ Return the entries from index start up to (excluding) end as a
        list; end=None means up to the end of the last page """
        res = []
        start_page = start // self._pagesize
        if end is None:
            end_page = self._pagecount
        else:
            # Never request pages past the known page count, even when the
            # requested end lies beyond the last entry.
            end_page = min(self._pagecount, end // self._pagesize + 1)
        # Entries to drop at the front of the first fetched page
        skip_elems = start - start_page * self._pagesize
        # Number of entries still wanted (None: no upper bound)
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    # This page completes the slice
                    page = page[:only_more]
                    res.extend(page)
                    break
            res.extend(page)
        return res
1232
1233
def uppercase_escape(s):
    """ Replace every \\UXXXXXXXX escape sequence (8 hex digits) in s by the
    character it denotes """
    decode = codecs.getdecoder('unicode_escape')

    def expand(match):
        return decode(match.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', expand, s)
1240
1241
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    if sys.version_info < (3, 0):
        # On Python 2, unicode input is handed to quote() as UTF-8 bytes
        if isinstance(s, unicode):
            s = s.encode('utf-8')
    safe_chars = b"%/;:@&=+$,!~*'()?#[]"
    return compat_urllib_parse.quote(s, safe_chars)
1247
1248
def escape_url(url):
    """Escape URL as suggested by RFC 3986

    Path, params, query and fragment are each passed through escape_rfc3986;
    the other components of the URL are left as they are.
    """
    parts = compat_urllib_parse_urlparse(url)
    escaped = parts._replace(
        path=escape_rfc3986(parts.path),
        params=escape_rfc3986(parts.params),
        query=escape_rfc3986(parts.query),
        fragment=escape_rfc3986(parts.fragment))
    return escaped.geturl()
1258
# struct_pack / struct_unpack: struct.pack / struct.unpack that also accept a
# text format string on interpreters where struct insists on bytes.
try:
    struct.pack(u'!I', 0)
except TypeError:
    # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
    def struct_pack(spec, *args):
        fmt = spec.encode('ascii') if isinstance(spec, compat_str) else spec
        return struct.pack(fmt, *args)

    def struct_unpack(spec, *args):
        fmt = spec.encode('ascii') if isinstance(spec, compat_str) else spec
        return struct.unpack(fmt, *args)
else:
    struct_pack, struct_unpack = struct.pack, struct.unpack
1275
1276
def read_batch_urls(batch_fd):
    """ Read the URLs of a batch file, one per line, and close the file.

    Byte lines are decoded as UTF-8, a leading byte order mark and the
    surrounding whitespace are removed, and blank lines as well as lines
    starting with '#', ';' or ']' are skipped. Returns the list of URLs.
    """
    # A UTF-8 BOM shows up as three separate characters when its bytes were
    # decoded one by one, and as the single character U+FEFF when they were
    # decoded as UTF-8 (which is what happens to byte lines below).
    BOMS = (u'\xef\xbb\xbf', u'\ufeff')

    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        for bom in BOMS:
            if url.startswith(bom):
                url = url[len(bom):]
                break
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
1291
1292
def urlencode_postdata(*args, **kargs):
    """ Like compat_urllib_parse.urlencode, but return the result as
    ASCII-encoded bytes """
    encoded = compat_urllib_parse.urlencode(*args, **kargs)
    return encoded.encode('ascii')
1295
1296
# etree_iter(node): iterate over the nodes of an element tree; falls back to
# findall('.//*') where Element has no iter method.
etree_iter = getattr(xml.etree.ElementTree.Element, 'iter', None)
if etree_iter is None:  # Python <=2.6
    etree_iter = lambda n: n.findall('.//*')
1301
1302
def parse_xml(s):
    """ Parse the XML document in the text string s and return its root
    element; doctype declarations are ignored """

    class DoctypeIgnoringBuilder(xml.etree.ElementTree.TreeBuilder):
        def doctype(self, name, pubid, system):
            pass  # Ignore doctypes

    parser = xml.etree.ElementTree.XMLParser(target=DoctypeIgnoringBuilder())
    data = s.encode('utf-8')
    if sys.version_info >= (2, 7):
        tree = xml.etree.ElementTree.XML(data, parser=parser)
    else:
        tree = xml.etree.ElementTree.XML(data)

    # Fix up XML parser in Python 2.x
    if sys.version_info < (3, 0):
        for node in etree_iter(tree):
            text = node.text
            if text is not None and not isinstance(text, compat_str):
                node.text = text.decode('utf-8')
    return tree
1318
1319
# Age limit (in years) for each US content rating label. parse_age_limit
# falls back to this table when its input is not a plain number.
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
1327
1328
def parse_age_limit(s):
    """ Return the age limit described by s as an int.

    s is either a one or two digit number with an optional trailing '+'
    (e.g. '18+') or a key of US_RATINGS. None is returned for None and for
    anything unrecognized.
    """
    if s is None:
        return None
    numeric = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if numeric:
        return int(numeric.group('age'))
    return US_RATINGS.get(s, None)
1334
1335
def strip_jsonp(code):
    """ Remove a JSONP wrapper such as callback(...); and return what is
    inside the parentheses; other input is returned unchanged """
    wrapper = r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?\s*$'
    return re.sub(wrapper, r'\1', code)
1338
1339
def js_to_json(code):
    """ Turn a JavaScript-style literal into JSON text.

    Single-quoted strings become double-quoted ones, bare identifiers other
    than true, false and null are put in double quotes, and a comma right
    before a closing ']' is dropped.
    """
    SINGLE_QUOTED_FIXES = {
        '\\\\': '\\\\',
        "\\'": "'",
        '"': '\\"',
    }

    def fix_kv(m):
        token = m.group(0)
        # Literals and strings that are valid JSON already stay as they are
        if token in ('true', 'false', 'null') or token.startswith('"'):
            return token
        if token.startswith("'"):
            token = re.sub(
                r"\\\\|\\'|\"",
                lambda esc: SINGLE_QUOTED_FIXES[esc.group(0)],
                token[1:-1])
        return '"%s"' % token

    quoted = re.sub(r'''(?x)
        "(?:[^"\\]*(?:\\\\|\\")?)*"|
        '(?:[^'\\]*(?:\\\\|\\')?)*'|
        [a-zA-Z_][a-zA-Z_0-9]*
        ''', fix_kv, code)
    return re.sub(r',(\s*\])', lambda m: m.group(1), quoted)
1363
1364
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values

    The returned function maps a quality id to its position in quality_ids
    and anything not in there to -1.
    """
    def q(qid):
        if qid in quality_ids:
            return quality_ids.index(qid)
        return -1
    return q
1373
1374
# %-style template with the named fields title, id and ext; presumably the
# default output filename template - confirm against the callers.
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
1376
1377
def limit_length(s, length):
    """ Add ellipses to overly long strings

    Strings longer than length are cut so that they are length characters
    long including the trailing '...'. None is passed through.
    """
    ELLIPSES = '...'
    if s is None:
        return None
    if len(s) <= length:
        return s
    keep = length - len(ELLIPSES)
    return s[:keep] + ELLIPSES
1386
1387
def version_tuple(v):
    """ Split a dotted version string into a list of ints,
    e.g. '1.2.3' -> [1, 2, 3] """
    return list(map(int, v.split('.')))
1390
1391
def is_outdated_version(version, limit, assume_new=True):
    """ Return whether version is lower than limit.

    Both are dotted version strings compared by version_tuple. When version
    is empty or one of the strings cannot be parsed, the answer is
    `not assume_new`.
    """
    undecided = not assume_new
    if not version:
        return undecided
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return undecided