_ Git - youtube-dl/blob - youtube_dl/utils.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import unicode_literals
   5
   6 import calendar
   7 import codecs
   8 import contextlib
   9 import ctypes
  10 import datetime
  11 import email.utils
  12 import errno
  13 import gzip
  14 import itertools
  15 import io
  16 import json
  17 import locale
  18 import math
  19 import os
  20 import pipes
  21 import platform
  22 import re
  23 import ssl
  24 import socket
  25 import struct
  26 import subprocess
  27 import sys
  28 import tempfile
  29 import traceback
  30 import xml.etree.ElementTree
  31 import zlib
  32
  33 from .compat import (
  34     compat_chr,
  35     compat_getenv,
  36     compat_html_entities,
  37     compat_html_parser,
  38     compat_parse_qs,
  39     compat_str,
  40     compat_urllib_error,
  41     compat_urllib_parse,
  42     compat_urllib_parse_urlparse,
  43     compat_urllib_request,
  44     compat_urlparse,
  45 )
  46
  47
# The type of compiled regular expression objects. This is not clearly
# defined otherwise, so it is derived from an actual compiled pattern.
compiled_regex_type = type(re.compile(''))

# Default HTTP request headers. YoutubeDLHandler.http_request adds each of
# these to an outgoing request unless the request already has that header.
std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
  58
  59 def preferredencoding():
  60     """Get preferred encoding.
  61
  62     Returns the best encoding scheme for the system, based on
  63     locale.getpreferredencoding() and some further tweaks.
  64     """
  65     try:
  66         pref = locale.getpreferredencoding()
  67         u'TEST'.encode(pref)
  68     except:
  69         pref = 'UTF-8'
  70
  71     return pref
  72
  73
  74 def write_json_file(obj, fn):
  75     """ Encode obj as JSON and write it to fn, atomically """
  76
  77     args = {
  78         'suffix': '.tmp',
  79         'prefix': os.path.basename(fn) + '.',
  80         'dir': os.path.dirname(fn),
  81         'delete': False,
  82     }
  83
  84     # In Python 2.x, json.dump expects a bytestream.
  85     # In Python 3.x, it writes to a character stream
  86     if sys.version_info < (3, 0):
  87         args['mode'] = 'wb'
  88     else:
  89         args.update({
  90             'mode': 'w',
  91             'encoding': 'utf-8',
  92         })
  93
  94     tf = tempfile.NamedTemporaryFile(**args)
  95
  96     try:
  97         with tf:
  98             json.dump(obj, tf)
  99         os.rename(tf.name, fn)
 100     except:
 101         try:
 102             os.remove(tf.name)
 103         except OSError:
 104             pass
 105         raise
 106
 107
 108 if sys.version_info >= (2, 7):
 109     def find_xpath_attr(node, xpath, key, val):
 110         """ Find the xpath xpath[@key=val] """
 111         assert re.match(r'^[a-zA-Z-]+$', key)
 112         assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
 113         expr = xpath + u"[@%s='%s']" % (key, val)
 114         return node.find(expr)
 115 else:
 116     def find_xpath_attr(node, xpath, key, val):
 117         # Here comes the crazy part: In 2.6, if the xpath is a unicode,
 118         # .//node does not match if a node is a direct child of . !
 119         if isinstance(xpath, unicode):
 120             xpath = xpath.encode('ascii')
 121
 122         for f in node.findall(xpath):
 123             if f.attrib.get(key) == val:
 124                 return f
 125         return None
 126
 127 # On python2.6 the xml.etree.ElementTree.Element methods don't support
 128 # the namespace parameter
 129 def xpath_with_ns(path, ns_map):
 130     components = [c.split(':') for c in path.split('/')]
 131     replaced = []
 132     for c in components:
 133         if len(c) == 1:
 134             replaced.append(c[0])
 135         else:
 136             ns, tag = c
 137             replaced.append('{%s}%s' % (ns_map[ns], tag))
 138     return '/'.join(replaced)
 139
 140
 141 def xpath_text(node, xpath, name=None, fatal=False):
 142     if sys.version_info < (2, 7):  # Crazy 2.6
 143         xpath = xpath.encode('ascii')
 144
 145     n = node.find(xpath)
 146     if n is None:
 147         if fatal:
 148             name = xpath if name is None else name
 149             raise ExtractorError('Could not find XML element %s' % name)
 150         else:
 151             return None
 152     return n.text
 153
 154
 155 def get_element_by_id(id, html):
 156     """Return the content of the tag with the specified ID in the passed HTML document"""
 157     return get_element_by_attribute("id", id, html)
 158
 159
 160 def get_element_by_attribute(attribute, value, html):
 161     """Return the content of the tag with the specified attribute in the passed HTML document"""
 162
 163     m = re.search(r'''(?xs)
 164         <([a-zA-Z0-9:._-]+)
 165          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
 166          \s+%s=['"]?%s['"]?
 167          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
 168         \s*>
 169         (?P<content>.*?)
 170         </\1>
 171     ''' % (re.escape(attribute), re.escape(value)), html)
 172
 173     if not m:
 174         return None
 175     res = m.group('content')
 176
 177     if res.startswith('"') or res.startswith("'"):
 178         res = res[1:-1]
 179
 180     return unescapeHTML(res)
 181
 182
 183 def clean_html(html):
 184     """Clean an HTML snippet into a readable string"""
 185     # Newline vs <br />
 186     html = html.replace('\n', ' ')
 187     html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
 188     html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
 189     # Strip html tags
 190     html = re.sub('<.*?>', '', html)
 191     # Replace html entities
 192     html = unescapeHTML(html)
 193     return html.strip()
 194
 195
 196 def sanitize_open(filename, open_mode):
 197     """Try to open the given filename, and slightly tweak it if this fails.
 198
 199     Attempts to open the given filename. If this fails, it tries to change
 200     the filename slightly, step by step, until it's either able to open it
 201     or it fails and raises a final exception, like the standard open()
 202     function.
 203
 204     It returns the tuple (stream, definitive_file_name).
 205     """
 206     try:
 207         if filename == u'-':
 208             if sys.platform == 'win32':
 209                 import msvcrt
 210                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
 211             return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
 212         stream = open(encodeFilename(filename), open_mode)
 213         return (stream, filename)
 214     except (IOError, OSError) as err:
 215         if err.errno in (errno.EACCES,):
 216             raise
 217
 218         # In case of error, try to remove win32 forbidden chars
 219         alt_filename = os.path.join(
 220                         re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', path_part)
 221                         for path_part in os.path.split(filename)
 222                        )
 223         if alt_filename == filename:
 224             raise
 225         else:
 226             # An exception here should be caught in the caller
 227             stream = open(encodeFilename(filename), open_mode)
 228             return (stream, alt_filename)
 229
 230
 231 def timeconvert(timestr):
 232     """Convert RFC 2822 defined time string into system timestamp"""
 233     timestamp = None
 234     timetuple = email.utils.parsedate_tz(timestr)
 235     if timetuple is not None:
 236         timestamp = email.utils.mktime_tz(timetuple)
 237     return timestamp
 238
 239 def sanitize_filename(s, restricted=False, is_id=False):
 240     """Sanitizes a string so it could be used as part of a filename.
 241     If restricted is set, use a stricter subset of allowed characters.
 242     Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
 243     """
 244     def replace_insane(char):
 245         if char == '?' or ord(char) < 32 or ord(char) == 127:
 246             return ''
 247         elif char == '"':
 248             return '' if restricted else '\''
 249         elif char == ':':
 250             return '_-' if restricted else ' -'
 251         elif char in '\\/|*<>':
 252             return '_'
 253         if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
 254             return '_'
 255         if restricted and ord(char) > 127:
 256             return '_'
 257         return char
 258
 259     result = u''.join(map(replace_insane, s))
 260     if not is_id:
 261         while '__' in result:
 262             result = result.replace('__', '_')
 263         result = result.strip('_')
 264         # Common case of "Foreign band name - English song title"
 265         if restricted and result.startswith('-_'):
 266             result = result[2:]
 267         if not result:
 268             result = '_'
 269     return result
 270
 271 def orderedSet(iterable):
 272     """ Remove all duplicates from the input iterable """
 273     res = []
 274     for el in iterable:
 275         if el not in res:
 276             res.append(el)
 277     return res
 278
 279
 280 def _htmlentity_transform(entity):
 281     """Transforms an HTML entity to a character."""
 282     # Known non-numeric HTML entity
 283     if entity in compat_html_entities.name2codepoint:
 284         return compat_chr(compat_html_entities.name2codepoint[entity])
 285
 286     mobj = re.match(r'#(x?[0-9]+)', entity)
 287     if mobj is not None:
 288         numstr = mobj.group(1)
 289         if numstr.startswith(u'x'):
 290             base = 16
 291             numstr = u'0%s' % numstr
 292         else:
 293             base = 10
 294         return compat_chr(int(numstr, base))
 295
 296     # Unknown entity in name, return its literal representation
 297     return (u'&%s;' % entity)
 298
 299
 300 def unescapeHTML(s):
 301     if s is None:
 302         return None
 303     assert type(s) == compat_str
 304
 305     return re.sub(
 306         r'&([^;]+);', lambda m: _htmlentity_transform(m.group(1)), s)
 307
 308
 309 def encodeFilename(s, for_subprocess=False):
 310     """
 311     @param s The name of the file
 312     """
 313
 314     assert type(s) == compat_str
 315
 316     # Python 3 has a Unicode API
 317     if sys.version_info >= (3, 0):
 318         return s
 319
 320     if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
 321         # Pass u'' directly to use Unicode APIs on Windows 2000 and up
 322         # (Detecting Windows NT 4 is tricky because 'major >= 4' would
 323         # match Windows 9x series as well. Besides, NT 4 is obsolete.)
 324         if not for_subprocess:
 325             return s
 326         else:
 327             # For subprocess calls, encode with locale encoding
 328             # Refer to http://stackoverflow.com/a/9951851/35070
 329             encoding = preferredencoding()
 330     else:
 331         encoding = sys.getfilesystemencoding()
 332     if encoding is None:
 333         encoding = 'utf-8'
 334     return s.encode(encoding, 'ignore')
 335
 336
 337 def encodeArgument(s):
 338     if not isinstance(s, compat_str):
 339         # Legacy code that uses byte strings
 340         # Uncomment the following line after fixing all post processors
 341         #assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
 342         s = s.decode('ascii')
 343     return encodeFilename(s, True)
 344
 345
 346 def decodeOption(optval):
 347     if optval is None:
 348         return optval
 349     if isinstance(optval, bytes):
 350         optval = optval.decode(preferredencoding())
 351
 352     assert isinstance(optval, compat_str)
 353     return optval
 354
 355 def formatSeconds(secs):
 356     if secs > 3600:
 357         return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
 358     elif secs > 60:
 359         return '%d:%02d' % (secs // 60, secs % 60)
 360     else:
 361         return '%d' % secs
 362
 363
 364 def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
 365     if sys.version_info < (3, 2):
 366         import httplib
 367
 368         class HTTPSConnectionV3(httplib.HTTPSConnection):
 369             def __init__(self, *args, **kwargs):
 370                 httplib.HTTPSConnection.__init__(self, *args, **kwargs)
 371
 372             def connect(self):
 373                 sock = socket.create_connection((self.host, self.port), self.timeout)
 374                 if getattr(self, '_tunnel_host', False):
 375                     self.sock = sock
 376                     self._tunnel()
 377                 try:
 378                     self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_TLSv1)
 379                 except ssl.SSLError:
 380                     self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
 381
 382         class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
 383             def https_open(self, req):
 384                 return self.do_open(HTTPSConnectionV3, req)
 385         return HTTPSHandlerV3(**kwargs)
 386     elif hasattr(ssl, 'create_default_context'):  # Python >= 3.4
 387         context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
 388         context.options &= ~ssl.OP_NO_SSLv3  # Allow older, not-as-secure SSLv3
 389         if opts_no_check_certificate:
 390             context.verify_mode = ssl.CERT_NONE
 391         return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
 392     else:  # Python < 3.4
 393         context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
 394         context.verify_mode = (ssl.CERT_NONE
 395                                if opts_no_check_certificate
 396                                else ssl.CERT_REQUIRED)
 397         context.set_default_verify_paths()
 398         try:
 399             context.load_default_certs()
 400         except AttributeError:
 401             pass  # Python < 3.4
 402         return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
 403
 404 class ExtractorError(Exception):
 405     """Error during info extraction."""
 406     def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
 407         """ tb, if given, is the original traceback (so that it can be printed out).
 408         If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
 409         """
 410
 411         if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
 412             expected = True
 413         if video_id is not None:
 414             msg = video_id + ': ' + msg
 415         if cause:
 416             msg += u' (caused by %r)' % cause
 417         if not expected:
 418             msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type  youtube-dl -U  to update.'
 419         super(ExtractorError, self).__init__(msg)
 420
 421         self.traceback = tb
 422         self.exc_info = sys.exc_info()  # preserve original exception
 423         self.cause = cause
 424         self.video_id = video_id
 425
 426     def format_traceback(self):
 427         if self.traceback is None:
 428             return None
 429         return u''.join(traceback.format_tb(self.traceback))
 430
 431
 432 class RegexNotFoundError(ExtractorError):
 433     """Error when a regex didn't match"""
 434     pass
 435
 436
 437 class DownloadError(Exception):
 438     """Download Error exception.
 439
 440     This exception may be thrown by FileDownloader objects if they are not
 441     configured to continue on errors. They will contain the appropriate
 442     error message.
 443     """
 444     def __init__(self, msg, exc_info=None):
 445         """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
 446         super(DownloadError, self).__init__(msg)
 447         self.exc_info = exc_info
 448
 449
 450 class SameFileError(Exception):
 451     """Same File exception.
 452
 453     This exception will be thrown by FileDownloader objects if they detect
 454     multiple files would have to be downloaded to the same file on disk.
 455     """
 456     pass
 457
 458
 459 class PostProcessingError(Exception):
 460     """Post Processing exception.
 461
 462     This exception may be raised by PostProcessor's .run() method to
 463     indicate an error in the postprocessing task.
 464     """
 465     def __init__(self, msg):
 466         self.msg = msg
 467
 468 class MaxDownloadsReached(Exception):
 469     """ --max-downloads limit has been reached. """
 470     pass
 471
 472
 473 class UnavailableVideoError(Exception):
 474     """Unavailable Format exception.
 475
 476     This exception will be thrown when a video is requested
 477     in a format that is not available for that video.
 478     """
 479     pass
 480
 481
 482 class ContentTooShortError(Exception):
 483     """Content Too Short exception.
 484
 485     This exception may be raised by FileDownloader objects when a file they
 486     download is too small for what the server announced first, indicating
 487     the connection was probably interrupted.
 488     """
 489     # Both in bytes
 490     downloaded = None
 491     expected = None
 492
 493     def __init__(self, downloaded, expected):
 494         self.downloaded = downloaded
 495         self.expected = expected
 496
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-No-Compression", which will be
    removed before making the real request.

    Likewise, a "Youtubedl-User-Agent" header in the original request
    replaces the User-Agent header and is removed itself.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    @staticmethod
    def deflate(data):
        """Decompress deflate data: first as a raw stream (negative window
        bits), then, if that fails, as a stream with zlib header."""
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def addinfourl_wrapper(stream, headers, url, code):
        """Create an addinfourl response carrying the status code."""
        # Pass the code to the constructor where addinfourl knows about
        # status codes (has getcode); otherwise set the attribute afterwards.
        if hasattr(compat_urllib_request.addinfourl, 'getcode'):
            return compat_urllib_request.addinfourl(stream, headers, url, code)
        ret = compat_urllib_request.addinfourl(stream, headers, url)
        ret.code = code
        return ret

    def http_request(self, req):
        """Add the standard headers to req and apply the internal
        Youtubedl-* control headers; returns the modified request."""
        # Headers already set on the request take precedence.
        for h, v in std_headers.items():
            if h not in req.headers:
                req.add_header(h, v)
        # Internal marker header: drop Accept-encoding, then the marker.
        if 'Youtubedl-no-compression' in req.headers:
            if 'Accept-encoding' in req.headers:
                del req.headers['Accept-encoding']
            del req.headers['Youtubedl-no-compression']
        # Internal marker header: its value becomes the User-agent.
        if 'Youtubedl-user-agent' in req.headers:
            if 'User-agent' in req.headers:
                del req.headers['User-agent']
            req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
            del req.headers['Youtubedl-user-agent']

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            # (the fragment is cut off the request's private URL attributes)
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        """Return resp, replaced by a decompressed copy when its
        Content-encoding header is gzip or deflate."""
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with 1 to 1023 trailing bytes cut off; the first
                # attempt that decompresses wins.
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    # No truncation helped: report the original failure.
                    raise original_ioerror
            resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
        return resp

    # HTTPS traffic is processed exactly like HTTP traffic.
    https_request = http_request
    https_response = http_response
 582
 583
 584 def parse_iso8601(date_str, delimiter='T'):
 585     """ Return a UNIX timestamp from the given date """
 586
 587     if date_str is None:
 588         return None
 589
 590     m = re.search(
 591         r'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
 592         date_str)
 593     if not m:
 594         timezone = datetime.timedelta()
 595     else:
 596         date_str = date_str[:-len(m.group(0))]
 597         if not m.group('sign'):
 598             timezone = datetime.timedelta()
 599         else:
 600             sign = 1 if m.group('sign') == '+' else -1
 601             timezone = datetime.timedelta(
 602                 hours=sign * int(m.group('hours')),
 603                 minutes=sign * int(m.group('minutes')))
 604     date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
 605     dt = datetime.datetime.strptime(date_str, date_format) - timezone
 606     return calendar.timegm(dt.timetuple())
 607
 608
 609 def unified_strdate(date_str):
 610     """Return a string with the date in the format YYYYMMDD"""
 611
 612     if date_str is None:
 613         return None
 614
 615     upload_date = None
 616     #Replace commas
 617     date_str = date_str.replace(',', ' ')
 618     # %z (UTC offset) is only supported in python>=3.2
 619     date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
 620     format_expressions = [
 621         '%d %B %Y',
 622         '%d %b %Y',
 623         '%B %d %Y',
 624         '%b %d %Y',
 625         '%b %dst %Y %I:%M%p',
 626         '%b %dnd %Y %I:%M%p',
 627         '%b %dth %Y %I:%M%p',
 628         '%Y-%m-%d',
 629         '%Y/%m/%d',
 630         '%d.%m.%Y',
 631         '%d/%m/%Y',
 632         '%d/%m/%y',
 633         '%Y/%m/%d %H:%M:%S',
 634         '%d/%m/%Y %H:%M:%S',
 635         '%Y-%m-%d %H:%M:%S',
 636         '%Y-%m-%d %H:%M:%S.%f',
 637         '%d.%m.%Y %H:%M',
 638         '%d.%m.%Y %H.%M',
 639         '%Y-%m-%dT%H:%M:%SZ',
 640         '%Y-%m-%dT%H:%M:%S.%fZ',
 641         '%Y-%m-%dT%H:%M:%S.%f0Z',
 642         '%Y-%m-%dT%H:%M:%S',
 643         '%Y-%m-%dT%H:%M:%S.%f',
 644         '%Y-%m-%dT%H:%M',
 645     ]
 646     for expression in format_expressions:
 647         try:
 648             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
 649         except ValueError:
 650             pass
 651     if upload_date is None:
 652         timetuple = email.utils.parsedate_tz(date_str)
 653         if timetuple:
 654             upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
 655     return upload_date
 656
 657 def determine_ext(url, default_ext=u'unknown_video'):
 658     if url is None:
 659         return default_ext
 660     guess = url.partition(u'?')[0].rpartition(u'.')[2]
 661     if re.match(r'^[A-Za-z0-9]+$', guess):
 662         return guess
 663     else:
 664         return default_ext
 665
 666 def subtitles_filename(filename, sub_lang, sub_format):
 667     return filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
 668
 669 def date_from_str(date_str):
 670     """
 671     Return a datetime object from a string in the format YYYYMMDD or
 672     (now|today)[+-][0-9](day|week|month|year)(s)?"""
 673     today = datetime.date.today()
 674     if date_str == 'now'or date_str == 'today':
 675         return today
 676     match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
 677     if match is not None:
 678         sign = match.group('sign')
 679         time = int(match.group('time'))
 680         if sign == '-':
 681             time = -time
 682         unit = match.group('unit')
 683         #A bad aproximation?
 684         if unit == 'month':
 685             unit = 'day'
 686             time *= 30
 687         elif unit == 'year':
 688             unit = 'day'
 689             time *= 365
 690         unit += 's'
 691         delta = datetime.timedelta(**{unit: time})
 692         return today + delta
 693     return datetime.datetime.strptime(date_str, "%Y%m%d").date()
 694
 695 def hyphenate_date(date_str):
 696     """
 697     Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
 698     match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
 699     if match is not None:
 700         return '-'.join(match.groups())
 701     else:
 702         return date_str
 703
 704 class DateRange(object):
 705     """Represents a time interval between two dates"""
 706     def __init__(self, start=None, end=None):
 707         """start and end must be strings in the format accepted by date"""
 708         if start is not None:
 709             self.start = date_from_str(start)
 710         else:
 711             self.start = datetime.datetime.min.date()
 712         if end is not None:
 713             self.end = date_from_str(end)
 714         else:
 715             self.end = datetime.datetime.max.date()
 716         if self.start > self.end:
 717             raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
 718     @classmethod
 719     def day(cls, day):
 720         """Returns a range that only contains the given day"""
 721         return cls(day,day)
 722     def __contains__(self, date):
 723         """Check if the date is in the range"""
 724         if not isinstance(date, datetime.date):
 725             date = date_from_str(date)
 726         return self.start <= date <= self.end
 727     def __str__(self):
 728         return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
 729
 730
 731 def platform_name():
 732     """ Returns the platform name as a compat_str """
 733     res = platform.platform()
 734     if isinstance(res, bytes):
 735         res = res.decode(preferredencoding())
 736
 737     assert isinstance(res, compat_str)
 738     return res
 739
 740
def _windows_write_string(s, out):
    """ Write the string s to the Windows console behind the stream out.

    Returns True if the string was written using special methods,
    False if it has yet to be written out (out is not standard output or
    standard error, or is not attached to a console).
    Raises OSError if WriteConsoleW reports a failure."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # File descriptor -> argument for GetStdHandle
    # (-11 is STD_OUTPUT_HANDLE, -12 is STD_ERROR_HANDLE)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    # Prototypes of the kernel32 functions used below
    GetStdHandle = ctypes.WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ("GetStdHandle", ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(("WriteConsoleW", ctypes.windll.kernel32))
    # Receives the number of characters written by each WriteConsoleW call
    written = ctypes.wintypes.DWORD(0)

    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(("GetFileType", ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ("GetConsoleMode", ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        """ True unless handle is a character device for which
        GetConsoleMode succeeds. """
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        """ Index of the first character of s above U+FFFF, or len(s) if
        there is none. """
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    # Write in chunks of at most 1024 characters that stop before the next
    # non-BMP character; such a character is then written on its own.
    while s:
        count = min(next_nonbmp_pos(s), 1024)

        # count == 0 means s starts with a non-BMP character, which is
        # written as 2 units.
        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            # Continue after whatever the console actually accepted
            s = s[written.value:]
    return True
 811
 812
 813 def write_string(s, out=None, encoding=None):
 814     if out is None:
 815         out = sys.stderr
 816     assert type(s) == compat_str
 817
 818     if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
 819         if _windows_write_string(s, out):
 820             return
 821
 822     if ('b' in getattr(out, 'mode', '') or
 823             sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
 824         byt = s.encode(encoding or preferredencoding(), 'ignore')
 825         out.write(byt)
 826     elif hasattr(out, 'buffer'):
 827         enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
 828         byt = s.encode(enc, 'ignore')
 829         out.buffer.write(byt)
 830     else:
 831         out.write(s)
 832     out.flush()
 833
 834
 835 def bytes_to_intlist(bs):
 836     if not bs:
 837         return []
 838     if isinstance(bs[0], int):  # Python 3
 839         return list(bs)
 840     else:
 841         return [ord(c) for c in bs]
 842
 843
 844 def intlist_to_bytes(xs):
 845     if not xs:
 846         return b''
 847     if isinstance(chr(0), bytes):  # Python 2
 848         return ''.join([chr(x) for x in xs])
 849     else:
 850         return bytes(xs)
 851
 852
# Cross-platform file locking
# Defines _lock_file(f, exclusive) and _unlock_file(f) for the running
# platform: LockFileEx/UnlockFileEx called through ctypes on win32,
# fcntl.flock everywhere else.
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    # ctypes structure passed (by pointer) as the last argument of
    # LockFileEx and UnlockFileEx.
    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    # Declare argument and return types so ctypes converts values correctly
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low/high halves of the byte count handed to both calls; presumably
    # chosen large enough to cover the whole file
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        """Lock the open file f with LockFileEx, starting at offset 0.

        The flags argument is 0x2 when exclusive is true and 0x0
        otherwise.  Raises OSError when LockFileEx reports failure.
        """
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Stored on the file object: _unlock_file hands the same pointer
        # to UnlockFileEx
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """Release the lock taken by _lock_file on f.

        Raises OSError when UnlockFileEx reports failure.
        """
        # Set by _lock_file; unlocking a file that was never locked is a
        # programming error
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    import fcntl

    def _lock_file(f, exclusive):
        """Lock f with flock: LOCK_EX when exclusive is true, else LOCK_SH."""
        fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

    def _unlock_file(f):
        """Release the flock lock held on f."""
        fcntl.flock(f, fcntl.LOCK_UN)
 916
 917
 918 class locked_file(object):
 919     def __init__(self, filename, mode, encoding=None):
 920         assert mode in ['r', 'a', 'w']
 921         self.f = io.open(filename, mode, encoding=encoding)
 922         self.mode = mode
 923
 924     def __enter__(self):
 925         exclusive = self.mode != 'r'
 926         try:
 927             _lock_file(self.f, exclusive)
 928         except IOError:
 929             self.f.close()
 930             raise
 931         return self
 932
 933     def __exit__(self, etype, value, traceback):
 934         try:
 935             _unlock_file(self.f)
 936         finally:
 937             self.f.close()
 938
 939     def __iter__(self):
 940         return iter(self.f)
 941
 942     def write(self, *args):
 943         return self.f.write(*args)
 944
 945     def read(self, *args):
 946         return self.f.read(*args)
 947
 948
 949 def get_filesystem_encoding():
 950     encoding = sys.getfilesystemencoding()
 951     return encoding if encoding is not None else 'utf-8'
 952
 953
 954 def shell_quote(args):
 955     quoted_args = []
 956     encoding = get_filesystem_encoding()
 957     for a in args:
 958         if isinstance(a, bytes):
 959             # We may get a filename encoded with 'encodeFilename'
 960             a = a.decode(encoding)
 961         quoted_args.append(pipes.quote(a))
 962     return u' '.join(quoted_args)
 963
 964
 965 def takewhile_inclusive(pred, seq):
 966     """ Like itertools.takewhile, but include the latest evaluated element
 967         (the first element so that Not pred(e)) """
 968     for e in seq:
 969         yield e
 970         if not pred(e):
 971             return
 972
 973
 974 def smuggle_url(url, data):
 975     """ Pass additional data in a URL for internal use. """
 976
 977     sdata = compat_urllib_parse.urlencode(
 978         {u'__youtubedl_smuggle': json.dumps(data)})
 979     return url + u'#' + sdata
 980
 981
 982 def unsmuggle_url(smug_url, default=None):
 983     if not '#__youtubedl_smuggle' in smug_url:
 984         return smug_url, default
 985     url, _, sdata = smug_url.rpartition(u'#')
 986     jsond = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0]
 987     data = json.loads(jsond)
 988     return url, data
 989
 990
 991 def format_bytes(bytes):
 992     if bytes is None:
 993         return u'N/A'
 994     if type(bytes) is str:
 995         bytes = float(bytes)
 996     if bytes == 0.0:
 997         exponent = 0
 998     else:
 999         exponent = int(math.log(bytes, 1024.0))
1000     suffix = [u'B', u'KiB', u'MiB', u'GiB', u'TiB', u'PiB', u'EiB', u'ZiB', u'YiB'][exponent]
1001     converted = float(bytes) / float(1024 ** exponent)
1002     return u'%.2f%s' % (converted, suffix)
1003
1004
def get_term_width():
    """Return the terminal width in columns, or None if it is unknown.

    The COLUMNS environment variable is used when it is set and
    non-empty (a non-numeric value raises ValueError).  Otherwise
    `stty size` is run and the second field of its output is used; any
    failure of that lookup yields None.
    """
    columns = compat_getenv('COLUMNS', None)
    if columns:
        return int(columns)

    try:
        sp = subprocess.Popen(
            ['stty', 'size'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, err = sp.communicate()
        return int(out.split()[1])
    except Exception:
        # Best effort only (stty missing, no tty, unexpected output), but
        # do not swallow KeyboardInterrupt/SystemExit like a bare except.
        pass
    return None
1019
1020
def month_by_name(name):
    """ Return the 1-based number of the month with the given full English
    name (independent of the locale), or None if name is not one """

    ENGLISH_NAMES = [
        u'January', u'February', u'March', u'April', u'May', u'June',
        u'July', u'August', u'September', u'October', u'November', u'December']
    if name in ENGLISH_NAMES:
        return ENGLISH_NAMES.index(name) + 1
    return None
1031
1032
def fix_xml_ampersands(xml_str):
    """Replace every bare '&' in xml_str by '&amp;'.

    An ampersand is left alone when it starts one of the predefined
    entities (amp, lt, gt, apos, quot) or a numeric character reference
    (&#NNN; or &#xHHH;) with at least one digit.
    """
    # Numeric references may have any number of digits (e.g. &#x1F600;),
    # but need at least one: "&#;" is not a reference and gets escaped.
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]+;|#[0-9]+;)',
        u'&amp;',
        xml_str)
1039
1040
def setproctitle(title):
    """Set the process name to title through libc's prctl, if possible.

    Does nothing when libc.so.6 cannot be loaded or has no prctl symbol.
    title must be a text string; it is passed on UTF-8 encoded.
    """
    assert isinstance(title, compat_str)
    try:
        libc = ctypes.cdll.LoadLibrary("libc.so.6")
    except OSError:
        return
    title_bytes = title.encode('utf-8')
    # Initialising the buffer from the bytes allocates len + 1 bytes and
    # NUL-terminates it.  A buffer of exactly len(title_bytes) bytes has no
    # terminator, so prctl could read past its end for short titles.
    buf = ctypes.create_string_buffer(title_bytes)
    try:
        libc.prctl(15, buf, 0, 0, 0)  # 15 is PR_SET_NAME
    except AttributeError:
        return  # Strange libc, just skip this
1054
1055
def remove_start(s, start):
    """Return s without the prefix start; s unchanged if it lacks it."""
    return s[len(start):] if s.startswith(start) else s
1060
1061
def remove_end(s, end):
    """Return s without the suffix end; s unchanged if it lacks it."""
    # An empty suffix must be skipped explicitly: every string ends with
    # '', and s[:-0] is s[:0], which would wrongly return an empty string.
    if end and s.endswith(end):
        return s[:-len(end)]
    return s
1066
1067
def url_basename(url):
    """Return the last segment of the path of url, ignoring leading and
    trailing slashes."""
    trimmed_path = compat_urlparse.urlparse(url).path.strip(u'/')
    # Everything after the last '/' (the whole path if there is none)
    return trimmed_path.rpartition(u'/')[2]
1071
1072
class HEADRequest(compat_urllib_request.Request):
    """Request subclass whose get_method() always returns "HEAD"."""

    def get_method(self):
        return "HEAD"
1076
1077
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Convert v to int, or return default when there is no value.

    If get_attr is given and v is not None, the attribute of that name is
    read from v first (None if missing).  None and '' count as "no value".
    Otherwise the result is int(v) * invscale // scale.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v is None or v == '':
        return default
    return int(v) * invscale // scale
1085
1086
def str_or_none(v, default=None):
    """Return v converted with compat_str, or default when v is None."""
    if v is None:
        return default
    return compat_str(v)
1089
1090
def str_to_int(int_str):
    """ A more relaxed version of int_or_none: commas, dots and plus
    signs are removed before the conversion. None gives None. """
    if int_str is None:
        return None
    digits = re.sub(r'[,\.\+]', u'', int_str)
    return int(digits)
1097
1098
def float_or_none(v, scale=1, invscale=1, default=None):
    """Return float(v) * invscale / scale, or default when v is None."""
    if v is None:
        return default
    return float(v) * invscale / scale
1101
1102
def parse_duration(s):
    """Parse a duration string and return the number of seconds.

    Handles forms such as "1:02:03", "12:34", "3 min 9 sec" or "5.5s".
    Returns None for None or for text that does not match.  The result is
    an int unless a fractional part is present, in which case it is a
    float.
    """
    if s is None:
        return None

    m = re.match(
        r'(?i)(?:(?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?(?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?$',
        s.strip())
    if not m:
        return None

    hours, mins, secs, ms = m.group('hours', 'mins', 'secs', 'ms')
    total = int(secs)
    if mins:
        total += int(mins) * 60
        if hours:
            total += int(hours) * 60 * 60
    if ms:
        # ms holds the fractional part including the leading dot
        total += float(ms)
    return total
1121
1122
def prepend_extension(filename, ext):
    """Insert ext (without dot) in front of the extension of filename,
    e.g. ('abc.ext', 'temp') gives 'abc.temp.ext'."""
    stem, original_ext = os.path.splitext(filename)
    parts = (stem, ext, original_ext)
    return u'{0}.{1}{2}'.format(*parts)
1126
1127
def check_executable(exe, args=[]):
    """ Check that the given binary can be run and return its name,
    or False if starting it raises OSError.
    args can be a list of arguments for a short output (like -version) """
    command = [exe] + args
    try:
        process = subprocess.Popen(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # Wait for the process to finish and discard its output
        process.communicate()
    except OSError:
        return False
    else:
        return exe
1136
1137
def get_exe_version(exe, args=['--version'],
                    version_re=r'version\s+([-0-9._a-zA-Z]+)',
                    unrecognized=u'present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present.

    The executable is run with args; the first line of its combined
    stdout/stderr output is searched with version_re and group 1 of the
    match is returned.  If nothing matches, unrecognized is returned. """
    # In the default pattern the '-' is placed first in the character
    # class so it is a literal; written as "._-a" it formed the range
    # '_'..'a', which excluded most lowercase letters and cut versions
    # such as "1.2.3-beta" short.
    try:
        out, err = subprocess.Popen(
            [exe] + args,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
    except OSError:
        return False
    firstline = out.partition(b'\n')[0].decode('ascii', 'ignore')
    m = re.search(version_re, firstline)
    if m:
        return m.group(1)
    return unrecognized
1155
1156
class PagedList(object):
    """Base class for paged lists; subclasses provide getslice()."""

    def __len__(self):
        # This is only useful for tests
        all_items = self.getslice()
        return len(all_items)
1161
1162
class OnDemandPagedList(PagedList):
    """Paged list that fetches pages one by one until a short page is seen.

    pagefunc is called with a page number and must return an iterable of
    the entries on that page; pagesize is the number of entries on a full
    page.
    """

    def __init__(self, pagefunc, pagesize):
        self._pagefunc = pagefunc
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Return the entries with indices start (inclusive) to end
        (exclusive) as a list; end=None means up to the last entry."""
        res = []
        # Begin with the page that contains index start
        for pagenum in itertools.count(start // self._pagesize):
            # Global indices of the first entry of this page and of the
            # next page
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = list(self._pagefunc(pagenum))

            # Offset of start within this page (0 on every later page)
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            # Offset of end within this page, or None when the slice does
            # not end on this page
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
1204
1205
class InAdvancePagedList(PagedList):
    """Paged list for which the number of pages is known up front.

    pagefunc is called with a page number and must return an iterable of
    the entries on that page.
    """

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Return the entries with indices start (inclusive) to end
        (exclusive) as a list; end=None means all remaining pages."""
        # Page holding index start, and how many entries to drop from it
        first_page, to_skip = divmod(start, self._pagesize)
        if end is None:
            stop_page = self._pagecount
            remaining = None
        else:
            stop_page = end // self._pagesize + 1
            remaining = end - start

        collected = []
        for pagenum in range(first_page, stop_page):
            page = list(self._pagefunc(pagenum))
            if to_skip:
                # Only the first fetched page is trimmed at the front
                page = page[to_skip:]
                to_skip = None
            if remaining is not None:
                if len(page) >= remaining:
                    # This page completes the requested slice
                    collected.extend(page[:remaining])
                    break
                remaining -= len(page)
            collected.extend(page)
        return collected
1233
1234
def uppercase_escape(s):
    """Replace each \\UXXXXXXXX escape (8 hex digits) in s by the character
    it denotes; the rest of the string is left untouched."""
    decode = codecs.getdecoder('unicode_escape')

    def expand(match):
        # The decoder returns (text, consumed length); keep the text
        return decode(match.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', expand, s)
1241
1242
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # On Python 2 a unicode string is passed to quote() UTF-8 encoded
    is_py2_unicode = sys.version_info < (3, 0) and isinstance(s, unicode)
    if is_py2_unicode:
        s = s.encode('utf-8')
    safe_chars = b"%/;:@&=+$,!~*'()?#[]"
    return compat_urllib_parse.quote(s, safe_chars)
1248
1249
def escape_url(url):
    """Escape URL as suggested by RFC 3986

    Only the path, params, query and fragment components are escaped."""
    parts = compat_urllib_parse_urlparse(url)
    escaped_path = escape_rfc3986(parts.path)
    escaped_params = escape_rfc3986(parts.params)
    escaped_query = escape_rfc3986(parts.query)
    escaped_fragment = escape_rfc3986(parts.fragment)
    escaped_parts = parts._replace(
        path=escaped_path,
        params=escaped_params,
        query=escaped_query,
        fragment=escaped_fragment)
    return escaped_parts.geturl()
1259
# struct_pack / struct_unpack: struct.pack / struct.unpack variants that
# accept a text format string on every supported Python version.
try:
    struct.pack(u'!I', 0)
except TypeError:
    # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
    def _with_bytes_spec(struct_func):
        def wrapper(spec, *args):
            if isinstance(spec, compat_str):
                spec = spec.encode('ascii')
            return struct_func(spec, *args)
        return wrapper

    struct_pack = _with_bytes_spec(struct.pack)
    struct_unpack = _with_bytes_spec(struct.unpack)
else:
    struct_pack, struct_unpack = struct.pack, struct.unpack
1276
1277
def read_batch_urls(batch_fd):
    """Read URLs from the file-like object batch_fd, one per line.

    Byte lines are decoded as UTF-8 (undecodable bytes are replaced).  A
    leading byte order mark and surrounding whitespace are removed.  Empty
    lines and lines starting with '#', ';' or ']' are skipped.  batch_fd
    is closed afterwards.  Returns the list of remaining URLs.
    """
    # A UTF-8 BOM is U+FEFF once decoded as UTF-8, but shows up as the
    # three characters \xef\xbb\xbf when the stream was decoded as Latin-1;
    # strip either form.
    BOMS = (u'\ufeff', u'\xef\xbb\xbf')

    def fixup(url):
        if isinstance(url, bytes):
            url = url.decode('utf-8', 'replace')
        for bom in BOMS:
            if url.startswith(bom):
                url = url[len(bom):]
                break
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
1292
1293
def urlencode_postdata(*args, **kargs):
    """Url-encode the arguments with compat_urllib_parse.urlencode and
    return the result as ASCII-encoded bytes."""
    encoded = compat_urllib_parse.urlencode(*args, **kargs)
    return encoded.encode('ascii')
1296
1297
# etree_iter(n): iterate over an element tree; uses Element.iter when the
# running Python provides it.
if hasattr(xml.etree.ElementTree.Element, 'iter'):
    etree_iter = xml.etree.ElementTree.Element.iter
else:  # Python <=2.6
    def etree_iter(n):
        return n.findall('.//*')
1302
1303
def parse_xml(s):
    """Parse the XML text s and return the root element.

    Doctype declarations are ignored.  On Python 2, element texts that
    come back as byte strings are decoded as UTF-8.
    """
    etree = xml.etree.ElementTree

    class DoctypeIgnoringBuilder(etree.TreeBuilder):
        def doctype(self, name, pubid, system):
            pass  # Ignore doctypes

    parser = etree.XMLParser(target=DoctypeIgnoringBuilder())
    data = s.encode('utf-8')
    if sys.version_info >= (2, 7):
        tree = etree.XML(data, parser=parser)
    else:
        # Older versions do not get the custom parser passed in
        tree = etree.XML(data)

    # Fix up XML parser in Python 2.x
    if sys.version_info < (3, 0):
        for node in etree_iter(tree):
            text = node.text
            if text is not None and not isinstance(text, compat_str):
                node.text = text.decode('utf-8')
    return tree
1319
1320
# Age limit assigned to each rating label understood by parse_age_limit
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}


def parse_age_limit(s):
    """Return the age limit described by s as an int, or None.

    s may be one or two digits optionally followed by '+' (e.g. "18+"),
    or one of the keys of US_RATINGS.  None and unknown values give None.
    """
    if s is None:
        return None
    numeric = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if numeric:
        return int(numeric.group('age'))
    return US_RATINGS.get(s)
1335
1336
def strip_jsonp(code):
    """Return the argument text of a JSONP call such as callback(...);
    input that does not have that shape is returned unchanged."""
    jsonp_call = r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?\s*$'
    return re.sub(jsonp_call, r'\1', code)
1339
1340
def js_to_json(code):
    """Convert JavaScript object-literal text to JSON text.

    Single-quoted strings and bare identifiers become double-quoted
    strings (true, false and null are kept), and a comma directly before
    a closing ']' is removed.
    """
    # Rewrites applied to the content of a single-quoted string
    SINGLE_QUOTED_FIXES = {
        '\\\\': '\\\\',
        "\\'": "'",
        '"': '\\"',
    }

    def fix_escape(m):
        return SINGLE_QUOTED_FIXES[m.group(0)]

    def fix_kv(m):
        token = m.group(0)
        if token in ('true', 'false', 'null') or token.startswith('"'):
            return token
        if token.startswith("'"):
            # Drop the quotes, then adapt the escapes for double quotes
            token = re.sub(r"\\\\|\\'|\"", fix_escape, token[1:-1])
        return '"%s"' % token

    quoted = re.sub(r'''(?x)
        "(?:[^"\\]*(?:\\\\|\\")?)*"|
        '(?:[^'\\]*(?:\\\\|\\')?)*'|
        [a-zA-Z_][a-zA-Z_0-9]*
        ''', fix_kv, code)
    return re.sub(r',(\s*\])', lambda m: m.group(1), quoted)
1364
1365
def qualities(quality_ids):
    """ Return a function mapping a quality id to its position in
    quality_ids, or to -1 for ids that are not in it """
    def q(qid):
        if qid in quality_ids:
            return quality_ids.index(qid)
        return -1
    return q
1374
1375
# %-style template with the mapping keys 'title', 'id' and 'ext';
# presumably the default output filename template - confirm against callers
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
1377
1378
def limit_length(s, length):
    """ Shorten s to at most length characters, ending it with '...' when
    it had to be cut. None is passed through. """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) <= length:
        return s
    keep = length - len(ELLIPSES)
    return s[:keep] + ELLIPSES
1387
1388
def version_tuple(v):
    """Split the dotted version string v into a list of ints."""
    return list(map(int, v.split('.')))
1391
1392
def is_outdated_version(version, limit, assume_new=True):
    """Return whether version is older than limit.

    Both are dotted version strings.  When version is empty or either
    string cannot be parsed, the answer is `not assume_new`.
    """
    unknown_result = not assume_new
    if not version:
        return unknown_result
    try:
        current = version_tuple(version)
        minimum = version_tuple(limit)
    except ValueError:
        return unknown_result
    return current < minimum