_ Git - youtube-dl/blob - youtube_dl/utils.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import unicode_literals
   5
   6 import calendar
   7 import codecs
   8 import contextlib
   9 import ctypes
  10 import datetime
  11 import email.utils
  12 import errno
  13 import gzip
  14 import itertools
  15 import io
  16 import json
  17 import locale
  18 import math
  19 import os
  20 import pipes
  21 import platform
  22 import re
  23 import ssl
  24 import socket
  25 import struct
  26 import subprocess
  27 import sys
  28 import tempfile
  29 import traceback
  30 import xml.etree.ElementTree
  31 import zlib
  32
  33 from .compat import (
  34     compat_chr,
  35     compat_getenv,
  36     compat_html_entities,
  37     compat_parse_qs,
  38     compat_str,
  39     compat_urllib_error,
  40     compat_urllib_parse,
  41     compat_urllib_parse_urlparse,
  42     compat_urllib_request,
  43     compat_urlparse,
  44 )
  45
  46
# This is not clearly defined otherwise
# (the type of compiled regular expression objects, taken from a sample pattern)
compiled_regex_type = type(re.compile(''))

# Default HTTP request headers. YoutubeDLHandler.http_request adds each of
# these to outgoing requests that do not already carry the header.
std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
  57
  58 def preferredencoding():
  59     """Get preferred encoding.
  60
  61     Returns the best encoding scheme for the system, based on
  62     locale.getpreferredencoding() and some further tweaks.
  63     """
  64     try:
  65         pref = locale.getpreferredencoding()
  66         u'TEST'.encode(pref)
  67     except:
  68         pref = 'UTF-8'
  69
  70     return pref
  71
  72
  73 def write_json_file(obj, fn):
  74     """ Encode obj as JSON and write it to fn, atomically """
  75
  76     if sys.version_info < (3, 0):
  77         encoding = get_filesystem_encoding()
  78         # os.path.basename returns a bytes object, but NamedTemporaryFile
  79         # will fail if the filename contains non ascii characters unless we
  80         # use a unicode object
  81         path_basename = lambda f: os.path.basename(fn).decode(encoding)
  82         # the same for os.path.dirname
  83         path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
  84     else:
  85         path_basename = os.path.basename
  86         path_dirname = os.path.dirname
  87
  88     args = {
  89         'suffix': '.tmp',
  90         'prefix': path_basename(fn) + '.',
  91         'dir': path_dirname(fn),
  92         'delete': False,
  93     }
  94
  95     # In Python 2.x, json.dump expects a bytestream.
  96     # In Python 3.x, it writes to a character stream
  97     if sys.version_info < (3, 0):
  98         args['mode'] = 'wb'
  99     else:
 100         args.update({
 101             'mode': 'w',
 102             'encoding': 'utf-8',
 103         })
 104
 105     tf = tempfile.NamedTemporaryFile(**args)
 106
 107     try:
 108         with tf:
 109             json.dump(obj, tf)
 110         os.rename(tf.name, fn)
 111     except:
 112         try:
 113             os.remove(tf.name)
 114         except OSError:
 115             pass
 116         raise
 117
 118
 119 if sys.version_info >= (2, 7):
 120     def find_xpath_attr(node, xpath, key, val):
 121         """ Find the xpath xpath[@key=val] """
 122         assert re.match(r'^[a-zA-Z-]+$', key)
 123         assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
 124         expr = xpath + u"[@%s='%s']" % (key, val)
 125         return node.find(expr)
 126 else:
 127     def find_xpath_attr(node, xpath, key, val):
 128         # Here comes the crazy part: In 2.6, if the xpath is a unicode,
 129         # .//node does not match if a node is a direct child of . !
 130         if isinstance(xpath, unicode):
 131             xpath = xpath.encode('ascii')
 132
 133         for f in node.findall(xpath):
 134             if f.attrib.get(key) == val:
 135                 return f
 136         return None
 137
 138 # On python2.6 the xml.etree.ElementTree.Element methods don't support
 139 # the namespace parameter
 140 def xpath_with_ns(path, ns_map):
 141     components = [c.split(':') for c in path.split('/')]
 142     replaced = []
 143     for c in components:
 144         if len(c) == 1:
 145             replaced.append(c[0])
 146         else:
 147             ns, tag = c
 148             replaced.append('{%s}%s' % (ns_map[ns], tag))
 149     return '/'.join(replaced)
 150
 151
 152 def xpath_text(node, xpath, name=None, fatal=False):
 153     if sys.version_info < (2, 7):  # Crazy 2.6
 154         xpath = xpath.encode('ascii')
 155
 156     n = node.find(xpath)
 157     if n is None:
 158         if fatal:
 159             name = xpath if name is None else name
 160             raise ExtractorError('Could not find XML element %s' % name)
 161         else:
 162             return None
 163     return n.text
 164
 165
 166 def get_element_by_id(id, html):
 167     """Return the content of the tag with the specified ID in the passed HTML document"""
 168     return get_element_by_attribute("id", id, html)
 169
 170
 171 def get_element_by_attribute(attribute, value, html):
 172     """Return the content of the tag with the specified attribute in the passed HTML document"""
 173
 174     m = re.search(r'''(?xs)
 175         <([a-zA-Z0-9:._-]+)
 176          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
 177          \s+%s=['"]?%s['"]?
 178          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
 179         \s*>
 180         (?P<content>.*?)
 181         </\1>
 182     ''' % (re.escape(attribute), re.escape(value)), html)
 183
 184     if not m:
 185         return None
 186     res = m.group('content')
 187
 188     if res.startswith('"') or res.startswith("'"):
 189         res = res[1:-1]
 190
 191     return unescapeHTML(res)
 192
 193
 194 def clean_html(html):
 195     """Clean an HTML snippet into a readable string"""
 196     # Newline vs <br />
 197     html = html.replace('\n', ' ')
 198     html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
 199     html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
 200     # Strip html tags
 201     html = re.sub('<.*?>', '', html)
 202     # Replace html entities
 203     html = unescapeHTML(html)
 204     return html.strip()
 205
 206
 207 def sanitize_open(filename, open_mode):
 208     """Try to open the given filename, and slightly tweak it if this fails.
 209
 210     Attempts to open the given filename. If this fails, it tries to change
 211     the filename slightly, step by step, until it's either able to open it
 212     or it fails and raises a final exception, like the standard open()
 213     function.
 214
 215     It returns the tuple (stream, definitive_file_name).
 216     """
 217     try:
 218         if filename == u'-':
 219             if sys.platform == 'win32':
 220                 import msvcrt
 221                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
 222             return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
 223         stream = open(encodeFilename(filename), open_mode)
 224         return (stream, filename)
 225     except (IOError, OSError) as err:
 226         if err.errno in (errno.EACCES,):
 227             raise
 228
 229         # In case of error, try to remove win32 forbidden chars
 230         alt_filename = os.path.join(
 231                         re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', path_part)
 232                         for path_part in os.path.split(filename)
 233                        )
 234         if alt_filename == filename:
 235             raise
 236         else:
 237             # An exception here should be caught in the caller
 238             stream = open(encodeFilename(filename), open_mode)
 239             return (stream, alt_filename)
 240
 241
 242 def timeconvert(timestr):
 243     """Convert RFC 2822 defined time string into system timestamp"""
 244     timestamp = None
 245     timetuple = email.utils.parsedate_tz(timestr)
 246     if timetuple is not None:
 247         timestamp = email.utils.mktime_tz(timetuple)
 248     return timestamp
 249
 250 def sanitize_filename(s, restricted=False, is_id=False):
 251     """Sanitizes a string so it could be used as part of a filename.
 252     If restricted is set, use a stricter subset of allowed characters.
 253     Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
 254     """
 255     def replace_insane(char):
 256         if char == '?' or ord(char) < 32 or ord(char) == 127:
 257             return ''
 258         elif char == '"':
 259             return '' if restricted else '\''
 260         elif char == ':':
 261             return '_-' if restricted else ' -'
 262         elif char in '\\/|*<>':
 263             return '_'
 264         if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
 265             return '_'
 266         if restricted and ord(char) > 127:
 267             return '_'
 268         return char
 269
 270     result = u''.join(map(replace_insane, s))
 271     if not is_id:
 272         while '__' in result:
 273             result = result.replace('__', '_')
 274         result = result.strip('_')
 275         # Common case of "Foreign band name - English song title"
 276         if restricted and result.startswith('-_'):
 277             result = result[2:]
 278         if not result:
 279             result = '_'
 280     return result
 281
 282 def orderedSet(iterable):
 283     """ Remove all duplicates from the input iterable """
 284     res = []
 285     for el in iterable:
 286         if el not in res:
 287             res.append(el)
 288     return res
 289
 290
 291 def _htmlentity_transform(entity):
 292     """Transforms an HTML entity to a character."""
 293     # Known non-numeric HTML entity
 294     if entity in compat_html_entities.name2codepoint:
 295         return compat_chr(compat_html_entities.name2codepoint[entity])
 296
 297     mobj = re.match(r'#(x?[0-9]+)', entity)
 298     if mobj is not None:
 299         numstr = mobj.group(1)
 300         if numstr.startswith(u'x'):
 301             base = 16
 302             numstr = u'0%s' % numstr
 303         else:
 304             base = 10
 305         return compat_chr(int(numstr, base))
 306
 307     # Unknown entity in name, return its literal representation
 308     return (u'&%s;' % entity)
 309
 310
 311 def unescapeHTML(s):
 312     if s is None:
 313         return None
 314     assert type(s) == compat_str
 315
 316     return re.sub(
 317         r'&([^;]+);', lambda m: _htmlentity_transform(m.group(1)), s)
 318
 319
 320 def encodeFilename(s, for_subprocess=False):
 321     """
 322     @param s The name of the file
 323     """
 324
 325     assert type(s) == compat_str
 326
 327     # Python 3 has a Unicode API
 328     if sys.version_info >= (3, 0):
 329         return s
 330
 331     if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
 332         # Pass u'' directly to use Unicode APIs on Windows 2000 and up
 333         # (Detecting Windows NT 4 is tricky because 'major >= 4' would
 334         # match Windows 9x series as well. Besides, NT 4 is obsolete.)
 335         if not for_subprocess:
 336             return s
 337         else:
 338             # For subprocess calls, encode with locale encoding
 339             # Refer to http://stackoverflow.com/a/9951851/35070
 340             encoding = preferredencoding()
 341     else:
 342         encoding = sys.getfilesystemencoding()
 343     if encoding is None:
 344         encoding = 'utf-8'
 345     return s.encode(encoding, 'ignore')
 346
 347
 348 def encodeArgument(s):
 349     if not isinstance(s, compat_str):
 350         # Legacy code that uses byte strings
 351         # Uncomment the following line after fixing all post processors
 352         #assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
 353         s = s.decode('ascii')
 354     return encodeFilename(s, True)
 355
 356
 357 def decodeOption(optval):
 358     if optval is None:
 359         return optval
 360     if isinstance(optval, bytes):
 361         optval = optval.decode(preferredencoding())
 362
 363     assert isinstance(optval, compat_str)
 364     return optval
 365
 366 def formatSeconds(secs):
 367     if secs > 3600:
 368         return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
 369     elif secs > 60:
 370         return '%d:%02d' % (secs // 60, secs % 60)
 371     else:
 372         return '%d' % secs
 373
 374
 375 def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
 376     if sys.version_info < (3, 2):
 377         import httplib
 378
 379         class HTTPSConnectionV3(httplib.HTTPSConnection):
 380             def __init__(self, *args, **kwargs):
 381                 httplib.HTTPSConnection.__init__(self, *args, **kwargs)
 382
 383             def connect(self):
 384                 sock = socket.create_connection((self.host, self.port), self.timeout)
 385                 if getattr(self, '_tunnel_host', False):
 386                     self.sock = sock
 387                     self._tunnel()
 388                 try:
 389                     self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_TLSv1)
 390                 except ssl.SSLError:
 391                     self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
 392
 393         class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
 394             def https_open(self, req):
 395                 return self.do_open(HTTPSConnectionV3, req)
 396         return HTTPSHandlerV3(**kwargs)
 397     elif hasattr(ssl, 'create_default_context'):  # Python >= 3.4
 398         context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
 399         context.options &= ~ssl.OP_NO_SSLv3  # Allow older, not-as-secure SSLv3
 400         if opts_no_check_certificate:
 401             context.verify_mode = ssl.CERT_NONE
 402         return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
 403     else:  # Python < 3.4
 404         context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
 405         context.verify_mode = (ssl.CERT_NONE
 406                                if opts_no_check_certificate
 407                                else ssl.CERT_REQUIRED)
 408         context.set_default_verify_paths()
 409         try:
 410             context.load_default_certs()
 411         except AttributeError:
 412             pass  # Python < 3.4
 413         return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
 414
 415 class ExtractorError(Exception):
 416     """Error during info extraction."""
 417     def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
 418         """ tb, if given, is the original traceback (so that it can be printed out).
 419         If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
 420         """
 421
 422         if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
 423             expected = True
 424         if video_id is not None:
 425             msg = video_id + ': ' + msg
 426         if cause:
 427             msg += u' (caused by %r)' % cause
 428         if not expected:
 429             msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type  youtube-dl -U  to update.'
 430         super(ExtractorError, self).__init__(msg)
 431
 432         self.traceback = tb
 433         self.exc_info = sys.exc_info()  # preserve original exception
 434         self.cause = cause
 435         self.video_id = video_id
 436
 437     def format_traceback(self):
 438         if self.traceback is None:
 439             return None
 440         return u''.join(traceback.format_tb(self.traceback))
 441
 442
 443 class RegexNotFoundError(ExtractorError):
 444     """Error when a regex didn't match"""
 445     pass
 446
 447
 448 class DownloadError(Exception):
 449     """Download Error exception.
 450
 451     This exception may be thrown by FileDownloader objects if they are not
 452     configured to continue on errors. They will contain the appropriate
 453     error message.
 454     """
 455     def __init__(self, msg, exc_info=None):
 456         """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
 457         super(DownloadError, self).__init__(msg)
 458         self.exc_info = exc_info
 459
 460
 461 class SameFileError(Exception):
 462     """Same File exception.
 463
 464     This exception will be thrown by FileDownloader objects if they detect
 465     multiple files would have to be downloaded to the same file on disk.
 466     """
 467     pass
 468
 469
 470 class PostProcessingError(Exception):
 471     """Post Processing exception.
 472
 473     This exception may be raised by PostProcessor's .run() method to
 474     indicate an error in the postprocessing task.
 475     """
 476     def __init__(self, msg):
 477         self.msg = msg
 478
 479 class MaxDownloadsReached(Exception):
 480     """ --max-downloads limit has been reached. """
 481     pass
 482
 483
 484 class UnavailableVideoError(Exception):
 485     """Unavailable Format exception.
 486
 487     This exception will be thrown when a video is requested
 488     in a format that is not available for that video.
 489     """
 490     pass
 491
 492
 493 class ContentTooShortError(Exception):
 494     """Content Too Short exception.
 495
 496     This exception may be raised by FileDownloader objects when a file they
 497     download is too small for what the server announced first, indicating
 498     the connection was probably interrupted.
 499     """
 500     # Both in bytes
 501     downloaded = None
 502     expected = None
 503
 504     def __init__(self, downloaded, expected):
 505         self.downloaded = downloaded
 506         self.expected = expected
 507
 508 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
 509     """Handler for HTTP requests and responses.
 510
 511     This class, when installed with an OpenerDirector, automatically adds
 512     the standard headers to every HTTP request and handles gzipped and
 513     deflated responses from web servers. If compression is to be avoided in
 514     a particular request, the original request in the program code only has
 515     to include the HTTP header "Youtubedl-No-Compression", which will be
 516     removed before making the real request.
 517
 518     Part of this code was copied from:
 519
 520     http://techknack.net/python-urllib2-handlers/
 521
 522     Andrew Rowls, the author of that code, agreed to release it to the
 523     public domain.
 524     """
 525
 526     @staticmethod
 527     def deflate(data):
 528         try:
 529             return zlib.decompress(data, -zlib.MAX_WBITS)
 530         except zlib.error:
 531             return zlib.decompress(data)
 532
 533     @staticmethod
 534     def addinfourl_wrapper(stream, headers, url, code):
 535         if hasattr(compat_urllib_request.addinfourl, 'getcode'):
 536             return compat_urllib_request.addinfourl(stream, headers, url, code)
 537         ret = compat_urllib_request.addinfourl(stream, headers, url)
 538         ret.code = code
 539         return ret
 540
 541     def http_request(self, req):
 542         for h, v in std_headers.items():
 543             if h not in req.headers:
 544                 req.add_header(h, v)
 545         if 'Youtubedl-no-compression' in req.headers:
 546             if 'Accept-encoding' in req.headers:
 547                 del req.headers['Accept-encoding']
 548             del req.headers['Youtubedl-no-compression']
 549         if 'Youtubedl-user-agent' in req.headers:
 550             if 'User-agent' in req.headers:
 551                 del req.headers['User-agent']
 552             req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
 553             del req.headers['Youtubedl-user-agent']
 554
 555         if sys.version_info < (2, 7) and '#' in req.get_full_url():
 556             # Python 2.6 is brain-dead when it comes to fragments
 557             req._Request__original = req._Request__original.partition('#')[0]
 558             req._Request__r_type = req._Request__r_type.partition('#')[0]
 559
 560         return req
 561
 562     def http_response(self, req, resp):
 563         old_resp = resp
 564         # gzip
 565         if resp.headers.get('Content-encoding', '') == 'gzip':
 566             content = resp.read()
 567             gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
 568             try:
 569                 uncompressed = io.BytesIO(gz.read())
 570             except IOError as original_ioerror:
 571                 # There may be junk add the end of the file
 572                 # See http://stackoverflow.com/q/4928560/35070 for details
 573                 for i in range(1, 1024):
 574                     try:
 575                         gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
 576                         uncompressed = io.BytesIO(gz.read())
 577                     except IOError:
 578                         continue
 579                     break
 580                 else:
 581                     raise original_ioerror
 582             resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
 583             resp.msg = old_resp.msg
 584         # deflate
 585         if resp.headers.get('Content-encoding', '') == 'deflate':
 586             gz = io.BytesIO(self.deflate(resp.read()))
 587             resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
 588             resp.msg = old_resp.msg
 589         return resp
 590
 591     https_request = http_request
 592     https_response = http_response
 593
 594
 595 def parse_iso8601(date_str, delimiter='T'):
 596     """ Return a UNIX timestamp from the given date """
 597
 598     if date_str is None:
 599         return None
 600
 601     m = re.search(
 602         r'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
 603         date_str)
 604     if not m:
 605         timezone = datetime.timedelta()
 606     else:
 607         date_str = date_str[:-len(m.group(0))]
 608         if not m.group('sign'):
 609             timezone = datetime.timedelta()
 610         else:
 611             sign = 1 if m.group('sign') == '+' else -1
 612             timezone = datetime.timedelta(
 613                 hours=sign * int(m.group('hours')),
 614                 minutes=sign * int(m.group('minutes')))
 615     date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
 616     dt = datetime.datetime.strptime(date_str, date_format) - timezone
 617     return calendar.timegm(dt.timetuple())
 618
 619
 620 def unified_strdate(date_str):
 621     """Return a string with the date in the format YYYYMMDD"""
 622
 623     if date_str is None:
 624         return None
 625
 626     upload_date = None
 627     #Replace commas
 628     date_str = date_str.replace(',', ' ')
 629     # %z (UTC offset) is only supported in python>=3.2
 630     date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
 631     format_expressions = [
 632         '%d %B %Y',
 633         '%d %b %Y',
 634         '%B %d %Y',
 635         '%b %d %Y',
 636         '%b %dst %Y %I:%M%p',
 637         '%b %dnd %Y %I:%M%p',
 638         '%b %dth %Y %I:%M%p',
 639         '%Y-%m-%d',
 640         '%Y/%m/%d',
 641         '%d.%m.%Y',
 642         '%d/%m/%Y',
 643         '%d/%m/%y',
 644         '%Y/%m/%d %H:%M:%S',
 645         '%d/%m/%Y %H:%M:%S',
 646         '%Y-%m-%d %H:%M:%S',
 647         '%Y-%m-%d %H:%M:%S.%f',
 648         '%d.%m.%Y %H:%M',
 649         '%d.%m.%Y %H.%M',
 650         '%Y-%m-%dT%H:%M:%SZ',
 651         '%Y-%m-%dT%H:%M:%S.%fZ',
 652         '%Y-%m-%dT%H:%M:%S.%f0Z',
 653         '%Y-%m-%dT%H:%M:%S',
 654         '%Y-%m-%dT%H:%M:%S.%f',
 655         '%Y-%m-%dT%H:%M',
 656     ]
 657     for expression in format_expressions:
 658         try:
 659             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
 660         except ValueError:
 661             pass
 662     if upload_date is None:
 663         timetuple = email.utils.parsedate_tz(date_str)
 664         if timetuple:
 665             upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
 666     return upload_date
 667
 668 def determine_ext(url, default_ext=u'unknown_video'):
 669     if url is None:
 670         return default_ext
 671     guess = url.partition(u'?')[0].rpartition(u'.')[2]
 672     if re.match(r'^[A-Za-z0-9]+$', guess):
 673         return guess
 674     else:
 675         return default_ext
 676
 677 def subtitles_filename(filename, sub_lang, sub_format):
 678     return filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
 679
 680 def date_from_str(date_str):
 681     """
 682     Return a datetime object from a string in the format YYYYMMDD or
 683     (now|today)[+-][0-9](day|week|month|year)(s)?"""
 684     today = datetime.date.today()
 685     if date_str == 'now'or date_str == 'today':
 686         return today
 687     match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
 688     if match is not None:
 689         sign = match.group('sign')
 690         time = int(match.group('time'))
 691         if sign == '-':
 692             time = -time
 693         unit = match.group('unit')
 694         #A bad aproximation?
 695         if unit == 'month':
 696             unit = 'day'
 697             time *= 30
 698         elif unit == 'year':
 699             unit = 'day'
 700             time *= 365
 701         unit += 's'
 702         delta = datetime.timedelta(**{unit: time})
 703         return today + delta
 704     return datetime.datetime.strptime(date_str, "%Y%m%d").date()
 705
 706 def hyphenate_date(date_str):
 707     """
 708     Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
 709     match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
 710     if match is not None:
 711         return '-'.join(match.groups())
 712     else:
 713         return date_str
 714
 715 class DateRange(object):
 716     """Represents a time interval between two dates"""
 717     def __init__(self, start=None, end=None):
 718         """start and end must be strings in the format accepted by date"""
 719         if start is not None:
 720             self.start = date_from_str(start)
 721         else:
 722             self.start = datetime.datetime.min.date()
 723         if end is not None:
 724             self.end = date_from_str(end)
 725         else:
 726             self.end = datetime.datetime.max.date()
 727         if self.start > self.end:
 728             raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
 729     @classmethod
 730     def day(cls, day):
 731         """Returns a range that only contains the given day"""
 732         return cls(day,day)
 733     def __contains__(self, date):
 734         """Check if the date is in the range"""
 735         if not isinstance(date, datetime.date):
 736             date = date_from_str(date)
 737         return self.start <= date <= self.end
 738     def __str__(self):
 739         return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
 740
 741
 742 def platform_name():
 743     """ Returns the platform name as a compat_str """
 744     res = platform.platform()
 745     if isinstance(res, bytes):
 746         res = res.decode(preferredencoding())
 747
 748     assert isinstance(res, compat_str)
 749     return res
 750
 751
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # Maps file descriptors (1 and 2) to the identifiers passed to
    # GetStdHandle (-11 is STD_OUTPUT_HANDLE, -12 is STD_ERROR_HANDLE)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    # Build ctypes prototypes for the kernel32 functions used below
    GetStdHandle = ctypes.WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ("GetStdHandle", ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(("WriteConsoleW", ctypes.windll.kernel32))
    # Receives the number of characters written by each WriteConsoleW call
    written = ctypes.wintypes.DWORD(0)

    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(("GetFileType", ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ("GetConsoleMode", ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # True for invalid handles, for handles that are not character
        # devices (ignoring the remote flag), and when GetConsoleMode fails
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        # Let the caller write the string the regular way
        return False

    def next_nonbmp_pos(s):
        # Index of the first character above U+FFFF, or len(s) if there is none
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        # Write at most 1024 characters at a time, stopping in front of the
        # next non-BMP character
        count = min(next_nonbmp_pos(s), 1024)

        # A count of 0 means s starts with a non-BMP character, which is
        # written on its own as 2 units
        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            # Continue after whatever the console actually accepted
            s = s[written.value:]
    return True
 822
 823
 824 def write_string(s, out=None, encoding=None):
 825     if out is None:
 826         out = sys.stderr
 827     assert type(s) == compat_str
 828
 829     if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
 830         if _windows_write_string(s, out):
 831             return
 832
 833     if ('b' in getattr(out, 'mode', '') or
 834             sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
 835         byt = s.encode(encoding or preferredencoding(), 'ignore')
 836         out.write(byt)
 837     elif hasattr(out, 'buffer'):
 838         enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
 839         byt = s.encode(enc, 'ignore')
 840         out.buffer.write(byt)
 841     else:
 842         out.write(s)
 843     out.flush()
 844
 845
 846 def bytes_to_intlist(bs):
 847     if not bs:
 848         return []
 849     if isinstance(bs[0], int):  # Python 3
 850         return list(bs)
 851     else:
 852         return [ord(c) for c in bs]
 853
 854
 855 def intlist_to_bytes(xs):
 856     if not xs:
 857         return b''
 858     return struct_pack('%dB' % len(xs), *xs)
 859
 860
# Cross-platform file locking
# Defines _lock_file(f, exclusive) and _unlock_file(f) for the current
# platform: LockFileEx/UnlockFileEx from kernel32 on win32, fcntl.flock
# everywhere else.
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    # ctypes layout of the structure passed as the last argument of
    # LockFileEx / UnlockFileEx.
    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low and high DWORDs of the byte count handed to LockFileEx and
    # UnlockFileEx below.
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        """ Lock the open file f; raises OSError if LockFileEx fails """
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Kept on the file object so that _unlock_file can pass the very
        # same pointer to UnlockFileEx.
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # dwFlags is 0x2 when an exclusive lock is requested, 0x0 otherwise
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """ Unlock a file previously locked with _lock_file; raises OSError
        if UnlockFileEx fails """
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    import fcntl

    def _lock_file(f, exclusive):
        """ flock() the open file f, exclusively or shared """
        fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

    def _unlock_file(f):
        """ Release the flock() held on f """
        fcntl.flock(f, fcntl.LOCK_UN)
 924
 925
 926 class locked_file(object):
 927     def __init__(self, filename, mode, encoding=None):
 928         assert mode in ['r', 'a', 'w']
 929         self.f = io.open(filename, mode, encoding=encoding)
 930         self.mode = mode
 931
 932     def __enter__(self):
 933         exclusive = self.mode != 'r'
 934         try:
 935             _lock_file(self.f, exclusive)
 936         except IOError:
 937             self.f.close()
 938             raise
 939         return self
 940
 941     def __exit__(self, etype, value, traceback):
 942         try:
 943             _unlock_file(self.f)
 944         finally:
 945             self.f.close()
 946
 947     def __iter__(self):
 948         return iter(self.f)
 949
 950     def write(self, *args):
 951         return self.f.write(*args)
 952
 953     def read(self, *args):
 954         return self.f.read(*args)
 955
 956
 957 def get_filesystem_encoding():
 958     encoding = sys.getfilesystemencoding()
 959     return encoding if encoding is not None else 'utf-8'
 960
 961
 962 def shell_quote(args):
 963     quoted_args = []
 964     encoding = get_filesystem_encoding()
 965     for a in args:
 966         if isinstance(a, bytes):
 967             # We may get a filename encoded with 'encodeFilename'
 968             a = a.decode(encoding)
 969         quoted_args.append(pipes.quote(a))
 970     return u' '.join(quoted_args)
 971
 972
 973 def takewhile_inclusive(pred, seq):
 974     """ Like itertools.takewhile, but include the latest evaluated element
 975         (the first element so that Not pred(e)) """
 976     for e in seq:
 977         yield e
 978         if not pred(e):
 979             return
 980
 981
 982 def smuggle_url(url, data):
 983     """ Pass additional data in a URL for internal use. """
 984
 985     sdata = compat_urllib_parse.urlencode(
 986         {u'__youtubedl_smuggle': json.dumps(data)})
 987     return url + u'#' + sdata
 988
 989
 990 def unsmuggle_url(smug_url, default=None):
 991     if not '#__youtubedl_smuggle' in smug_url:
 992         return smug_url, default
 993     url, _, sdata = smug_url.rpartition(u'#')
 994     jsond = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0]
 995     data = json.loads(jsond)
 996     return url, data
 997
 998
 999 def format_bytes(bytes):
1000     if bytes is None:
1001         return u'N/A'
1002     if type(bytes) is str:
1003         bytes = float(bytes)
1004     if bytes == 0.0:
1005         exponent = 0
1006     else:
1007         exponent = int(math.log(bytes, 1024.0))
1008     suffix = [u'B', u'KiB', u'MiB', u'GiB', u'TiB', u'PiB', u'EiB', u'ZiB', u'YiB'][exponent]
1009     converted = float(bytes) / float(1024 ** exponent)
1010     return u'%.2f%s' % (converted, suffix)
1011
1012
def get_term_width():
    """ Return the terminal width in columns, or None if it is unknown.

    The COLUMNS environment variable is used when it holds an integer;
    otherwise the second field of the output of `stty size` is used.
    """
    columns = compat_getenv('COLUMNS', None)
    if columns:
        try:
            return int(columns)
        except ValueError:
            pass  # Unusable COLUMNS value, ask stty instead

    try:
        sp = subprocess.Popen(
            ['stty', 'size'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, err = sp.communicate()
        return int(out.split()[1])
    except (OSError, ValueError, IndexError):
        # stty is missing or printed something unexpected (e.g. no tty)
        pass
    return None
1027
1028
def month_by_name(name):
    """ Return the 1-based number of the month with the given full English
    name (independently of the locale), or None if there is no such month """

    ENGLISH_NAMES = [
        u'January', u'February', u'March', u'April', u'May', u'June',
        u'July', u'August', u'September', u'October', u'November', u'December']
    if name not in ENGLISH_NAMES:
        return None
    return ENGLISH_NAMES.index(name) + 1
1039
1040
def fix_xml_ampersands(xml_str):
    """Replace every '&' that does not start a known entity or a numeric
    character reference by '&amp;'"""
    bare_ampersand = re.compile(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)')
    return bare_ampersand.sub(u'&amp;', xml_str)
1047
1048
def setproctitle(title):
    """ Set the process name through prctl() of libc.so.6, if available.

    title must be a text string; it is passed UTF-8 encoded.  Nothing
    happens when libc.so.6 cannot be loaded or has no prctl.
    """
    assert isinstance(title, compat_str)
    try:
        libc = ctypes.cdll.LoadLibrary("libc.so.6")
    except OSError:
        return
    title_bytes = title.encode('utf-8')
    # Initialising the buffer from the bytes makes it one byte larger than
    # the title and NUL-terminated.  A buffer of exactly len(title_bytes)
    # has no room for the terminator, so prctl would read past its end.
    buf = ctypes.create_string_buffer(title_bytes)
    try:
        libc.prctl(15, buf, 0, 0, 0)  # 15 is PR_SET_NAME on Linux
    except AttributeError:
        return  # Strange libc, just skip this
1062
1063
def remove_start(s, start):
    """ Return s without the prefix start, or s unchanged if it does not
    begin with start """
    return s[len(start):] if s.startswith(start) else s
1068
1069
def remove_end(s, end):
    """ Return s without the suffix end, or s unchanged if it does not
    finish with end """
    # An empty suffix must be special-cased: every string ends with '' and
    # s[:-0] is s[:0], which would wrongly return an empty string.
    if end and s.endswith(end):
        return s[:-len(end)]
    return s
1074
1075
def url_basename(url):
    """ Return the last '/'-separated component of the path of url,
    ignoring leading and trailing slashes """
    trimmed_path = compat_urlparse.urlparse(url).path.strip(u'/')
    return trimmed_path.rsplit(u'/', 1)[-1]
1079
1080
class HEADRequest(compat_urllib_request.Request):
    """ Request whose get_method() always reports HEAD """

    def get_method(self):
        """ Return the HTTP method name of this request """
        return "HEAD"
1084
1085
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """ Return int(v) * invscale // scale, or default if v is None or ''.

    If get_attr is given and v is not None, the attribute of that name is
    read from v first (a missing attribute counts as None).
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v is None or v == '':
        return default
    return int(v) * invscale // scale
1093
1094
def str_or_none(v, default=None):
    """ Return v converted with compat_str, or default if v is None """
    if v is None:
        return default
    return compat_str(v)
1097
1098
def str_to_int(int_str):
    """ A more relaxed version of int_or_none: ',', '.' and '+' characters
    are dropped before the conversion """
    if int_str is None:
        return None
    digits = re.sub(r'[,\.\+]', u'', int_str)
    return int(digits)
1105
1106
def float_or_none(v, scale=1, invscale=1, default=None):
    """ Return float(v) * invscale / scale, or default if v is None """
    if v is None:
        return default
    return float(v) * invscale / scale
1109
1110
def parse_duration(s):
    """ Parse a duration such as '1:02:03', '3 min 4 s' or '12.5' into a
    number of seconds.

    Returns None if s is None or has an unrecognised format.  The result
    is an int unless a fractional part of the seconds was given.
    """
    if s is None:
        return None

    m = re.match(
        r'(?i)(?:(?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?(?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?$',
        s.strip())
    if not m:
        return None

    hours, mins, secs, ms = m.group('hours', 'mins', 'secs', 'ms')
    res = int(secs)
    if mins:
        res += int(mins) * 60
        # Hours can only have matched together with minutes
        if hours:
            res += int(hours) * 60 * 60
    if ms:
        # The group includes the leading dot, e.g. '.5'
        res += float(ms)
    return res
1129
1130
def prepend_extension(filename, ext):
    """ Insert ext in front of the real extension of filename, e.g.
    ('video.mp4', 'temp') gives 'video.temp.mp4' """
    stem, real_ext = os.path.splitext(filename)
    parts = (stem, ext, real_ext)
    return u'{0}.{1}{2}'.format(*parts)
1134
1135
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version).
    Returns False if starting the binary raises OSError. """
    cmd = [exe] + args
    try:
        proc = subprocess.Popen(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        proc.communicate()
    except OSError:
        return False
    else:
        return exe
1144
1145
def get_exe_version(exe, args=['--version'],
                    version_re=r'version\s+([-0-9._a-zA-Z]+)',
                    unrecognized=u'present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present.

    The executable is run with args and version_re is searched in the
    first line of its combined stdout/stderr output; group 1 of the match
    is the version.  If nothing matches, unrecognized is returned.
    """
    # NOTE: the hyphen is listed first in the character class of the
    # default version_re.  Written as '._-a' it forms the range '_'..'a',
    # which drops most lowercase letters and truncates versions such as
    # '1.2-rc1'.
    try:
        out, err = subprocess.Popen(
            [exe] + args,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
    except OSError:
        return False
    firstline = out.partition(b'\n')[0].decode('ascii', 'ignore')
    m = re.search(version_re, firstline)
    if m:
        return m.group(1)
    return unrecognized
1163
1164
class PagedList(object):
    """ Base class of paged lists; subclasses provide getslice() """

    def __len__(self):
        # This is only useful for tests
        everything = self.getslice()
        return len(everything)
1169
1170
class OnDemandPagedList(PagedList):
    """ Paged list that fetches pages with pagefunc(pagenum) until a page
    with fewer than pagesize entries is seen or the requested end is
    reached """

    def __init__(self, pagefunc, pagesize):
        self._pagefunc = pagefunc
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """ Return the entries in [start, end) as a list; an end of None
        means up to the last entry """
        size = self._pagesize
        collected = []
        for pagenum in itertools.count(start // size):
            page_first = pagenum * size
            page_limit = page_first + size
            if start >= page_limit:
                continue

            entries = list(self._pagefunc(pagenum))

            # Offset of the first wanted entry within this page
            if page_first <= start < page_limit:
                lo = start % size
            else:
                lo = 0

            # Offset just past the last wanted entry, if the slice ends here
            if end is not None and page_first <= end <= page_limit:
                hi = ((end - 1) % size) + 1
            else:
                hi = None

            if lo != 0 or hi is not None:
                entries = entries[lo:hi]
            collected.extend(entries)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(entries) + lo < size:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == page_limit:
                break
        return collected
1212
1213
class InAdvancePagedList(PagedList):
    """ Paged list whose number of pages (pagecount) is given up front """

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """ Return the entries in [start, end) as a list; an end of None
        means every page up to pagecount """
        size = self._pagesize
        # Page holding the first wanted entry and its offset in that page
        first_page, to_skip = divmod(start, size)
        if end is None:
            stop_page = self._pagecount
            remaining = None
        else:
            stop_page = end // size + 1
            remaining = end - start

        res = []
        for pagenum in range(first_page, stop_page):
            page = list(self._pagefunc(pagenum))
            if to_skip:
                # Only the first fetched page has entries to drop
                page = page[to_skip:]
                to_skip = None
            if remaining is not None:
                if len(page) >= remaining:
                    res.extend(page[:remaining])
                    break
                remaining -= len(page)
            res.extend(page)
        return res
1241
1242
def uppercase_escape(s):
    """ Replace every \\UXXXXXXXX escape sequence (8 hex digits) in s by
    the character it stands for """
    decode = codecs.getdecoder('unicode_escape')

    def expand(match):
        text, _consumed = decode(match.group(0))
        return text

    return re.sub(r'\\U[0-9a-fA-F]{8}', expand, s)
1249
1250
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # On Python 2, text strings are UTF-8 encoded before quoting
    is_py2_text = sys.version_info < (3, 0) and isinstance(s, unicode)
    if is_py2_text:
        s = s.encode('utf-8')
    safe_chars = b"%/;:@&=+$,!~*'()?#[]"
    return compat_urllib_parse.quote(s, safe_chars)
1256
1257
def escape_url(url):
    """Escape URL as suggested by RFC 3986

    Only the path, params, query and fragment components are escaped."""
    parsed = compat_urllib_parse_urlparse(url)
    escaped = dict(
        (field, escape_rfc3986(getattr(parsed, field)))
        for field in ('path', 'params', 'query', 'fragment'))
    return parsed._replace(**escaped).geturl()
1267
# Probe whether struct accepts a text format string on this interpreter and
# define struct_pack / struct_unpack accordingly.
try:
    struct.pack(u'!I', 0)
except TypeError:
    # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
    def struct_pack(spec, *args):
        """ struct.pack that ASCII-encodes a text format string first """
        fmt = spec.encode('ascii') if isinstance(spec, compat_str) else spec
        return struct.pack(fmt, *args)

    def struct_unpack(spec, *args):
        """ struct.unpack that ASCII-encodes a text format string first """
        fmt = spec.encode('ascii') if isinstance(spec, compat_str) else spec
        return struct.unpack(fmt, *args)
else:
    struct_pack = struct.pack
    struct_unpack = struct.unpack
1284
1285
def read_batch_urls(batch_fd):
    """ Read the URLs of a batch file and close it.

    batch_fd is an iterable of lines (bytes are decoded as UTF-8) that is
    closed afterwards.  A leading byte order mark and surrounding
    whitespace are removed; empty lines and lines starting with '#', ';'
    or ']' are skipped.  Returns the list of remaining URLs.
    """
    # A decoded UTF-8 BOM is U+FEFF.  The second form is the same BOM read
    # with a single-byte encoding; it is kept for backward compatibility.
    BOMS = (u'\ufeff', u'\xef\xbb\xbf')

    def fixup(url):
        if isinstance(url, bytes):
            url = url.decode('utf-8', 'replace')
        for bom in BOMS:
            if url.startswith(bom):
                url = url[len(bom):]
                break
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
1300
1301
def urlencode_postdata(*args, **kargs):
    """ URL-encode the arguments with compat_urllib_parse.urlencode and
    return the result as ASCII bytes """
    encoded = compat_urllib_parse.urlencode(*args, **kargs)
    return encoded.encode('ascii')
1304
1305
# etree_iter(node) iterates over the elements of a tree: Element.iter where
# it exists, otherwise a findall() based fallback.
try:
    etree_iter = xml.etree.ElementTree.Element.iter
except AttributeError:  # Python <=2.6
    def etree_iter(n):
        return n.findall('.//*')
1310
1311
def parse_xml(s):
    """ Parse the text string s as XML and return the root element.

    Doctype declarations are ignored.  On Python 2, byte string texts of
    the elements are decoded as UTF-8.
    """
    etree = xml.etree.ElementTree

    class TreeBuilder(etree.TreeBuilder):
        def doctype(self, name, pubid, system):
            pass  # Ignore doctypes

    parser = etree.XMLParser(target=TreeBuilder())
    # The custom parser is only passed on Python 2.7 and newer
    if sys.version_info >= (2, 7):
        kwargs = {'parser': parser}
    else:
        kwargs = {}
    tree = etree.XML(s.encode('utf-8'), **kwargs)
    # Fix up XML parser in Python 2.x
    if sys.version_info < (3, 0):
        for n in etree_iter(tree):
            if n.text is not None and not isinstance(n.text, compat_str):
                n.text = n.text.decode('utf-8')
    return tree
1327
1328
# Age limit used for each US rating code
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}


def parse_age_limit(s):
    """ Return the age limit described by s as an int.

    s is either one or two digits optionally followed by '+', or a key
    of US_RATINGS.  Returns None if s is None or not recognised.
    """
    if s is None:
        return None
    m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if m:
        return int(m.group('age'))
    return US_RATINGS.get(s, None)
1343
1344
def strip_jsonp(code):
    """ Strip the callback wrapper 'name(...);' (and trailing // comments)
    from JSONP code; code of any other shape is returned unchanged """
    jsonp_wrapper = re.compile(
        r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$')
    return jsonp_wrapper.sub(r'\1', code)
1348
1349
def js_to_json(code):
    """ Convert JavaScript object literal code towards JSON.

    Single-quoted strings and bare identifiers are turned into
    double-quoted strings (true, false and null are kept), and a comma
    directly before a closing ']' is removed.
    """
    # Rewrites applied inside single-quoted strings
    SINGLE_QUOTED_FIXES = {
        '\\\\': '\\\\',
        "\\'": "'",
        '"': '\\"',
    }

    def fix_kv(m):
        token = m.group(0)
        # Literals and double-quoted strings are valid JSON already
        if token in ('true', 'false', 'null') or token.startswith('"'):
            return token
        if token.startswith("'"):
            inner = token[1:-1]
            token = re.sub(
                r"\\\\|\\'|\"",
                lambda esc: SINGLE_QUOTED_FIXES[esc.group(0)],
                inner)
        return '"%s"' % token

    converted = re.sub(r'''(?x)
        "(?:[^"\\]*(?:\\\\|\\")?)*"|
        '(?:[^'\\]*(?:\\\\|\\')?)*'|
        [a-zA-Z_][a-zA-Z_0-9]*
        ''', fix_kv, code)
    # Drop a trailing comma in front of a closing bracket
    return re.sub(r',(\s*\])', lambda m: m.group(1), converted)
1373
1374
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values

    The returned function maps a quality id to its index in quality_ids,
    or to -1 if it is not listed. """
    def q(qid):
        try:
            rank = quality_ids.index(qid)
        except ValueError:
            rank = -1
        return rank
    return q
1383
1384
# Default template string with %(title)s, %(id)s and %(ext)s placeholders
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
1386
1387
def limit_length(s, length):
    """ Add ellipses to overly long strings

    Strings longer than length are cut so that the result, including the
    trailing '...', is length characters long.  None is passed through. """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) <= length:
        return s
    return s[:length - len(ELLIPSES)] + ELLIPSES
1396
1397
def version_tuple(v):
    """ Split a dotted version string into a list of ints """
    return list(map(int, v.split('.')))
1400
1401
def is_outdated_version(version, limit, assume_new=True):
    """ Return whether version is older than limit.

    Both are dotted version strings.  If version is empty or one of the
    versions cannot be parsed, the answer is `not assume_new`.
    """
    fallback = not assume_new
    if not version:
        return fallback
    try:
        outdated = version_tuple(version) < version_tuple(limit)
    except ValueError:
        return fallback
    return outdated