_ Git - youtube-dl/blob - youtube_dl/utils.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import unicode_literals
   5
   6 import calendar
   7 import codecs
   8 import contextlib
   9 import ctypes
  10 import datetime
  11 import email.utils
  12 import errno
  13 import gzip
  14 import itertools
  15 import io
  16 import json
  17 import locale
  18 import math
  19 import os
  20 import pipes
  21 import platform
  22 import re
  23 import ssl
  24 import socket
  25 import struct
  26 import subprocess
  27 import sys
  28 import tempfile
  29 import traceback
  30 import xml.etree.ElementTree
  31 import zlib
  32
  33 from .compat import (
  34     compat_chr,
  35     compat_getenv,
  36     compat_html_entities,
  37     compat_parse_qs,
  38     compat_str,
  39     compat_urllib_error,
  40     compat_urllib_parse,
  41     compat_urllib_parse_urlparse,
  42     compat_urllib_request,
  43     compat_urlparse,
  44     shlex_quote,
  45 )
  46
  47
# This is not clearly defined otherwise: the type of a compiled regular
# expression object, obtained from an actual (empty) compiled pattern.
compiled_regex_type = type(re.compile(''))

# Default HTTP request headers.  YoutubeDLHandler.http_request copies these
# onto outgoing requests.
std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
  58
  59 def preferredencoding():
  60     """Get preferred encoding.
  61
  62     Returns the best encoding scheme for the system, based on
  63     locale.getpreferredencoding() and some further tweaks.
  64     """
  65     try:
  66         pref = locale.getpreferredencoding()
  67         'TEST'.encode(pref)
  68     except:
  69         pref = 'UTF-8'
  70
  71     return pref
  72
  73
  74 def write_json_file(obj, fn):
  75     """ Encode obj as JSON and write it to fn, atomically if possible """
  76
  77     fn = encodeFilename(fn)
  78     if sys.version_info < (3, 0) and sys.platform != 'win32':
  79         encoding = get_filesystem_encoding()
  80         # os.path.basename returns a bytes object, but NamedTemporaryFile
  81         # will fail if the filename contains non ascii characters unless we
  82         # use a unicode object
  83         path_basename = lambda f: os.path.basename(fn).decode(encoding)
  84         # the same for os.path.dirname
  85         path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
  86     else:
  87         path_basename = os.path.basename
  88         path_dirname = os.path.dirname
  89
  90     args = {
  91         'suffix': '.tmp',
  92         'prefix': path_basename(fn) + '.',
  93         'dir': path_dirname(fn),
  94         'delete': False,
  95     }
  96
  97     # In Python 2.x, json.dump expects a bytestream.
  98     # In Python 3.x, it writes to a character stream
  99     if sys.version_info < (3, 0):
 100         args['mode'] = 'wb'
 101     else:
 102         args.update({
 103             'mode': 'w',
 104             'encoding': 'utf-8',
 105         })
 106
 107     tf = tempfile.NamedTemporaryFile(**args)
 108
 109     try:
 110         with tf:
 111             json.dump(obj, tf)
 112         if sys.platform == 'win32':
 113             # Need to remove existing file on Windows, else os.rename raises
 114             # WindowsError or FileExistsError.
 115             try:
 116                 os.unlink(fn)
 117             except OSError:
 118                 pass
 119         os.rename(tf.name, fn)
 120     except:
 121         try:
 122             os.remove(tf.name)
 123         except OSError:
 124             pass
 125         raise
 126
 127
 128 if sys.version_info >= (2, 7):
 129     def find_xpath_attr(node, xpath, key, val):
 130         """ Find the xpath xpath[@key=val] """
 131         assert re.match(r'^[a-zA-Z-]+$', key)
 132         assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
 133         expr = xpath + u"[@%s='%s']" % (key, val)
 134         return node.find(expr)
 135 else:
 136     def find_xpath_attr(node, xpath, key, val):
 137         # Here comes the crazy part: In 2.6, if the xpath is a unicode,
 138         # .//node does not match if a node is a direct child of . !
 139         if isinstance(xpath, unicode):
 140             xpath = xpath.encode('ascii')
 141
 142         for f in node.findall(xpath):
 143             if f.attrib.get(key) == val:
 144                 return f
 145         return None
 146
 147 # On python2.6 the xml.etree.ElementTree.Element methods don't support
 148 # the namespace parameter
 149 def xpath_with_ns(path, ns_map):
 150     components = [c.split(':') for c in path.split('/')]
 151     replaced = []
 152     for c in components:
 153         if len(c) == 1:
 154             replaced.append(c[0])
 155         else:
 156             ns, tag = c
 157             replaced.append('{%s}%s' % (ns_map[ns], tag))
 158     return '/'.join(replaced)
 159
 160
 161 def xpath_text(node, xpath, name=None, fatal=False):
 162     if sys.version_info < (2, 7):  # Crazy 2.6
 163         xpath = xpath.encode('ascii')
 164
 165     n = node.find(xpath)
 166     if n is None:
 167         if fatal:
 168             name = xpath if name is None else name
 169             raise ExtractorError('Could not find XML element %s' % name)
 170         else:
 171             return None
 172     return n.text
 173
 174
 175 def get_element_by_id(id, html):
 176     """Return the content of the tag with the specified ID in the passed HTML document"""
 177     return get_element_by_attribute("id", id, html)
 178
 179
 180 def get_element_by_attribute(attribute, value, html):
 181     """Return the content of the tag with the specified attribute in the passed HTML document"""
 182
 183     m = re.search(r'''(?xs)
 184         <([a-zA-Z0-9:._-]+)
 185          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
 186          \s+%s=['"]?%s['"]?
 187          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
 188         \s*>
 189         (?P<content>.*?)
 190         </\1>
 191     ''' % (re.escape(attribute), re.escape(value)), html)
 192
 193     if not m:
 194         return None
 195     res = m.group('content')
 196
 197     if res.startswith('"') or res.startswith("'"):
 198         res = res[1:-1]
 199
 200     return unescapeHTML(res)
 201
 202
 203 def clean_html(html):
 204     """Clean an HTML snippet into a readable string"""
 205     # Newline vs <br />
 206     html = html.replace('\n', ' ')
 207     html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
 208     html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
 209     # Strip html tags
 210     html = re.sub('<.*?>', '', html)
 211     # Replace html entities
 212     html = unescapeHTML(html)
 213     return html.strip()
 214
 215
 216 def sanitize_open(filename, open_mode):
 217     """Try to open the given filename, and slightly tweak it if this fails.
 218
 219     Attempts to open the given filename. If this fails, it tries to change
 220     the filename slightly, step by step, until it's either able to open it
 221     or it fails and raises a final exception, like the standard open()
 222     function.
 223
 224     It returns the tuple (stream, definitive_file_name).
 225     """
 226     try:
 227         if filename == '-':
 228             if sys.platform == 'win32':
 229                 import msvcrt
 230                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
 231             return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
 232         stream = open(encodeFilename(filename), open_mode)
 233         return (stream, filename)
 234     except (IOError, OSError) as err:
 235         if err.errno in (errno.EACCES,):
 236             raise
 237
 238         # In case of error, try to remove win32 forbidden chars
 239         alt_filename = os.path.join(
 240                         re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part)
 241                         for path_part in os.path.split(filename)
 242                        )
 243         if alt_filename == filename:
 244             raise
 245         else:
 246             # An exception here should be caught in the caller
 247             stream = open(encodeFilename(filename), open_mode)
 248             return (stream, alt_filename)
 249
 250
 251 def timeconvert(timestr):
 252     """Convert RFC 2822 defined time string into system timestamp"""
 253     timestamp = None
 254     timetuple = email.utils.parsedate_tz(timestr)
 255     if timetuple is not None:
 256         timestamp = email.utils.mktime_tz(timetuple)
 257     return timestamp
 258
 259 def sanitize_filename(s, restricted=False, is_id=False):
 260     """Sanitizes a string so it could be used as part of a filename.
 261     If restricted is set, use a stricter subset of allowed characters.
 262     Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
 263     """
 264     def replace_insane(char):
 265         if char == '?' or ord(char) < 32 or ord(char) == 127:
 266             return ''
 267         elif char == '"':
 268             return '' if restricted else '\''
 269         elif char == ':':
 270             return '_-' if restricted else ' -'
 271         elif char in '\\/|*<>':
 272             return '_'
 273         if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
 274             return '_'
 275         if restricted and ord(char) > 127:
 276             return '_'
 277         return char
 278
 279     result = ''.join(map(replace_insane, s))
 280     if not is_id:
 281         while '__' in result:
 282             result = result.replace('__', '_')
 283         result = result.strip('_')
 284         # Common case of "Foreign band name - English song title"
 285         if restricted and result.startswith('-_'):
 286             result = result[2:]
 287         if not result:
 288             result = '_'
 289     return result
 290
 291 def orderedSet(iterable):
 292     """ Remove all duplicates from the input iterable """
 293     res = []
 294     for el in iterable:
 295         if el not in res:
 296             res.append(el)
 297     return res
 298
 299
 300 def _htmlentity_transform(entity):
 301     """Transforms an HTML entity to a character."""
 302     # Known non-numeric HTML entity
 303     if entity in compat_html_entities.name2codepoint:
 304         return compat_chr(compat_html_entities.name2codepoint[entity])
 305
 306     mobj = re.match(r'#(x?[0-9]+)', entity)
 307     if mobj is not None:
 308         numstr = mobj.group(1)
 309         if numstr.startswith('x'):
 310             base = 16
 311             numstr = '0%s' % numstr
 312         else:
 313             base = 10
 314         return compat_chr(int(numstr, base))
 315
 316     # Unknown entity in name, return its literal representation
 317     return ('&%s;' % entity)
 318
 319
 320 def unescapeHTML(s):
 321     if s is None:
 322         return None
 323     assert type(s) == compat_str
 324
 325     return re.sub(
 326         r'&([^;]+);', lambda m: _htmlentity_transform(m.group(1)), s)
 327
 328
 329 def encodeFilename(s, for_subprocess=False):
 330     """
 331     @param s The name of the file
 332     """
 333
 334     assert type(s) == compat_str
 335
 336     # Python 3 has a Unicode API
 337     if sys.version_info >= (3, 0):
 338         return s
 339
 340     if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
 341         # Pass '' directly to use Unicode APIs on Windows 2000 and up
 342         # (Detecting Windows NT 4 is tricky because 'major >= 4' would
 343         # match Windows 9x series as well. Besides, NT 4 is obsolete.)
 344         if not for_subprocess:
 345             return s
 346         else:
 347             # For subprocess calls, encode with locale encoding
 348             # Refer to http://stackoverflow.com/a/9951851/35070
 349             encoding = preferredencoding()
 350     else:
 351         encoding = sys.getfilesystemencoding()
 352     if encoding is None:
 353         encoding = 'utf-8'
 354     return s.encode(encoding, 'ignore')
 355
 356
 357 def encodeArgument(s):
 358     if not isinstance(s, compat_str):
 359         # Legacy code that uses byte strings
 360         # Uncomment the following line after fixing all post processors
 361         #assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
 362         s = s.decode('ascii')
 363     return encodeFilename(s, True)
 364
 365
 366 def decodeOption(optval):
 367     if optval is None:
 368         return optval
 369     if isinstance(optval, bytes):
 370         optval = optval.decode(preferredencoding())
 371
 372     assert isinstance(optval, compat_str)
 373     return optval
 374
 375 def formatSeconds(secs):
 376     if secs > 3600:
 377         return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
 378     elif secs > 60:
 379         return '%d:%02d' % (secs // 60, secs % 60)
 380     else:
 381         return '%d' % secs
 382
 383
 384 def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
 385     if sys.version_info < (3, 2):
 386         import httplib
 387
 388         class HTTPSConnectionV3(httplib.HTTPSConnection):
 389             def __init__(self, *args, **kwargs):
 390                 httplib.HTTPSConnection.__init__(self, *args, **kwargs)
 391
 392             def connect(self):
 393                 sock = socket.create_connection((self.host, self.port), self.timeout)
 394                 if getattr(self, '_tunnel_host', False):
 395                     self.sock = sock
 396                     self._tunnel()
 397                 try:
 398                     self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_TLSv1)
 399                 except ssl.SSLError:
 400                     self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
 401
 402         class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
 403             def https_open(self, req):
 404                 return self.do_open(HTTPSConnectionV3, req)
 405         return HTTPSHandlerV3(**kwargs)
 406     elif hasattr(ssl, 'create_default_context'):  # Python >= 3.4
 407         context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
 408         context.options &= ~ssl.OP_NO_SSLv3  # Allow older, not-as-secure SSLv3
 409         if opts_no_check_certificate:
 410             context.verify_mode = ssl.CERT_NONE
 411         return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
 412     else:  # Python < 3.4
 413         context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
 414         context.verify_mode = (ssl.CERT_NONE
 415                                if opts_no_check_certificate
 416                                else ssl.CERT_REQUIRED)
 417         context.set_default_verify_paths()
 418         try:
 419             context.load_default_certs()
 420         except AttributeError:
 421             pass  # Python < 3.4
 422         return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
 423
 424
 425 class ExtractorError(Exception):
 426     """Error during info extraction."""
 427     def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
 428         """ tb, if given, is the original traceback (so that it can be printed out).
 429         If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
 430         """
 431
 432         if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
 433             expected = True
 434         if video_id is not None:
 435             msg = video_id + ': ' + msg
 436         if cause:
 437             msg += ' (caused by %r)' % cause
 438         if not expected:
 439             if ytdl_is_updateable():
 440                 update_cmd = 'type  youtube-dl -U  to update'
 441             else:
 442                 update_cmd = 'see  https://yt-dl.org/update  on how to update'
 443             msg += '; please report this issue on https://yt-dl.org/bug .'
 444             msg += ' Make sure you are using the latest version; %s.' % update_cmd
 445             msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
 446         super(ExtractorError, self).__init__(msg)
 447
 448         self.traceback = tb
 449         self.exc_info = sys.exc_info()  # preserve original exception
 450         self.cause = cause
 451         self.video_id = video_id
 452
 453     def format_traceback(self):
 454         if self.traceback is None:
 455             return None
 456         return ''.join(traceback.format_tb(self.traceback))
 457
 458
 459 class RegexNotFoundError(ExtractorError):
 460     """Error when a regex didn't match"""
 461     pass
 462
 463
 464 class DownloadError(Exception):
 465     """Download Error exception.
 466
 467     This exception may be thrown by FileDownloader objects if they are not
 468     configured to continue on errors. They will contain the appropriate
 469     error message.
 470     """
 471     def __init__(self, msg, exc_info=None):
 472         """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
 473         super(DownloadError, self).__init__(msg)
 474         self.exc_info = exc_info
 475
 476
 477 class SameFileError(Exception):
 478     """Same File exception.
 479
 480     This exception will be thrown by FileDownloader objects if they detect
 481     multiple files would have to be downloaded to the same file on disk.
 482     """
 483     pass
 484
 485
 486 class PostProcessingError(Exception):
 487     """Post Processing exception.
 488
 489     This exception may be raised by PostProcessor's .run() method to
 490     indicate an error in the postprocessing task.
 491     """
 492     def __init__(self, msg):
 493         self.msg = msg
 494
 495 class MaxDownloadsReached(Exception):
 496     """ --max-downloads limit has been reached. """
 497     pass
 498
 499
 500 class UnavailableVideoError(Exception):
 501     """Unavailable Format exception.
 502
 503     This exception will be thrown when a video is requested
 504     in a format that is not available for that video.
 505     """
 506     pass
 507
 508
 509 class ContentTooShortError(Exception):
 510     """Content Too Short exception.
 511
 512     This exception may be raised by FileDownloader objects when a file they
 513     download is too small for what the server announced first, indicating
 514     the connection was probably interrupted.
 515     """
 516     # Both in bytes
 517     downloaded = None
 518     expected = None
 519
 520     def __init__(self, downloaded, expected):
 521         self.downloaded = downloaded
 522         self.expected = expected
 523
 524 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
 525     """Handler for HTTP requests and responses.
 526
 527     This class, when installed with an OpenerDirector, automatically adds
 528     the standard headers to every HTTP request and handles gzipped and
 529     deflated responses from web servers. If compression is to be avoided in
 530     a particular request, the original request in the program code only has
 531     to include the HTTP header "Youtubedl-No-Compression", which will be
 532     removed before making the real request.
 533
 534     Part of this code was copied from:
 535
 536     http://techknack.net/python-urllib2-handlers/
 537
 538     Andrew Rowls, the author of that code, agreed to release it to the
 539     public domain.
 540     """
 541
 542     @staticmethod
 543     def deflate(data):
 544         try:
 545             return zlib.decompress(data, -zlib.MAX_WBITS)
 546         except zlib.error:
 547             return zlib.decompress(data)
 548
 549     @staticmethod
 550     def addinfourl_wrapper(stream, headers, url, code):
 551         if hasattr(compat_urllib_request.addinfourl, 'getcode'):
 552             return compat_urllib_request.addinfourl(stream, headers, url, code)
 553         ret = compat_urllib_request.addinfourl(stream, headers, url)
 554         ret.code = code
 555         return ret
 556
 557     def http_request(self, req):
 558         for h, v in std_headers.items():
 559             if h not in req.headers:
 560                 req.add_header(h, v)
 561         if 'Youtubedl-no-compression' in req.headers:
 562             if 'Accept-encoding' in req.headers:
 563                 del req.headers['Accept-encoding']
 564             del req.headers['Youtubedl-no-compression']
 565         if 'Youtubedl-user-agent' in req.headers:
 566             if 'User-agent' in req.headers:
 567                 del req.headers['User-agent']
 568             req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
 569             del req.headers['Youtubedl-user-agent']
 570
 571         if sys.version_info < (2, 7) and '#' in req.get_full_url():
 572             # Python 2.6 is brain-dead when it comes to fragments
 573             req._Request__original = req._Request__original.partition('#')[0]
 574             req._Request__r_type = req._Request__r_type.partition('#')[0]
 575
 576         return req
 577
 578     def http_response(self, req, resp):
 579         old_resp = resp
 580         # gzip
 581         if resp.headers.get('Content-encoding', '') == 'gzip':
 582             content = resp.read()
 583             gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
 584             try:
 585                 uncompressed = io.BytesIO(gz.read())
 586             except IOError as original_ioerror:
 587                 # There may be junk add the end of the file
 588                 # See http://stackoverflow.com/q/4928560/35070 for details
 589                 for i in range(1, 1024):
 590                     try:
 591                         gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
 592                         uncompressed = io.BytesIO(gz.read())
 593                     except IOError:
 594                         continue
 595                     break
 596                 else:
 597                     raise original_ioerror
 598             resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
 599             resp.msg = old_resp.msg
 600         # deflate
 601         if resp.headers.get('Content-encoding', '') == 'deflate':
 602             gz = io.BytesIO(self.deflate(resp.read()))
 603             resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
 604             resp.msg = old_resp.msg
 605         return resp
 606
 607     https_request = http_request
 608     https_response = http_response
 609
 610
 611 def parse_iso8601(date_str, delimiter='T'):
 612     """ Return a UNIX timestamp from the given date """
 613
 614     if date_str is None:
 615         return None
 616
 617     m = re.search(
 618         r'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
 619         date_str)
 620     if not m:
 621         timezone = datetime.timedelta()
 622     else:
 623         date_str = date_str[:-len(m.group(0))]
 624         if not m.group('sign'):
 625             timezone = datetime.timedelta()
 626         else:
 627             sign = 1 if m.group('sign') == '+' else -1
 628             timezone = datetime.timedelta(
 629                 hours=sign * int(m.group('hours')),
 630                 minutes=sign * int(m.group('minutes')))
 631     date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
 632     dt = datetime.datetime.strptime(date_str, date_format) - timezone
 633     return calendar.timegm(dt.timetuple())
 634
 635
 636 def unified_strdate(date_str):
 637     """Return a string with the date in the format YYYYMMDD"""
 638
 639     if date_str is None:
 640         return None
 641
 642     upload_date = None
 643     #Replace commas
 644     date_str = date_str.replace(',', ' ')
 645     # %z (UTC offset) is only supported in python>=3.2
 646     date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
 647     format_expressions = [
 648         '%d %B %Y',
 649         '%d %b %Y',
 650         '%B %d %Y',
 651         '%b %d %Y',
 652         '%b %dst %Y %I:%M%p',
 653         '%b %dnd %Y %I:%M%p',
 654         '%b %dth %Y %I:%M%p',
 655         '%Y-%m-%d',
 656         '%Y/%m/%d',
 657         '%d.%m.%Y',
 658         '%d/%m/%Y',
 659         '%d/%m/%y',
 660         '%Y/%m/%d %H:%M:%S',
 661         '%d/%m/%Y %H:%M:%S',
 662         '%Y-%m-%d %H:%M:%S',
 663         '%Y-%m-%d %H:%M:%S.%f',
 664         '%d.%m.%Y %H:%M',
 665         '%d.%m.%Y %H.%M',
 666         '%Y-%m-%dT%H:%M:%SZ',
 667         '%Y-%m-%dT%H:%M:%S.%fZ',
 668         '%Y-%m-%dT%H:%M:%S.%f0Z',
 669         '%Y-%m-%dT%H:%M:%S',
 670         '%Y-%m-%dT%H:%M:%S.%f',
 671         '%Y-%m-%dT%H:%M',
 672     ]
 673     for expression in format_expressions:
 674         try:
 675             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
 676         except ValueError:
 677             pass
 678     if upload_date is None:
 679         timetuple = email.utils.parsedate_tz(date_str)
 680         if timetuple:
 681             upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
 682     return upload_date
 683
 684 def determine_ext(url, default_ext='unknown_video'):
 685     if url is None:
 686         return default_ext
 687     guess = url.partition('?')[0].rpartition('.')[2]
 688     if re.match(r'^[A-Za-z0-9]+$', guess):
 689         return guess
 690     else:
 691         return default_ext
 692
 693 def subtitles_filename(filename, sub_lang, sub_format):
 694     return filename.rsplit('.', 1)[0] + '.' + sub_lang + '.' + sub_format
 695
 696 def date_from_str(date_str):
 697     """
 698     Return a datetime object from a string in the format YYYYMMDD or
 699     (now|today)[+-][0-9](day|week|month|year)(s)?"""
 700     today = datetime.date.today()
 701     if date_str == 'now'or date_str == 'today':
 702         return today
 703     match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
 704     if match is not None:
 705         sign = match.group('sign')
 706         time = int(match.group('time'))
 707         if sign == '-':
 708             time = -time
 709         unit = match.group('unit')
 710         #A bad aproximation?
 711         if unit == 'month':
 712             unit = 'day'
 713             time *= 30
 714         elif unit == 'year':
 715             unit = 'day'
 716             time *= 365
 717         unit += 's'
 718         delta = datetime.timedelta(**{unit: time})
 719         return today + delta
 720     return datetime.datetime.strptime(date_str, "%Y%m%d").date()
 721
 722 def hyphenate_date(date_str):
 723     """
 724     Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
 725     match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
 726     if match is not None:
 727         return '-'.join(match.groups())
 728     else:
 729         return date_str
 730
 731 class DateRange(object):
 732     """Represents a time interval between two dates"""
 733     def __init__(self, start=None, end=None):
 734         """start and end must be strings in the format accepted by date"""
 735         if start is not None:
 736             self.start = date_from_str(start)
 737         else:
 738             self.start = datetime.datetime.min.date()
 739         if end is not None:
 740             self.end = date_from_str(end)
 741         else:
 742             self.end = datetime.datetime.max.date()
 743         if self.start > self.end:
 744             raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
 745     @classmethod
 746     def day(cls, day):
 747         """Returns a range that only contains the given day"""
 748         return cls(day,day)
 749     def __contains__(self, date):
 750         """Check if the date is in the range"""
 751         if not isinstance(date, datetime.date):
 752             date = date_from_str(date)
 753         return self.start <= date <= self.end
 754     def __str__(self):
 755         return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
 756
 757
 758 def platform_name():
 759     """ Returns the platform name as a compat_str """
 760     res = platform.platform()
 761     if isinstance(res, bytes):
 762         res = res.decode(preferredencoding())
 763
 764     assert isinstance(res, compat_str)
 765     return res
 766
 767
def _windows_write_string(s, out):
    """ Write s to a Windows console through WriteConsoleW, if possible.

    Returns True if the string was written using special methods,
    False if it has yet to be written out (out has no file descriptor,
    the descriptor is neither 1 nor 2, or its handle is not a console).
    Raises OSError if WriteConsoleW reports a failure."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # File descriptor -> argument for GetStdHandle (in the Win32 API -11 is
    # STD_OUTPUT_HANDLE and -12 is STD_ERROR_HANDLE).
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    # Bind the kernel32 functions with explicit prototypes.
    GetStdHandle = ctypes.WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ("GetStdHandle", ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(("WriteConsoleW", ctypes.windll.kernel32))
    # Output parameter of WriteConsoleW; read back after each call below.
    written = ctypes.wintypes.DWORD(0)

    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(("GetFileType", ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ("GetConsoleMode", ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # A console is a character device for which GetConsoleMode succeeds.
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character above U+FFFF, or len(s) if none.
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    # Write in chunks of at most 1024 characters, stopping each chunk in
    # front of a non-BMP character.  A count of 0 means s starts with such
    # a character; it is then written on its own with a length of 2.
    while s:
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
 838
 839
 840 def write_string(s, out=None, encoding=None):
 841     if out is None:
 842         out = sys.stderr
 843     assert type(s) == compat_str
 844
 845     if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
 846         if _windows_write_string(s, out):
 847             return
 848
 849     if ('b' in getattr(out, 'mode', '') or
 850             sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
 851         byt = s.encode(encoding or preferredencoding(), 'ignore')
 852         out.write(byt)
 853     elif hasattr(out, 'buffer'):
 854         enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
 855         byt = s.encode(enc, 'ignore')
 856         out.buffer.write(byt)
 857     else:
 858         out.write(s)
 859     out.flush()
 860
 861
 862 def bytes_to_intlist(bs):
 863     if not bs:
 864         return []
 865     if isinstance(bs[0], int):  # Python 3
 866         return list(bs)
 867     else:
 868         return [ord(c) for c in bs]
 869
 870
 871 def intlist_to_bytes(xs):
 872     if not xs:
 873         return b''
 874     return struct_pack('%dB' % len(xs), *xs)
 875
 876
# Cross-platform file locking
# Both branches define _lock_file(f, exclusive) and _unlock_file(f):
# win32 goes through LockFileEx/UnlockFileEx via ctypes, every other
# platform uses fcntl.flock.
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    # Structure passed (by pointer) as the last argument of
    # LockFileEx/UnlockFileEx
    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    # Declare argument and result types so ctypes converts them correctly
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low/high parts of the byte count passed to every lock and unlock call
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        """Lock the open file f; raise OSError if LockFileEx fails."""
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Stored on the file object so that _unlock_file can pass the same
        # pointer back
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # dwFlags is 0x2 when an exclusive lock is requested, 0x0 otherwise
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """Unlock a file locked by _lock_file; raise OSError on failure."""
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    import fcntl

    def _lock_file(f, exclusive):
        """Take an exclusive (LOCK_EX) or shared (LOCK_SH) flock on f."""
        fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

    def _unlock_file(f):
        """Release the flock held on f."""
        fcntl.flock(f, fcntl.LOCK_UN)
 940
 941
 942 class locked_file(object):
 943     def __init__(self, filename, mode, encoding=None):
 944         assert mode in ['r', 'a', 'w']
 945         self.f = io.open(filename, mode, encoding=encoding)
 946         self.mode = mode
 947
 948     def __enter__(self):
 949         exclusive = self.mode != 'r'
 950         try:
 951             _lock_file(self.f, exclusive)
 952         except IOError:
 953             self.f.close()
 954             raise
 955         return self
 956
 957     def __exit__(self, etype, value, traceback):
 958         try:
 959             _unlock_file(self.f)
 960         finally:
 961             self.f.close()
 962
 963     def __iter__(self):
 964         return iter(self.f)
 965
 966     def write(self, *args):
 967         return self.f.write(*args)
 968
 969     def read(self, *args):
 970         return self.f.read(*args)
 971
 972
 973 def get_filesystem_encoding():
 974     encoding = sys.getfilesystemencoding()
 975     return encoding if encoding is not None else 'utf-8'
 976
 977
 978 def shell_quote(args):
 979     quoted_args = []
 980     encoding = get_filesystem_encoding()
 981     for a in args:
 982         if isinstance(a, bytes):
 983             # We may get a filename encoded with 'encodeFilename'
 984             a = a.decode(encoding)
 985         quoted_args.append(pipes.quote(a))
 986     return ' '.join(quoted_args)
 987
 988
 989 def takewhile_inclusive(pred, seq):
 990     """ Like itertools.takewhile, but include the latest evaluated element
 991         (the first element so that Not pred(e)) """
 992     for e in seq:
 993         yield e
 994         if not pred(e):
 995             return
 996
 997
 998 def smuggle_url(url, data):
 999     """ Pass additional data in a URL for internal use. """
1000
1001     sdata = compat_urllib_parse.urlencode(
1002         {'__youtubedl_smuggle': json.dumps(data)})
1003     return url + '#' + sdata
1004
1005
def unsmuggle_url(smug_url, default=None):
    """Split a URL produced by smuggle_url into (url, data).

    When smug_url carries no smuggled data, (smug_url, default) is
    returned unchanged.
    """
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, encoded = smug_url.rpartition('#')
    smuggled = compat_parse_qs(encoded)['__youtubedl_smuggle']
    return url, json.loads(smuggled[0])
1013
1014
def format_bytes(bytes):
    """Format a byte count as a human-readable string such as '1.50KiB'.

    bytes may be a number, a numeric string, or None (which gives 'N/A').
    The unit is chosen among B..YiB; amounts below one byte are shown in
    B and amounts beyond the largest unit are shown in YiB.
    Negative amounts are not supported (math.log raises ValueError).
    """
    if bytes is None:
        return 'N/A'
    if isinstance(bytes, str):
        bytes = float(bytes)
    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
        # Clamp to the available units: a negative exponent (tiny values)
        # would silently index from the end of the list, and a too large
        # one would raise IndexError.
        exponent = max(0, min(exponent, len(suffixes) - 1))
    suffix = suffixes[exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffix)
1027
1028
def get_term_width():
    """Return the terminal width in columns, or None if it is unknown.

    The COLUMNS environment variable is used when it holds an integer;
    otherwise the second field of the output of `stty size` is used.
    """
    columns = compat_getenv('COLUMNS', None)
    if columns:
        try:
            return int(columns)
        except ValueError:
            # Unusable COLUMNS value: fall back to asking stty
            pass

    try:
        sp = subprocess.Popen(
            ['stty', 'size'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, err = sp.communicate()
        return int(out.split()[1])
    except Exception:
        # Best effort only (stty missing, no tty, unexpected output), but
        # do not swallow KeyboardInterrupt/SystemExit as a bare except did
        pass
    return None
1043
1044
def month_by_name(name):
    """Return the number (1-12) of a month given its English name.

    The lookup is independent of the locale and case-sensitive; None is
    returned for anything that is not a full English month name.
    """
    english_months = [
        'January', 'February', 'March', 'April', 'May', 'June',
        'July', 'August', 'September', 'October', 'November', 'December']
    if name in english_months:
        return english_months.index(name) + 1
    return None
1055
1056
def fix_xml_ampersands(xml_str):
    """Replace every bare '&' in xml_str by '&amp;'.

    Ampersands that already start one of the recognised entities
    (amp, lt, gt, apos, quot, numeric references) are left alone.
    """
    bare_ampersand = re.compile(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)')
    return bare_ampersand.sub('&amp;', xml_str)
1063
1064
def setproctitle(title):
    """Set the name of the current process to title, where supported.

    This calls prctl option 15 (PR_SET_NAME on Linux) from libc.so.6 and
    silently does nothing when that library or function is unavailable.
    """
    assert isinstance(title, compat_str)
    try:
        libc = ctypes.cdll.LoadLibrary("libc.so.6")
    except OSError:
        return
    # Initialising the buffer from the bytes makes it one byte longer than
    # the title, so it is NUL-terminated.  A buffer sized exactly
    # len(title_bytes) has no terminator and prctl could read past its end.
    buf = ctypes.create_string_buffer(title.encode('utf-8'))
    try:
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
1078
1079
def remove_start(s, start):
    """Return s without the prefix start; s is returned as-is otherwise."""
    return s[len(start):] if s.startswith(start) else s
1084
1085
def remove_end(s, end):
    """Return s without the suffix end; s is returned as-is otherwise."""
    # An empty suffix must be special-cased: every string ends with '',
    # and s[:-0] is the empty string rather than s.
    if end and s.endswith(end):
        return s[:-len(end)]
    return s
1090
1091
def url_basename(url):
    """Return the last '/'-separated component of the path of url."""
    url_path = compat_urlparse.urlparse(url).path
    components = url_path.strip('/').split('/')
    return components[-1]
1095
1096
class HEADRequest(compat_urllib_request.Request):
    """Request whose get_method() always reports the HEAD method."""

    def get_method(self):
        return 'HEAD'
1100
1101
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Convert v to an int, or return default when there is no value.

    If get_attr is given and v is not None, the attribute of that name is
    read from v first (a missing attribute counts as no value).  None and
    the empty string count as no value.  The result is
    int(v) * invscale // scale.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v == '':
        v = None
    if v is None:
        return default
    return int(v) * invscale // scale
1109
1110
def str_or_none(v, default=None):
    """Return v converted with compat_str, or default when v is None."""
    if v is None:
        return default
    return compat_str(v)
1113
1114
def str_to_int(int_str):
    """A more relaxed version of int_or_none.

    Commas, dots and plus signs are removed from int_str before it is
    converted; None is passed through.
    """
    if int_str is None:
        return None
    digits = re.sub(r'[,\.\+]', '', int_str)
    return int(digits)
1121
1122
def float_or_none(v, scale=1, invscale=1, default=None):
    """Return float(v) * invscale / scale, or default when v is None."""
    if v is None:
        return default
    return float(v) * invscale / scale
1125
1126
def parse_duration(s):
    """Parse a duration such as '1:02:03', '3 min 5s' or '12.5' in seconds.

    Returns None when s is None or is not recognised.  The result is an
    int, or a float when the seconds have a fractional part.
    """
    if s is None:
        return None

    m = re.match(
        r'''(?ix)T?
            (?:
                (?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?
                (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
            )?
            (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?$''',
        s.strip())
    if not m:
        return None

    total = int(m.group('secs'))
    minutes = m.group('mins')
    if minutes:
        total += int(minutes) * 60
        # Hours can only be present together with minutes
        hours = m.group('hours')
        if hours:
            total += int(hours) * 60 * 60
    fraction = m.group('ms')
    if fraction:
        total += float(fraction)
    return total
1150
1151
def prepend_extension(filename, ext):
    """Insert ext before the real extension: ('a.mp4', 'tmp') -> 'a.tmp.mp4'."""
    split = os.path.splitext(filename)
    return '{0}.{1}{2}'.format(split[0], ext, split[1])
1155
1156
def check_executable(exe, args=[]):
    """Check whether the binary exe can be run and return its name.

    args can be a list of arguments for a short output (like -version).
    Returns exe if starting it worked, False if it raised OSError.
    """
    command = [exe] + args
    try:
        process = subprocess.Popen(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process.communicate()
    except OSError:
        return False
    return exe
1165
1166
def get_exe_version(exe, args=['--version'],
                    version_re=r'version\s+([-0-9._a-zA-Z]+)',
                    unrecognized='present'):
    """Return the version of the specified executable.

    The executable is run with args and version_re is searched in the
    first line of its combined stdout/stderr output.  Returns group 1 of
    the match, unrecognized when the pattern does not match, or False if
    the executable is not present (starting it raised OSError).
    """
    # Note on the default version_re: the '-' is listed first in the
    # character class so it is a literal.  Written as '._-a-z' it formed
    # the range '_'..'a' and most lowercase letters were not accepted.
    try:
        out, err = subprocess.Popen(
            [exe] + args,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
    except OSError:
        return False
    firstline = out.partition(b'\n')[0].decode('ascii', 'ignore')
    m = re.search(version_re, firstline)
    if m:
        return m.group(1)
    return unrecognized
1184
1185
class PagedList(object):
    """Base class of paged lists; subclasses provide getslice()."""

    def __len__(self):
        # This is only useful for tests
        everything = self.getslice()
        return len(everything)
1190
1191
class OnDemandPagedList(PagedList):
    """Paged list whose pages are fetched one by one while slicing.

    pagefunc(pagenum) must return an iterable with the entries of the
    given (0-based) page; pagesize is the number of entries of a full page.
    """

    def __init__(self, pagefunc, pagesize):
        self._pagefunc = pagefunc
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Return a list of the entries from index start up to end.

        With end=None pages are fetched until one of them is not full.
        """
        res = []
        # Begin with the page that contains the start index
        for pagenum in itertools.count(start // self._pagesize):
            # Indices of the first entry of this page and of the next page
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = list(self._pagefunc(pagenum))

            # Offset of the first wanted entry inside this page
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            # Offset just past the last wanted entry when the slice ends
            # within this page, None when the page is wanted up to its end
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
1233
1234
class InAdvancePagedList(PagedList):
    """Paged list whose number of pages is known in advance.

    pagefunc(pagenum) must return an iterable with the entries of the
    given (0-based) page, pagecount is the total number of pages and
    pagesize the number of entries of a full page.
    """

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Return a list of the entries from index start up to end.

        With end=None all pages from the one containing start are used.
        """
        res = []
        start_page = start // self._pagesize
        # Never ask for pages beyond the known page count, even when end
        # points past the last entry
        end_page = (
            self._pagecount if end is None
            else min(self._pagecount, end // self._pagesize + 1))
        # Entries to drop at the beginning of the first fetched page
        skip_elems = start - start_page * self._pagesize
        # Number of entries still wanted (None means no limit)
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    # This page completes the requested slice
                    res.extend(page[:only_more])
                    break
            res.extend(page)
        return res
1262
1263
def uppercase_escape(s):
    """Replace every \\UXXXXXXXX escape sequence in s by its character."""
    decode_escape = codecs.getdecoder('unicode_escape')

    def expand(match):
        # The decoder returns (text, length consumed); keep the text only
        return decode_escape(match.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', expand, s)
1270
1271
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # On Python 2, unicode strings are encoded as UTF-8 before quoting
    is_py2_unicode = sys.version_info < (3, 0) and isinstance(s, unicode)
    if is_py2_unicode:
        s = s.encode('utf-8')
    # The second argument lists the characters that are left unquoted
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
1277
1278
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parts = compat_urllib_parse_urlparse(url)
    # Only these four components are escaped; the others are kept as parsed
    escaped_path = escape_rfc3986(parts.path)
    escaped_params = escape_rfc3986(parts.params)
    escaped_query = escape_rfc3986(parts.query)
    escaped_fragment = escape_rfc3986(parts.fragment)
    escaped = parts._replace(
        path=escaped_path,
        params=escaped_params,
        query=escaped_query,
        fragment=escaped_fragment)
    return escaped.geturl()
1288
# struct_pack/struct_unpack: versions of struct.pack/struct.unpack that
# accept a text (compat_str) format specification on every Python version.
try:
    # Probe whether struct accepts the format given as a plain literal
    struct.pack('!I', 0)
except TypeError:
    # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
    def struct_pack(spec, *args):
        """Call struct.pack, encoding a text spec to ASCII bytes first."""
        if isinstance(spec, compat_str):
            spec = spec.encode('ascii')
        return struct.pack(spec, *args)

    def struct_unpack(spec, *args):
        """Call struct.unpack, encoding a text spec to ASCII bytes first."""
        if isinstance(spec, compat_str):
            spec = spec.encode('ascii')
        return struct.unpack(spec, *args)
else:
    # The probe worked: the struct functions can be used directly
    struct_pack = struct.pack
    struct_unpack = struct.unpack
1305
1306
def read_batch_urls(batch_fd):
    """Read the URLs of a batch file and close it.

    batch_fd is an iterable of lines (text or UTF-8 encoded bytes) with a
    close() method.  Returns the list of stripped lines, without empty
    lines and without lines starting with '#', ';' or ']' (comments).
    """
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        # A UTF-8 byte order mark shows up either as U+FEFF (correctly
        # decoded) or as the three characters below (decoded as latin-1)
        for bom in ('\xef\xbb\xbf', '\ufeff'):
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
1321
1322
def urlencode_postdata(*args, **kargs):
    """URL-encode the arguments with urlencode; return ASCII bytes."""
    encoded = compat_urllib_parse.urlencode(*args, **kargs)
    return encoded.encode('ascii')
1325
1326
# etree_iter(node) iterates over the elements of an ElementTree node,
# using Element.iter where it exists.
etree_iter = getattr(xml.etree.ElementTree.Element, 'iter', None)
if etree_iter is None:  # Python <=2.6
    etree_iter = lambda n: n.findall('.//*')
1331
1332
def parse_xml(s):
    """Parse the XML document in the text string s; return its root element.

    Document type declarations are ignored.  On Python 2, byte-string
    element texts are decoded from UTF-8.
    """
    class DoctypeIgnoringBuilder(xml.etree.ElementTree.TreeBuilder):
        def doctype(self, name, pubid, system):
            pass  # Ignore doctypes

    parser = xml.etree.ElementTree.XMLParser(target=DoctypeIgnoringBuilder())
    # The custom parser is only passed on when running on Python >= 2.7
    xml_kwargs = {}
    if sys.version_info >= (2, 7):
        xml_kwargs = {'parser': parser}
    root = xml.etree.ElementTree.XML(s.encode('utf-8'), **xml_kwargs)

    # Fix up XML parser in Python 2.x
    if sys.version_info < (3, 0):
        for node in etree_iter(root):
            if node.text is not None and not isinstance(node.text, compat_str):
                node.text = node.text.decode('utf-8')
    return root
1348
1349
# Numeric value for each US rating label; parse_age_limit looks labels up
# here when its argument is not a plain number.
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
1357
1358
def parse_age_limit(s):
    """Return the age limit described by s as an int, or None.

    s is either a one or two digit number, optionally followed by '+'
    (e.g. '18+'), or a key of US_RATINGS.
    """
    if s is None:
        return None
    numeric = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if numeric:
        return int(numeric.group('age'))
    return US_RATINGS.get(s, None)
1364
1365
def strip_jsonp(code):
    """Return the argument of a JSONP call such as 'callback({...});'.

    Code that does not look like a JSONP call is returned unchanged.
    """
    jsonp_call = re.compile(
        r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$')
    return jsonp_call.sub(r'\1', code)
1369
1370
def js_to_json(code):
    """Convert a JavaScript object literal into JSON text.

    Single-quoted strings and bare identifiers are turned into
    double-quoted strings (true, false and null are kept), and a comma
    directly before a closing ']' is dropped.
    """
    # Replacements applied inside single-quoted strings
    requote = {
        '\\\\': '\\\\',
        "\\'": "'",
        '"': '\\"',
    }

    def fix_kv(m):
        token = m.group(0)
        # JSON literals and double-quoted strings are valid as they are
        if token in ('true', 'false', 'null') or token.startswith('"'):
            return token
        if token.startswith("'"):
            inner = token[1:-1]
            token = re.sub(
                r"\\\\|\\'|\"", lambda hit: requote[hit.group(0)], inner)
        return '"%s"' % token

    converted = re.sub(r'''(?x)
        "(?:[^"\\]*(?:\\\\|\\")?)*"|
        '(?:[^'\\]*(?:\\\\|\\')?)*'|
        [a-zA-Z_][a-zA-Z_0-9]*
        ''', fix_kv, code)
    return re.sub(r',(\s*\])', lambda m: m.group(1), converted)
1394
1395
def qualities(quality_ids):
    """Get a numeric quality value out of a list of possible values.

    Returns a function mapping a quality id to its index in quality_ids,
    or to -1 when the id is not listed.
    """
    def q(qid):
        if qid in quality_ids:
            return quality_ids.index(qid)
        return -1
    return q
1404
1405
# Default output template with %-style named placeholders for title, id
# and ext (presumably used for output file names; confirm against callers)
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
1407
1408
def limit_length(s, length):
    """Add ellipses to overly long strings.

    Strings longer than length are cut so that, together with the
    trailing '...', they are length characters long.  None stays None.
    """
    if s is None:
        return None
    if len(s) <= length:
        return s
    ellipses = '...'
    kept = length - len(ellipses)
    return s[:kept] + ellipses
1417
1418
def version_tuple(v):
    """Split a dotted version string into a list of ints ('1.2' -> [1, 2])."""
    return list(map(int, v.split('.')))
1421
1422
def is_outdated_version(version, limit, assume_new=True):
    """Return whether version is older than limit.

    When version is empty or one of the versions cannot be parsed, the
    answer is "not outdated" if assume_new is true and "outdated" if not.
    """
    when_unknown = not assume_new
    if not version:
        return when_unknown
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return when_unknown
1430
1431
def ytdl_is_updateable():
    """Returns if youtube-dl can be updated with -U"""
    from zipimport import zipimporter

    # True when this module was loaded by a zipimporter or when sys has
    # a 'frozen' attribute
    loader = globals().get('__loader__')
    if isinstance(loader, zipimporter):
        return True
    return hasattr(sys, 'frozen')
1437
1438
def args_to_str(args):
    """Get a short string representation for a subprocess command."""
    quoted = [shlex_quote(arg) for arg in args]
    return ' '.join(quoted)