[extractor/common] Allow multiline content tags

[youtube-dl] / youtube_dl / utils.py
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py

index 7c0fb1592914e961a6b1e790bf14fb5525eff08a..efbe64fb315dba92383f8b1053a80558f4a12d7b 100644 (file)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -34,7 +34,6 @@ from .compat import (
      compat_chr,
      compat_getenv,
      compat_html_entities,
-    compat_html_parser,
      compat_parse_qs,
      compat_str,
      compat_urllib_error,
@@ -42,6 +41,7 @@ from .compat import (
      compat_urllib_parse_urlparse,
      compat_urllib_request,
      compat_urlparse,
+    shlex_quote,
  )
  
  
@@ -56,6 +56,7 @@ std_headers = {
      'Accept-Language': 'en-us,en;q=0.5',
  }
  
+
  def preferredencoding():
      """Get preferred encoding.
  
@@ -64,7 +65,7 @@ def preferredencoding():
      """
      try:
          pref = locale.getpreferredencoding()
-        u'TEST'.encode(pref)
+        'TEST'.encode(pref)
      except:
          pref = 'UTF-8'
  
@@ -72,12 +73,25 @@ def preferredencoding():
  
  
  def write_json_file(obj, fn):
-    """ Encode obj as JSON and write it to fn, atomically """
+    """ Encode obj as JSON and write it to fn, atomically if possible """
+
+    fn = encodeFilename(fn)
+    if sys.version_info < (3, 0) and sys.platform != 'win32':
+        encoding = get_filesystem_encoding()
+        # os.path.basename returns a bytes object, but NamedTemporaryFile
+        # will fail if the filename contains non ascii characters unless we
+        # use a unicode object
+        path_basename = lambda f: os.path.basename(fn).decode(encoding)
+        # the same for os.path.dirname
+        path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
+    else:
+        path_basename = os.path.basename
+        path_dirname = os.path.dirname
  
      args = {
          'suffix': '.tmp',
-        'prefix': os.path.basename(fn) + '.',
-        'dir': os.path.dirname(fn),
+        'prefix': path_basename(fn) + '.',
+        'dir': path_dirname(fn),
          'delete': False,
      }
  
@@ -96,6 +110,13 @@ def write_json_file(obj, fn):
      try:
          with tf:
              json.dump(obj, tf)
+        if sys.platform == 'win32':
+            # Need to remove existing file on Windows, else os.rename raises
+            # WindowsError or FileExistsError.
+            try:
+                os.unlink(fn)
+            except OSError:
+                pass
          os.rename(tf.name, fn)
      except:
          try:
@@ -110,7 +131,7 @@ if sys.version_info >= (2, 7):
          """ Find the xpath xpath[@key=val] """
          assert re.match(r'^[a-zA-Z-]+$', key)
          assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
-        expr = xpath + u"[@%s='%s']" % (key, val)
+        expr = xpath + "[@%s='%s']" % (key, val)
          return node.find(expr)
  else:
      def find_xpath_attr(node, xpath, key, val):
@@ -126,6 +147,8 @@ else:
  
  # On python2.6 the xml.etree.ElementTree.Element methods don't support
  # the namespace parameter
+
+
  def xpath_with_ns(path, ns_map):
      components = [c.split(':') for c in path.split('/')]
      replaced = []
@@ -143,7 +166,7 @@ def xpath_text(node, xpath, name=None, fatal=False):
          xpath = xpath.encode('ascii')
  
      n = node.find(xpath)
-    if n is None:
+    if n is None or n.text is None:
          if fatal:
              name = xpath if name is None else name
              raise ExtractorError('Could not find XML element %s' % name)
@@ -204,7 +227,7 @@ def sanitize_open(filename, open_mode):
      It returns the tuple (stream, definitive_file_name).
      """
      try:
-        if filename == u'-':
+        if filename == '-':
              if sys.platform == 'win32':
                  import msvcrt
                  msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
@@ -217,9 +240,9 @@ def sanitize_open(filename, open_mode):
  
          # In case of error, try to remove win32 forbidden chars
          alt_filename = os.path.join(
-                        re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', path_part)
-                        for path_part in os.path.split(filename)
-                       )
+            re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part)
+            for path_part in os.path.split(filename)
+        )
          if alt_filename == filename:
              raise
          else:
@@ -236,6 +259,7 @@ def timeconvert(timestr):
          timestamp = email.utils.mktime_tz(timetuple)
      return timestamp
  
+
  def sanitize_filename(s, restricted=False, is_id=False):
      """Sanitizes a string so it could be used as part of a filename.
      If restricted is set, use a stricter subset of allowed characters.
@@ -256,7 +280,7 @@ def sanitize_filename(s, restricted=False, is_id=False):
              return '_'
          return char
  
-    result = u''.join(map(replace_insane, s))
+    result = ''.join(map(replace_insane, s))
      if not is_id:
          while '__' in result:
              result = result.replace('__', '_')
@@ -268,6 +292,7 @@ def sanitize_filename(s, restricted=False, is_id=False):
              result = '_'
      return result
  
+
  def orderedSet(iterable):
      """ Remove all duplicates from the input iterable """
      res = []
@@ -286,15 +311,15 @@ def _htmlentity_transform(entity):
      mobj = re.match(r'#(x?[0-9]+)', entity)
      if mobj is not None:
          numstr = mobj.group(1)
-        if numstr.startswith(u'x'):
+        if numstr.startswith('x'):
              base = 16
-            numstr = u'0%s' % numstr
+            numstr = '0%s' % numstr
          else:
              base = 10
          return compat_chr(int(numstr, base))
  
      # Unknown entity in name, return its literal representation
-    return (u'&%s;' % entity)
+    return ('&%s;' % entity)
  
  
  def unescapeHTML(s):
@@ -318,7 +343,7 @@ def encodeFilename(s, for_subprocess=False):
          return s
  
      if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
-        # Pass u'' directly to use Unicode APIs on Windows 2000 and up
+        # Pass '' directly to use Unicode APIs on Windows 2000 and up
          # (Detecting Windows NT 4 is tricky because 'major >= 4' would
          # match Windows 9x series as well. Besides, NT 4 is obsolete.)
          if not for_subprocess:
@@ -338,7 +363,7 @@ def encodeArgument(s):
      if not isinstance(s, compat_str):
          # Legacy code that uses byte strings
          # Uncomment the following line after fixing all post processors
-        #assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
+        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
          s = s.decode('ascii')
      return encodeFilename(s, True)
  
@@ -352,6 +377,7 @@ def decodeOption(optval):
      assert isinstance(optval, compat_str)
      return optval
  
+
  def formatSeconds(secs):
      if secs > 3600:
          return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
@@ -362,6 +388,17 @@ def formatSeconds(secs):
  
  
  def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
+    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
+        context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
+        if opts_no_check_certificate:
+            context.verify_mode = ssl.CERT_NONE
+        try:
+            return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
+        except TypeError:
+            # Python 2.7.8
+            # (create_default_context present but HTTPSHandler has no context=)
+            pass
+
      if sys.version_info < (3, 2):
          import httplib
  
@@ -383,26 +420,18 @@ def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
              def https_open(self, req):
                  return self.do_open(HTTPSConnectionV3, req)
          return HTTPSHandlerV3(**kwargs)
-    elif hasattr(ssl, 'create_default_context'):  # Python >= 3.4
-        context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
-        context.options &= ~ssl.OP_NO_SSLv3  # Allow older, not-as-secure SSLv3
-        if opts_no_check_certificate:
-            context.verify_mode = ssl.CERT_NONE
-        return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
      else:  # Python < 3.4
          context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
          context.verify_mode = (ssl.CERT_NONE
                                 if opts_no_check_certificate
                                 else ssl.CERT_REQUIRED)
          context.set_default_verify_paths()
-        try:
-            context.load_default_certs()
-        except AttributeError:
-            pass  # Python < 3.4
          return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
  
+
  class ExtractorError(Exception):
      """Error during info extraction."""
+
      def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
          """ tb, if given, is the original traceback (so that it can be printed out).
          If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
@@ -413,9 +442,15 @@ class ExtractorError(Exception):
          if video_id is not None:
              msg = video_id + ': ' + msg
          if cause:
-            msg += u' (caused by %r)' % cause
+            msg += ' (caused by %r)' % cause
          if not expected:
-            msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type  youtube-dl -U  to update.'
+            if ytdl_is_updateable():
+                update_cmd = 'type  youtube-dl -U  to update'
+            else:
+                update_cmd = 'see  https://yt-dl.org/update  on how to update'
+            msg += '; please report this issue on https://yt-dl.org/bug .'
+            msg += ' Make sure you are using the latest version; %s.' % update_cmd
+            msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
          super(ExtractorError, self).__init__(msg)
  
          self.traceback = tb
@@ -426,7 +461,14 @@ class ExtractorError(Exception):
      def format_traceback(self):
          if self.traceback is None:
              return None
-        return u''.join(traceback.format_tb(self.traceback))
+        return ''.join(traceback.format_tb(self.traceback))
+
+
+class UnsupportedError(ExtractorError):
+    def __init__(self, url):
+        super(UnsupportedError, self).__init__(
+            'Unsupported URL: %s' % url, expected=True)
+        self.url = url
  
  
  class RegexNotFoundError(ExtractorError):
@@ -441,6 +483,7 @@ class DownloadError(Exception):
      configured to continue on errors. They will contain the appropriate
      error message.
      """
+
      def __init__(self, msg, exc_info=None):
          """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
          super(DownloadError, self).__init__(msg)
@@ -462,9 +505,11 @@ class PostProcessingError(Exception):
      This exception may be raised by PostProcessor's .run() method to
      indicate an error in the postprocessing task.
      """
+
      def __init__(self, msg):
          self.msg = msg
  
+
  class MaxDownloadsReached(Exception):
      """ --max-downloads limit has been reached. """
      pass
@@ -494,6 +539,7 @@ class ContentTooShortError(Exception):
          self.downloaded = downloaded
          self.expected = expected
  
+
  class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
      """Handler for HTTP requests and responses.
  
@@ -606,17 +652,19 @@ def parse_iso8601(date_str, delimiter='T'):
      return calendar.timegm(dt.timetuple())
  
  
-def unified_strdate(date_str):
+def unified_strdate(date_str, day_first=True):
      """Return a string with the date in the format YYYYMMDD"""
  
      if date_str is None:
          return None
-
      upload_date = None
-    #Replace commas
+    # Replace commas
      date_str = date_str.replace(',', ' ')
      # %z (UTC offset) is only supported in python>=3.2
      date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
+    # Remove AM/PM + timezone
+    date_str = re.sub(r'(?i)\s*(?:AM|PM)\s+[A-Z]+', '', date_str)
+
      format_expressions = [
          '%d %B %Y',
          '%d %b %Y',
@@ -631,7 +679,6 @@ def unified_strdate(date_str):
          '%d/%m/%Y',
          '%d/%m/%y',
          '%Y/%m/%d %H:%M:%S',
-        '%d/%m/%Y %H:%M:%S',
          '%Y-%m-%d %H:%M:%S',
          '%Y-%m-%d %H:%M:%S.%f',
          '%d.%m.%Y %H:%M',
@@ -643,6 +690,14 @@ def unified_strdate(date_str):
          '%Y-%m-%dT%H:%M:%S.%f',
          '%Y-%m-%dT%H:%M',
      ]
+    if day_first:
+        format_expressions.extend([
+            '%d/%m/%Y %H:%M:%S',
+        ])
+    else:
+        format_expressions.extend([
+            '%m/%d/%Y %H:%M:%S',
+        ])
      for expression in format_expressions:
          try:
              upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
@@ -654,25 +709,30 @@ def unified_strdate(date_str):
              upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
      return upload_date
  
-def determine_ext(url, default_ext=u'unknown_video'):
+
+def determine_ext(url, default_ext='unknown_video'):
      if url is None:
          return default_ext
-    guess = url.partition(u'?')[0].rpartition(u'.')[2]
+    guess = url.partition('?')[0].rpartition('.')[2]
      if re.match(r'^[A-Za-z0-9]+$', guess):
          return guess
      else:
          return default_ext
  
+
  def subtitles_filename(filename, sub_lang, sub_format):
-    return filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
+    return filename.rsplit('.', 1)[0] + '.' + sub_lang + '.' + sub_format
+
  
  def date_from_str(date_str):
      """
      Return a datetime object from a string in the format YYYYMMDD or
      (now|today)[+-][0-9](day|week|month|year)(s)?"""
      today = datetime.date.today()
-    if date_str == 'now'or date_str == 'today':
+    if date_str in ('now', 'today'):
          return today
+    if date_str == 'yesterday':
+        return today - datetime.timedelta(days=1)
      match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
      if match is not None:
          sign = match.group('sign')
@@ -680,7 +740,7 @@ def date_from_str(date_str):
          if sign == '-':
              time = -time
          unit = match.group('unit')
-        #A bad aproximation?
+        # A bad aproximation?
          if unit == 'month':
              unit = 'day'
              time *= 30
@@ -691,7 +751,8 @@ def date_from_str(date_str):
          delta = datetime.timedelta(**{unit: time})
          return today + delta
      return datetime.datetime.strptime(date_str, "%Y%m%d").date()
-    
+
+
  def hyphenate_date(date_str):
      """
      Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
@@ -701,8 +762,10 @@ def hyphenate_date(date_str):
      else:
          return date_str
  
+
  class DateRange(object):
      """Represents a time interval between two dates"""
+
      def __init__(self, start=None, end=None):
          """start and end must be strings in the format accepted by date"""
          if start is not None:
@@ -715,17 +778,20 @@ class DateRange(object):
              self.end = datetime.datetime.max.date()
          if self.start > self.end:
              raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
+
      @classmethod
      def day(cls, day):
          """Returns a range that only contains the given day"""
-        return cls(day,day)
+        return cls(day, day)
+
      def __contains__(self, date):
          """Check if the date is in the range"""
          if not isinstance(date, datetime.date):
              date = date_from_str(date)
          return self.start <= date <= self.end
+
      def __str__(self):
-        return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
+        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
  
  
  def platform_name():
@@ -761,22 +827,22 @@ def _windows_write_string(s, out):
  
      GetStdHandle = ctypes.WINFUNCTYPE(
          ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
-        ("GetStdHandle", ctypes.windll.kernel32))
+        (b"GetStdHandle", ctypes.windll.kernel32))
      h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
  
      WriteConsoleW = ctypes.WINFUNCTYPE(
          ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
          ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
-        ctypes.wintypes.LPVOID)(("WriteConsoleW", ctypes.windll.kernel32))
+        ctypes.wintypes.LPVOID)((b"WriteConsoleW", ctypes.windll.kernel32))
      written = ctypes.wintypes.DWORD(0)
  
-    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(("GetFileType", ctypes.windll.kernel32))
+    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)((b"GetFileType", ctypes.windll.kernel32))
      FILE_TYPE_CHAR = 0x0002
      FILE_TYPE_REMOTE = 0x8000
      GetConsoleMode = ctypes.WINFUNCTYPE(
          ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
          ctypes.POINTER(ctypes.wintypes.DWORD))(
-        ("GetConsoleMode", ctypes.windll.kernel32))
+        (b"GetConsoleMode", ctypes.windll.kernel32))
      INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
  
      def not_a_console(handle):
@@ -844,10 +910,7 @@ def bytes_to_intlist(bs):
  def intlist_to_bytes(xs):
      if not xs:
          return b''
-    if isinstance(chr(0), bytes):  # Python 2
-        return ''.join([chr(x) for x in xs])
-    else:
-        return bytes(xs)
+    return struct_pack('%dB' % len(xs), *xs)
  
  
  # Cross-platform file locking
@@ -959,7 +1022,7 @@ def shell_quote(args):
              # We may get a filename encoded with 'encodeFilename'
              a = a.decode(encoding)
          quoted_args.append(pipes.quote(a))
-    return u' '.join(quoted_args)
+    return ' '.join(quoted_args)
  
  
  def takewhile_inclusive(pred, seq):
@@ -975,31 +1038,85 @@ def smuggle_url(url, data):
      """ Pass additional data in a URL for internal use. """
  
      sdata = compat_urllib_parse.urlencode(
-        {u'__youtubedl_smuggle': json.dumps(data)})
-    return url + u'#' + sdata
+        {'__youtubedl_smuggle': json.dumps(data)})
+    return url + '#' + sdata
  
  
  def unsmuggle_url(smug_url, default=None):
-    if not '#__youtubedl_smuggle' in smug_url:
+    if '#__youtubedl_smuggle' not in smug_url:
          return smug_url, default
-    url, _, sdata = smug_url.rpartition(u'#')
-    jsond = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0]
+    url, _, sdata = smug_url.rpartition('#')
+    jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
      data = json.loads(jsond)
      return url, data
  
  
  def format_bytes(bytes):
      if bytes is None:
-        return u'N/A'
+        return 'N/A'
      if type(bytes) is str:
          bytes = float(bytes)
      if bytes == 0.0:
          exponent = 0
      else:
          exponent = int(math.log(bytes, 1024.0))
-    suffix = [u'B', u'KiB', u'MiB', u'GiB', u'TiB', u'PiB', u'EiB', u'ZiB', u'YiB'][exponent]
+    suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
      converted = float(bytes) / float(1024 ** exponent)
-    return u'%.2f%s' % (converted, suffix)
+    return '%.2f%s' % (converted, suffix)
+
+
+def parse_filesize(s):
+    if s is None:
+        return None
+
+    # The lower-case forms are of course incorrect and inofficial,
+    # but we support those too
+    _UNIT_TABLE = {
+        'B': 1,
+        'b': 1,
+        'KiB': 1024,
+        'KB': 1000,
+        'kB': 1024,
+        'Kb': 1000,
+        'MiB': 1024 ** 2,
+        'MB': 1000 ** 2,
+        'mB': 1024 ** 2,
+        'Mb': 1000 ** 2,
+        'GiB': 1024 ** 3,
+        'GB': 1000 ** 3,
+        'gB': 1024 ** 3,
+        'Gb': 1000 ** 3,
+        'TiB': 1024 ** 4,
+        'TB': 1000 ** 4,
+        'tB': 1024 ** 4,
+        'Tb': 1000 ** 4,
+        'PiB': 1024 ** 5,
+        'PB': 1000 ** 5,
+        'pB': 1024 ** 5,
+        'Pb': 1000 ** 5,
+        'EiB': 1024 ** 6,
+        'EB': 1000 ** 6,
+        'eB': 1024 ** 6,
+        'Eb': 1000 ** 6,
+        'ZiB': 1024 ** 7,
+        'ZB': 1000 ** 7,
+        'zB': 1024 ** 7,
+        'Zb': 1000 ** 7,
+        'YiB': 1024 ** 8,
+        'YB': 1000 ** 8,
+        'yB': 1024 ** 8,
+        'Yb': 1000 ** 8,
+    }
+
+    units_re = '|'.join(re.escape(u) for u in _UNIT_TABLE)
+    m = re.match(
+        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
+    if not m:
+        return None
+
+    num_str = m.group('num').replace(',', '.')
+    mult = _UNIT_TABLE[m.group('unit')]
+    return int(float(num_str) * mult)
  
  
  def get_term_width():
@@ -1022,8 +1139,8 @@ def month_by_name(name):
      """ Return the number of a month by (locale-independently) English name """
  
      ENGLISH_NAMES = [
-        u'January', u'February', u'March', u'April', u'May', u'June',
-        u'July', u'August', u'September', u'October', u'November', u'December']
+        'January', 'February', 'March', 'April', 'May', 'June',
+        'July', 'August', 'September', 'October', 'November', 'December']
      try:
          return ENGLISH_NAMES.index(name) + 1
      except ValueError:
@@ -1034,7 +1151,7 @@ def fix_xml_ampersands(xml_str):
      """Replace all the '&' by '&amp;' in XML"""
      return re.sub(
          r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
-        u'&amp;',
+        '&amp;',
          xml_str)
  
  
@@ -1067,7 +1184,7 @@ def remove_end(s, end):
  
  def url_basename(url):
      path = compat_urlparse.urlparse(url).path
-    return path.strip(u'/').split(u'/')[-1]
+    return path.strip('/').split('/')[-1]
  
  
  class HEADRequest(compat_urllib_request.Request):
@@ -1092,7 +1209,7 @@ def str_to_int(int_str):
      """ A more relaxed version of int_or_none """
      if int_str is None:
          return None
-    int_str = re.sub(r'[,\.\+]', u'', int_str)
+    int_str = re.sub(r'[,\.\+]', '', int_str)
      return int(int_str)
  
  
@@ -1107,22 +1224,38 @@ def parse_duration(s):
      s = s.strip()
  
      m = re.match(
-        r'(?i)(?:(?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?(?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?$', s)
+        r'''(?ix)T?
+        (?:
+            (?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*|
+            (?P<only_hours>[0-9.]+)\s*(?:hours?)|
+
+            (?:
+                (?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?
+                (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
+            )?
+            (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?
+        )$''', s)
      if not m:
          return None
-    res = int(m.group('secs'))
+    res = 0
+    if m.group('only_mins'):
+        return float_or_none(m.group('only_mins'), invscale=60)
+    if m.group('only_hours'):
+        return float_or_none(m.group('only_hours'), invscale=60 * 60)
+    if m.group('secs'):
+        res += int(m.group('secs'))
      if m.group('mins'):
          res += int(m.group('mins')) * 60
-        if m.group('hours'):
-            res += int(m.group('hours')) * 60 * 60
+    if m.group('hours'):
+        res += int(m.group('hours')) * 60 * 60
      if m.group('ms'):
          res += float(m.group('ms'))
      return res
  
  
  def prepend_extension(filename, ext):
-    name, real_ext = os.path.splitext(filename) 
-    return u'{0}.{1}{2}'.format(name, ext, real_ext)
+    name, real_ext = os.path.splitext(filename)
+    return '{0}.{1}{2}'.format(name, ext, real_ext)
  
  
  def check_executable(exe, args=[]):
@@ -1136,18 +1269,25 @@ def check_executable(exe, args=[]):
  
  
  def get_exe_version(exe, args=['--version'],
-                    version_re=r'version\s+([0-9._-a-zA-Z]+)',
-                    unrecognized=u'present'):
+                    version_re=None, unrecognized='present'):
      """ Returns the version of the specified executable,
      or False if the executable is not present """
      try:
-        out, err = subprocess.Popen(
+        out, _ = subprocess.Popen(
              [exe] + args,
              stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
      except OSError:
          return False
-    firstline = out.partition(b'\n')[0].decode('ascii', 'ignore')
-    m = re.search(version_re, firstline)
+    if isinstance(out, bytes):  # Python 2.x
+        out = out.decode('ascii', 'ignore')
+    return detect_exe_version(out, version_re, unrecognized)
+
+
+def detect_exe_version(output, version_re=None, unrecognized='present'):
+    assert isinstance(output, compat_str)
+    if version_re is None:
+        version_re = r'version\s+([-0-9._a-zA-Z]+)'
+    m = re.search(version_re, output)
      if m:
          return m.group(1)
      else:
@@ -1258,7 +1398,7 @@ def escape_url(url):
      ).geturl()
  
  try:
-    struct.pack(u'!I', 0)
+    struct.pack('!I', 0)
  except TypeError:
      # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
      def struct_pack(spec, *args):
@@ -1279,7 +1419,7 @@ def read_batch_urls(batch_fd):
      def fixup(url):
          if not isinstance(url, compat_str):
              url = url.decode('utf-8', 'replace')
-        BOM_UTF8 = u'\xef\xbb\xbf'
+        BOM_UTF8 = '\xef\xbb\xbf'
          if url.startswith(BOM_UTF8):
              url = url[len(BOM_UTF8):]
          url = url.strip()
@@ -1335,7 +1475,8 @@ def parse_age_limit(s):
  
  
  def strip_jsonp(code):
-    return re.sub(r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?\s*$', r'\1', code)
+    return re.sub(
+        r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)
  
  
  def js_to_json(code):
@@ -1387,7 +1528,7 @@ def limit_length(s, length):
  
  
  def version_tuple(v):
-    return [int(e) for e in v.split('.')]
+    return tuple(int(e) for e in re.split(r'[-.]', v))
  
  
  def is_outdated_version(version, limit, assume_new=True):
@@ -1397,3 +1538,15 @@ def is_outdated_version(version, limit, assume_new=True):
          return version_tuple(version) < version_tuple(limit)
      except ValueError:
          return not assume_new
+
+
+def ytdl_is_updateable():
+    """ Returns if youtube-dl can be updated with -U """
+    from zipimport import zipimporter
+
+    return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
+
+
+def args_to_str(args):
+    # Get a short string representation for a subprocess command
+    return ' '.join(shlex_quote(a) for a in args)