[utils] YoutubeDLHandler: don't use 'Youtubedl-user-agent' for overriding the default...

[youtube-dl] / youtube_dl / utils.py
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py

index 7c0fb1592914e961a6b1e790bf14fb5525eff08a..d22b0313460f4dc42453bcd7772bd59f5dcfe074 100644 (file)
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -10,6 +10,7 @@ import ctypes
  import datetime
  import email.utils
  import errno
  import datetime
  import email.utils
  import errno
+import functools
  import gzip
  import itertools
  import io
  import gzip
  import itertools
  import io
@@ -34,14 +35,16 @@ from .compat import (
      compat_chr,
      compat_getenv,
      compat_html_entities,
      compat_chr,
      compat_getenv,
      compat_html_entities,
-    compat_html_parser,
+    compat_http_client,
      compat_parse_qs,
      compat_parse_qs,
+    compat_socket_create_connection,
      compat_str,
      compat_urllib_error,
      compat_urllib_parse,
      compat_urllib_parse_urlparse,
      compat_urllib_request,
      compat_urlparse,
      compat_str,
      compat_urllib_error,
      compat_urllib_parse,
      compat_urllib_parse_urlparse,
      compat_urllib_request,
      compat_urlparse,
+    shlex_quote,
  )
  
  
  )
  
  
@@ -56,6 +59,7 @@ std_headers = {
      'Accept-Language': 'en-us,en;q=0.5',
  }
  
      'Accept-Language': 'en-us,en;q=0.5',
  }
  
+
  def preferredencoding():
      """Get preferred encoding.
  
  def preferredencoding():
      """Get preferred encoding.
  
@@ -64,7 +68,7 @@ def preferredencoding():
      """
      try:
          pref = locale.getpreferredencoding()
      """
      try:
          pref = locale.getpreferredencoding()
-        u'TEST'.encode(pref)
+        'TEST'.encode(pref)
      except:
          pref = 'UTF-8'
  
      except:
          pref = 'UTF-8'
  
@@ -72,12 +76,25 @@ def preferredencoding():
  
  
  def write_json_file(obj, fn):
  
  
  def write_json_file(obj, fn):
-    """ Encode obj as JSON and write it to fn, atomically """
+    """ Encode obj as JSON and write it to fn, atomically if possible """
+
+    fn = encodeFilename(fn)
+    if sys.version_info < (3, 0) and sys.platform != 'win32':
+        encoding = get_filesystem_encoding()
+        # os.path.basename returns a bytes object, but NamedTemporaryFile
+        # will fail if the filename contains non ascii characters unless we
+        # use a unicode object
+        path_basename = lambda f: os.path.basename(fn).decode(encoding)
+        # the same for os.path.dirname
+        path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
+    else:
+        path_basename = os.path.basename
+        path_dirname = os.path.dirname
  
      args = {
          'suffix': '.tmp',
  
      args = {
          'suffix': '.tmp',
-        'prefix': os.path.basename(fn) + '.',
-        'dir': os.path.dirname(fn),
+        'prefix': path_basename(fn) + '.',
+        'dir': path_dirname(fn),
          'delete': False,
      }
  
          'delete': False,
      }
  
@@ -96,6 +113,13 @@ def write_json_file(obj, fn):
      try:
          with tf:
              json.dump(obj, tf)
      try:
          with tf:
              json.dump(obj, tf)
+        if sys.platform == 'win32':
+            # Need to remove existing file on Windows, else os.rename raises
+            # WindowsError or FileExistsError.
+            try:
+                os.unlink(fn)
+            except OSError:
+                pass
          os.rename(tf.name, fn)
      except:
          try:
          os.rename(tf.name, fn)
      except:
          try:
@@ -110,7 +134,7 @@ if sys.version_info >= (2, 7):
          """ Find the xpath xpath[@key=val] """
          assert re.match(r'^[a-zA-Z-]+$', key)
          assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
          """ Find the xpath xpath[@key=val] """
          assert re.match(r'^[a-zA-Z-]+$', key)
          assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
-        expr = xpath + u"[@%s='%s']" % (key, val)
+        expr = xpath + "[@%s='%s']" % (key, val)
          return node.find(expr)
  else:
      def find_xpath_attr(node, xpath, key, val):
          return node.find(expr)
  else:
      def find_xpath_attr(node, xpath, key, val):
@@ -126,6 +150,8 @@ else:
  
  # On python2.6 the xml.etree.ElementTree.Element methods don't support
  # the namespace parameter
  
  # On python2.6 the xml.etree.ElementTree.Element methods don't support
  # the namespace parameter
+
+
  def xpath_with_ns(path, ns_map):
      components = [c.split(':') for c in path.split('/')]
      replaced = []
  def xpath_with_ns(path, ns_map):
      components = [c.split(':') for c in path.split('/')]
      replaced = []
@@ -143,7 +169,7 @@ def xpath_text(node, xpath, name=None, fatal=False):
          xpath = xpath.encode('ascii')
  
      n = node.find(xpath)
          xpath = xpath.encode('ascii')
  
      n = node.find(xpath)
-    if n is None:
+    if n is None or n.text is None:
          if fatal:
              name = xpath if name is None else name
              raise ExtractorError('Could not find XML element %s' % name)
          if fatal:
              name = xpath if name is None else name
              raise ExtractorError('Could not find XML element %s' % name)
@@ -182,6 +208,10 @@ def get_element_by_attribute(attribute, value, html):
  
  def clean_html(html):
      """Clean an HTML snippet into a readable string"""
  
  def clean_html(html):
      """Clean an HTML snippet into a readable string"""
+
+    if html is None:  # Convenience for sanitizing descriptions etc.
+        return html
+
      # Newline vs <br />
      html = html.replace('\n', ' ')
      html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
      # Newline vs <br />
      html = html.replace('\n', ' ')
      html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
@@ -204,7 +234,7 @@ def sanitize_open(filename, open_mode):
      It returns the tuple (stream, definitive_file_name).
      """
      try:
      It returns the tuple (stream, definitive_file_name).
      """
      try:
-        if filename == u'-':
+        if filename == '-':
              if sys.platform == 'win32':
                  import msvcrt
                  msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
              if sys.platform == 'win32':
                  import msvcrt
                  msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
@@ -217,9 +247,9 @@ def sanitize_open(filename, open_mode):
  
          # In case of error, try to remove win32 forbidden chars
          alt_filename = os.path.join(
  
          # In case of error, try to remove win32 forbidden chars
          alt_filename = os.path.join(
-                        re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', path_part)
-                        for path_part in os.path.split(filename)
-                       )
+            re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part)
+            for path_part in os.path.split(filename)
+        )
          if alt_filename == filename:
              raise
          else:
          if alt_filename == filename:
              raise
          else:
@@ -236,6 +266,7 @@ def timeconvert(timestr):
          timestamp = email.utils.mktime_tz(timetuple)
      return timestamp
  
          timestamp = email.utils.mktime_tz(timetuple)
      return timestamp
  
+
  def sanitize_filename(s, restricted=False, is_id=False):
      """Sanitizes a string so it could be used as part of a filename.
      If restricted is set, use a stricter subset of allowed characters.
  def sanitize_filename(s, restricted=False, is_id=False):
      """Sanitizes a string so it could be used as part of a filename.
      If restricted is set, use a stricter subset of allowed characters.
@@ -256,7 +287,9 @@ def sanitize_filename(s, restricted=False, is_id=False):
              return '_'
          return char
  
              return '_'
          return char
  
-    result = u''.join(map(replace_insane, s))
+    # Handle timestamps
+    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
+    result = ''.join(map(replace_insane, s))
      if not is_id:
          while '__' in result:
              result = result.replace('__', '_')
      if not is_id:
          while '__' in result:
              result = result.replace('__', '_')
@@ -268,6 +301,7 @@ def sanitize_filename(s, restricted=False, is_id=False):
              result = '_'
      return result
  
              result = '_'
      return result
  
+
  def orderedSet(iterable):
      """ Remove all duplicates from the input iterable """
      res = []
  def orderedSet(iterable):
      """ Remove all duplicates from the input iterable """
      res = []
@@ -286,15 +320,15 @@ def _htmlentity_transform(entity):
      mobj = re.match(r'#(x?[0-9]+)', entity)
      if mobj is not None:
          numstr = mobj.group(1)
      mobj = re.match(r'#(x?[0-9]+)', entity)
      if mobj is not None:
          numstr = mobj.group(1)
-        if numstr.startswith(u'x'):
+        if numstr.startswith('x'):
              base = 16
              base = 16
-            numstr = u'0%s' % numstr
+            numstr = '0%s' % numstr
          else:
              base = 10
          return compat_chr(int(numstr, base))
  
      # Unknown entity in name, return its literal representation
          else:
              base = 10
          return compat_chr(int(numstr, base))
  
      # Unknown entity in name, return its literal representation
-    return (u'&%s;' % entity)
+    return ('&%s;' % entity)
  
  
  def unescapeHTML(s):
  
  
  def unescapeHTML(s):
@@ -318,7 +352,7 @@ def encodeFilename(s, for_subprocess=False):
          return s
  
      if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
          return s
  
      if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
-        # Pass u'' directly to use Unicode APIs on Windows 2000 and up
+        # Pass unicode strings directly to use Unicode APIs on Windows 2000 and up
          # (Detecting Windows NT 4 is tricky because 'major >= 4' would
          # match Windows 9x series as well. Besides, NT 4 is obsolete.)
          if not for_subprocess:
          # (Detecting Windows NT 4 is tricky because 'major >= 4' would
          # match Windows 9x series as well. Besides, NT 4 is obsolete.)
          if not for_subprocess:
@@ -338,7 +372,7 @@ def encodeArgument(s):
      if not isinstance(s, compat_str):
          # Legacy code that uses byte strings
          # Uncomment the following line after fixing all post processors
      if not isinstance(s, compat_str):
          # Legacy code that uses byte strings
          # Uncomment the following line after fixing all post processors
-        #assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
+        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
          s = s.decode('ascii')
      return encodeFilename(s, True)
  
          s = s.decode('ascii')
      return encodeFilename(s, True)
  
@@ -352,6 +386,7 @@ def decodeOption(optval):
      assert isinstance(optval, compat_str)
      return optval
  
      assert isinstance(optval, compat_str)
      return optval
  
+
  def formatSeconds(secs):
      if secs > 3600:
          return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
  def formatSeconds(secs):
      if secs > 3600:
          return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
@@ -361,48 +396,34 @@ def formatSeconds(secs):
          return '%d' % secs
  
  
          return '%d' % secs
  
  
-def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
-    if sys.version_info < (3, 2):
-        import httplib
-
-        class HTTPSConnectionV3(httplib.HTTPSConnection):
-            def __init__(self, *args, **kwargs):
-                httplib.HTTPSConnection.__init__(self, *args, **kwargs)
-
-            def connect(self):
-                sock = socket.create_connection((self.host, self.port), self.timeout)
-                if getattr(self, '_tunnel_host', False):
-                    self.sock = sock
-                    self._tunnel()
-                try:
-                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_TLSv1)
-                except ssl.SSLError:
-                    self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
-
-        class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
-            def https_open(self, req):
-                return self.do_open(HTTPSConnectionV3, req)
-        return HTTPSHandlerV3(**kwargs)
-    elif hasattr(ssl, 'create_default_context'):  # Python >= 3.4
-        context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
-        context.options &= ~ssl.OP_NO_SSLv3  # Allow older, not-as-secure SSLv3
+def make_HTTPS_handler(params, **kwargs):
+    opts_no_check_certificate = params.get('nocheckcertificate', False)
+    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
+        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
          if opts_no_check_certificate:
          if opts_no_check_certificate:
+            context.check_hostname = False
              context.verify_mode = ssl.CERT_NONE
              context.verify_mode = ssl.CERT_NONE
-        return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
+        try:
+            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
+        except TypeError:
+            # Python 2.7.8
+            # (create_default_context present but HTTPSHandler has no context=)
+            pass
+
+    if sys.version_info < (3, 2):
+        return YoutubeDLHTTPSHandler(params, **kwargs)
      else:  # Python < 3.4
      else:  # Python < 3.4
-        context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
+        context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
          context.verify_mode = (ssl.CERT_NONE
                                 if opts_no_check_certificate
                                 else ssl.CERT_REQUIRED)
          context.set_default_verify_paths()
          context.verify_mode = (ssl.CERT_NONE
                                 if opts_no_check_certificate
                                 else ssl.CERT_REQUIRED)
          context.set_default_verify_paths()
-        try:
-            context.load_default_certs()
-        except AttributeError:
-            pass  # Python < 3.4
-        return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
+        return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
+
  
  class ExtractorError(Exception):
      """Error during info extraction."""
  
  class ExtractorError(Exception):
      """Error during info extraction."""
+
      def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
          """ tb, if given, is the original traceback (so that it can be printed out).
          If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
      def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
          """ tb, if given, is the original traceback (so that it can be printed out).
          If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
@@ -413,9 +434,15 @@ class ExtractorError(Exception):
          if video_id is not None:
              msg = video_id + ': ' + msg
          if cause:
          if video_id is not None:
              msg = video_id + ': ' + msg
          if cause:
-            msg += u' (caused by %r)' % cause
+            msg += ' (caused by %r)' % cause
          if not expected:
          if not expected:
-            msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type  youtube-dl -U  to update.'
+            if ytdl_is_updateable():
+                update_cmd = 'type  youtube-dl -U  to update'
+            else:
+                update_cmd = 'see  https://yt-dl.org/update  on how to update'
+            msg += '; please report this issue on https://yt-dl.org/bug .'
+            msg += ' Make sure you are using the latest version; %s.' % update_cmd
+            msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
          super(ExtractorError, self).__init__(msg)
  
          self.traceback = tb
          super(ExtractorError, self).__init__(msg)
  
          self.traceback = tb
@@ -426,7 +453,14 @@ class ExtractorError(Exception):
      def format_traceback(self):
          if self.traceback is None:
              return None
      def format_traceback(self):
          if self.traceback is None:
              return None
-        return u''.join(traceback.format_tb(self.traceback))
+        return ''.join(traceback.format_tb(self.traceback))
+
+
+class UnsupportedError(ExtractorError):
+    def __init__(self, url):
+        super(UnsupportedError, self).__init__(
+            'Unsupported URL: %s' % url, expected=True)
+        self.url = url
  
  
  class RegexNotFoundError(ExtractorError):
  
  
  class RegexNotFoundError(ExtractorError):
@@ -441,6 +475,7 @@ class DownloadError(Exception):
      configured to continue on errors. They will contain the appropriate
      error message.
      """
      configured to continue on errors. They will contain the appropriate
      error message.
      """
+
      def __init__(self, msg, exc_info=None):
          """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
          super(DownloadError, self).__init__(msg)
      def __init__(self, msg, exc_info=None):
          """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
          super(DownloadError, self).__init__(msg)
@@ -462,9 +497,11 @@ class PostProcessingError(Exception):
      This exception may be raised by PostProcessor's .run() method to
      indicate an error in the postprocessing task.
      """
      This exception may be raised by PostProcessor's .run() method to
      indicate an error in the postprocessing task.
      """
+
      def __init__(self, msg):
          self.msg = msg
  
      def __init__(self, msg):
          self.msg = msg
  
+
  class MaxDownloadsReached(Exception):
      """ --max-downloads limit has been reached. """
      pass
  class MaxDownloadsReached(Exception):
      """ --max-downloads limit has been reached. """
      pass
@@ -494,6 +531,29 @@ class ContentTooShortError(Exception):
          self.downloaded = downloaded
          self.expected = expected
  
          self.downloaded = downloaded
          self.expected = expected
  
+
+def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
+    hc = http_class(*args, **kwargs)
+    source_address = ydl_handler._params.get('source_address')
+    if source_address is not None:
+        sa = (source_address, 0)
+        if hasattr(hc, 'source_address'):  # Python 2.7+
+            hc.source_address = sa
+        else:  # Python 2.6
+            def _hc_connect(self, *args, **kwargs):
+                sock = compat_socket_create_connection(
+                    (self.host, self.port), self.timeout, sa)
+                if is_https:
+                    self.sock = ssl.wrap_socket(
+                        sock, self.key_file, self.cert_file,
+                        ssl_version=ssl.PROTOCOL_TLSv1)
+                else:
+                    self.sock = sock
+            hc.connect = functools.partial(_hc_connect, hc)
+
+    return hc
+
+
  class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
      """Handler for HTTP requests and responses.
  
  class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
      """Handler for HTTP requests and responses.
  
@@ -512,6 +572,15 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
      public domain.
      """
  
      public domain.
      """
  
+    def __init__(self, params, *args, **kwargs):
+        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
+        self._params = params
+
+    def http_open(self, req):
+        return self.do_open(functools.partial(
+            _create_http_connection, self, compat_http_client.HTTPConnection, False),
+            req)
+
      @staticmethod
      def deflate(data):
          try:
      @staticmethod
      def deflate(data):
          try:
@@ -529,17 +598,14 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
  
      def http_request(self, req):
          for h, v in std_headers.items():
  
      def http_request(self, req):
          for h, v in std_headers.items():
-            if h not in req.headers:
+            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
+            # Because of this bug, urllib stores the header dict keys capitalized
+            if h.capitalize() not in req.headers:
                  req.add_header(h, v)
          if 'Youtubedl-no-compression' in req.headers:
              if 'Accept-encoding' in req.headers:
                  del req.headers['Accept-encoding']
              del req.headers['Youtubedl-no-compression']
                  req.add_header(h, v)
          if 'Youtubedl-no-compression' in req.headers:
              if 'Accept-encoding' in req.headers:
                  del req.headers['Accept-encoding']
              del req.headers['Youtubedl-no-compression']
-        if 'Youtubedl-user-agent' in req.headers:
-            if 'User-agent' in req.headers:
-                del req.headers['User-agent']
-            req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
-            del req.headers['Youtubedl-user-agent']
  
          if sys.version_info < (2, 7) and '#' in req.get_full_url():
              # Python 2.6 is brain-dead when it comes to fragments
  
          if sys.version_info < (2, 7) and '#' in req.get_full_url():
              # Python 2.6 is brain-dead when it comes to fragments
@@ -581,6 +647,18 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
      https_response = http_response
  
  
      https_response = http_response
  
  
+class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
+    def __init__(self, params, https_conn_class=None, *args, **kwargs):
+        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
+        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
+        self._params = params
+
+    def https_open(self, req):
+        return self.do_open(functools.partial(
+            _create_http_connection, self, self._https_conn_class, True),
+            req)
+
+
  def parse_iso8601(date_str, delimiter='T'):
      """ Return a UNIX timestamp from the given date """
  
  def parse_iso8601(date_str, delimiter='T'):
      """ Return a UNIX timestamp from the given date """
  
@@ -606,17 +684,19 @@ def parse_iso8601(date_str, delimiter='T'):
      return calendar.timegm(dt.timetuple())
  
  
      return calendar.timegm(dt.timetuple())
  
  
-def unified_strdate(date_str):
+def unified_strdate(date_str, day_first=True):
      """Return a string with the date in the format YYYYMMDD"""
  
      if date_str is None:
          return None
      """Return a string with the date in the format YYYYMMDD"""
  
      if date_str is None:
          return None
-
      upload_date = None
      upload_date = None
-    #Replace commas
+    # Replace commas
      date_str = date_str.replace(',', ' ')
      # %z (UTC offset) is only supported in python>=3.2
      date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
      date_str = date_str.replace(',', ' ')
      # %z (UTC offset) is only supported in python>=3.2
      date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
+    # Remove AM/PM + timezone
+    date_str = re.sub(r'(?i)\s*(?:AM|PM)\s+[A-Z]+', '', date_str)
+
      format_expressions = [
          '%d %B %Y',
          '%d %b %Y',
      format_expressions = [
          '%d %B %Y',
          '%d %b %Y',
@@ -625,13 +705,10 @@ def unified_strdate(date_str):
          '%b %dst %Y %I:%M%p',
          '%b %dnd %Y %I:%M%p',
          '%b %dth %Y %I:%M%p',
          '%b %dst %Y %I:%M%p',
          '%b %dnd %Y %I:%M%p',
          '%b %dth %Y %I:%M%p',
+        '%Y %m %d',
          '%Y-%m-%d',
          '%Y/%m/%d',
          '%Y-%m-%d',
          '%Y/%m/%d',
-        '%d.%m.%Y',
-        '%d/%m/%Y',
-        '%d/%m/%y',
          '%Y/%m/%d %H:%M:%S',
          '%Y/%m/%d %H:%M:%S',
-        '%d/%m/%Y %H:%M:%S',
          '%Y-%m-%d %H:%M:%S',
          '%Y-%m-%d %H:%M:%S.%f',
          '%d.%m.%Y %H:%M',
          '%Y-%m-%d %H:%M:%S',
          '%Y-%m-%d %H:%M:%S.%f',
          '%d.%m.%Y %H:%M',
@@ -643,6 +720,20 @@ def unified_strdate(date_str):
          '%Y-%m-%dT%H:%M:%S.%f',
          '%Y-%m-%dT%H:%M',
      ]
          '%Y-%m-%dT%H:%M:%S.%f',
          '%Y-%m-%dT%H:%M',
      ]
+    if day_first:
+        format_expressions.extend([
+            '%d.%m.%Y',
+            '%d/%m/%Y',
+            '%d/%m/%y',
+            '%d/%m/%Y %H:%M:%S',
+        ])
+    else:
+        format_expressions.extend([
+            '%m.%d.%Y',
+            '%m/%d/%Y',
+            '%m/%d/%y',
+            '%m/%d/%Y %H:%M:%S',
+        ])
      for expression in format_expressions:
          try:
              upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
      for expression in format_expressions:
          try:
              upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
@@ -654,25 +745,30 @@ def unified_strdate(date_str):
              upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
      return upload_date
  
              upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
      return upload_date
  
-def determine_ext(url, default_ext=u'unknown_video'):
+
+def determine_ext(url, default_ext='unknown_video'):
      if url is None:
          return default_ext
      if url is None:
          return default_ext
-    guess = url.partition(u'?')[0].rpartition(u'.')[2]
+    guess = url.partition('?')[0].rpartition('.')[2]
      if re.match(r'^[A-Za-z0-9]+$', guess):
          return guess
      else:
          return default_ext
  
      if re.match(r'^[A-Za-z0-9]+$', guess):
          return guess
      else:
          return default_ext
  
+
  def subtitles_filename(filename, sub_lang, sub_format):
  def subtitles_filename(filename, sub_lang, sub_format):
-    return filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
+    return filename.rsplit('.', 1)[0] + '.' + sub_lang + '.' + sub_format
+
  
  def date_from_str(date_str):
      """
      Return a datetime object from a string in the format YYYYMMDD or
      (now|today)[+-][0-9](day|week|month|year)(s)?"""
      today = datetime.date.today()
  
  def date_from_str(date_str):
      """
      Return a datetime object from a string in the format YYYYMMDD or
      (now|today)[+-][0-9](day|week|month|year)(s)?"""
      today = datetime.date.today()
-    if date_str == 'now'or date_str == 'today':
+    if date_str in ('now', 'today'):
          return today
          return today
+    if date_str == 'yesterday':
+        return today - datetime.timedelta(days=1)
      match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
      if match is not None:
          sign = match.group('sign')
      match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
      if match is not None:
          sign = match.group('sign')
@@ -680,7 +776,7 @@ def date_from_str(date_str):
          if sign == '-':
              time = -time
          unit = match.group('unit')
          if sign == '-':
              time = -time
          unit = match.group('unit')
-        #A bad aproximation?
+        # A bad approximation?
          if unit == 'month':
              unit = 'day'
              time *= 30
          if unit == 'month':
              unit = 'day'
              time *= 30
@@ -691,7 +787,8 @@ def date_from_str(date_str):
          delta = datetime.timedelta(**{unit: time})
          return today + delta
      return datetime.datetime.strptime(date_str, "%Y%m%d").date()
          delta = datetime.timedelta(**{unit: time})
          return today + delta
      return datetime.datetime.strptime(date_str, "%Y%m%d").date()
-    
+
+
  def hyphenate_date(date_str):
      """
      Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
  def hyphenate_date(date_str):
      """
      Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
@@ -701,8 +798,10 @@ def hyphenate_date(date_str):
      else:
          return date_str
  
      else:
          return date_str
  
+
  class DateRange(object):
      """Represents a time interval between two dates"""
  class DateRange(object):
      """Represents a time interval between two dates"""
+
      def __init__(self, start=None, end=None):
          """start and end must be strings in the format accepted by date"""
          if start is not None:
      def __init__(self, start=None, end=None):
          """start and end must be strings in the format accepted by date"""
          if start is not None:
@@ -715,17 +814,20 @@ class DateRange(object):
              self.end = datetime.datetime.max.date()
          if self.start > self.end:
              raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
              self.end = datetime.datetime.max.date()
          if self.start > self.end:
              raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
+
      @classmethod
      def day(cls, day):
          """Returns a range that only contains the given day"""
      @classmethod
      def day(cls, day):
          """Returns a range that only contains the given day"""
-        return cls(day,day)
+        return cls(day, day)
+
      def __contains__(self, date):
          """Check if the date is in the range"""
          if not isinstance(date, datetime.date):
              date = date_from_str(date)
          return self.start <= date <= self.end
      def __contains__(self, date):
          """Check if the date is in the range"""
          if not isinstance(date, datetime.date):
              date = date_from_str(date)
          return self.start <= date <= self.end
+
      def __str__(self):
      def __str__(self):
-        return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
+        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
  
  
  def platform_name():
  
  
  def platform_name():
@@ -756,27 +858,30 @@ def _windows_write_string(s, out):
      except AttributeError:
          # If the output stream doesn't have a fileno, it's virtual
          return False
      except AttributeError:
          # If the output stream doesn't have a fileno, it's virtual
          return False
+    except io.UnsupportedOperation:
+        # Some strange Windows pseudo files?
+        return False
      if fileno not in WIN_OUTPUT_IDS:
          return False
  
      GetStdHandle = ctypes.WINFUNCTYPE(
          ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
      if fileno not in WIN_OUTPUT_IDS:
          return False
  
      GetStdHandle = ctypes.WINFUNCTYPE(
          ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
-        ("GetStdHandle", ctypes.windll.kernel32))
+        (b"GetStdHandle", ctypes.windll.kernel32))
      h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
  
      WriteConsoleW = ctypes.WINFUNCTYPE(
          ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
          ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
      h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
  
      WriteConsoleW = ctypes.WINFUNCTYPE(
          ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
          ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
-        ctypes.wintypes.LPVOID)(("WriteConsoleW", ctypes.windll.kernel32))
+        ctypes.wintypes.LPVOID)((b"WriteConsoleW", ctypes.windll.kernel32))
      written = ctypes.wintypes.DWORD(0)
  
      written = ctypes.wintypes.DWORD(0)
  
-    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(("GetFileType", ctypes.windll.kernel32))
+    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)((b"GetFileType", ctypes.windll.kernel32))
      FILE_TYPE_CHAR = 0x0002
      FILE_TYPE_REMOTE = 0x8000
      GetConsoleMode = ctypes.WINFUNCTYPE(
          ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
          ctypes.POINTER(ctypes.wintypes.DWORD))(
      FILE_TYPE_CHAR = 0x0002
      FILE_TYPE_REMOTE = 0x8000
      GetConsoleMode = ctypes.WINFUNCTYPE(
          ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
          ctypes.POINTER(ctypes.wintypes.DWORD))(
-        ("GetConsoleMode", ctypes.windll.kernel32))
+        (b"GetConsoleMode", ctypes.windll.kernel32))
      INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
  
      def not_a_console(handle):
      INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
  
      def not_a_console(handle):
@@ -844,10 +949,7 @@ def bytes_to_intlist(bs):
  def intlist_to_bytes(xs):
      if not xs:
          return b''
  def intlist_to_bytes(xs):
      if not xs:
          return b''
-    if isinstance(chr(0), bytes):  # Python 2
-        return ''.join([chr(x) for x in xs])
-    else:
-        return bytes(xs)
+    return struct_pack('%dB' % len(xs), *xs)
  
  
  # Cross-platform file locking
  
  
  # Cross-platform file locking
@@ -959,7 +1061,7 @@ def shell_quote(args):
              # We may get a filename encoded with 'encodeFilename'
              a = a.decode(encoding)
          quoted_args.append(pipes.quote(a))
              # We may get a filename encoded with 'encodeFilename'
              a = a.decode(encoding)
          quoted_args.append(pipes.quote(a))
-    return u' '.join(quoted_args)
+    return ' '.join(quoted_args)
  
  
  def takewhile_inclusive(pred, seq):
  
  
  def takewhile_inclusive(pred, seq):
@@ -975,31 +1077,85 @@ def smuggle_url(url, data):
      """ Pass additional data in a URL for internal use. """
  
      sdata = compat_urllib_parse.urlencode(
      """ Pass additional data in a URL for internal use. """
  
      sdata = compat_urllib_parse.urlencode(
-        {u'__youtubedl_smuggle': json.dumps(data)})
-    return url + u'#' + sdata
+        {'__youtubedl_smuggle': json.dumps(data)})
+    return url + '#' + sdata
  
  
  def unsmuggle_url(smug_url, default=None):
  
  
  def unsmuggle_url(smug_url, default=None):
-    if not '#__youtubedl_smuggle' in smug_url:
+    if '#__youtubedl_smuggle' not in smug_url:
          return smug_url, default
          return smug_url, default
-    url, _, sdata = smug_url.rpartition(u'#')
-    jsond = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0]
+    url, _, sdata = smug_url.rpartition('#')
+    jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
      data = json.loads(jsond)
      return url, data
  
  
  def format_bytes(bytes):
      if bytes is None:
      data = json.loads(jsond)
      return url, data
  
  
  def format_bytes(bytes):
      if bytes is None:
-        return u'N/A'
+        return 'N/A'
      if type(bytes) is str:
          bytes = float(bytes)
      if bytes == 0.0:
          exponent = 0
      else:
          exponent = int(math.log(bytes, 1024.0))
      if type(bytes) is str:
          bytes = float(bytes)
      if bytes == 0.0:
          exponent = 0
      else:
          exponent = int(math.log(bytes, 1024.0))
-    suffix = [u'B', u'KiB', u'MiB', u'GiB', u'TiB', u'PiB', u'EiB', u'ZiB', u'YiB'][exponent]
+    suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
      converted = float(bytes) / float(1024 ** exponent)
      converted = float(bytes) / float(1024 ** exponent)
-    return u'%.2f%s' % (converted, suffix)
+    return '%.2f%s' % (converted, suffix)
+
+
+def parse_filesize(s):
+    """Parse a human-readable file size such as '1.5MiB' into bytes.
+
+    The string must start with a decimal number (',' or '.' is accepted
+    as the decimal separator), optionally followed by whitespace, and
+    then one of the supported units:
+
+      * 'B' / 'b'            -- factor 1
+      * 'KiB', 'MiB', ...    -- powers of 1024
+      * 'KB', 'MB', ...      -- powers of 1000
+      * 'kB', 'mB', ...      -- treated as powers of 1024
+      * 'Kb', 'Mb', ...      -- treated as powers of 1000
+
+    for each of the prefixes K, M, G, T, P, E, Z and Y.  The pattern is
+    only anchored at the start, so text after the unit is ignored.
+
+    Returns the size truncated to an int, or None when s is None or does
+    not start with a "<number><unit>" pair.
+    """
+    if s is None:
+        return None
+
+    # The lower-case forms are of course incorrect and unofficial,
+    # but we support those too.
+    multipliers = {'B': 1, 'b': 1}
+    for power, prefix in enumerate('KMGTPEZY', 1):
+        binary = 1024 ** power
+        decimal = 1000 ** power
+        multipliers[prefix + 'iB'] = binary
+        multipliers[prefix + 'B'] = decimal
+        multipliers[prefix.lower() + 'B'] = binary
+        multipliers[prefix + 'b'] = decimal
+
+    # Every known unit becomes one alternative of the pattern.
+    unit_alternatives = '|'.join(
+        re.escape(unit) for unit in multipliers)
+    pattern = (
+        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)'
+        % unit_alternatives)
+
+    match = re.match(pattern, s)
+    if match is None:
+        return None
+
+    # float() only understands '.', so normalise a decimal comma first.
+    number = float(match.group('num').replace(',', '.'))
+    factor = multipliers[match.group('unit')]
+    return int(number * factor)
  
  
  def get_term_width():
  
  
  def get_term_width():
@@ -1022,8 +1178,8 @@ def month_by_name(name):
      """ Return the number of a month by (locale-independently) English name """
  
      ENGLISH_NAMES = [
      """ Return the number of a month by (locale-independently) English name """
  
      ENGLISH_NAMES = [
-        u'January', u'February', u'March', u'April', u'May', u'June',
-        u'July', u'August', u'September', u'October', u'November', u'December']
+        'January', 'February', 'March', 'April', 'May', 'June',
+        'July', 'August', 'September', 'October', 'November', 'December']
      try:
          return ENGLISH_NAMES.index(name) + 1
      except ValueError:
      try:
          return ENGLISH_NAMES.index(name) + 1
      except ValueError:
@@ -1034,7 +1190,7 @@ def fix_xml_ampersands(xml_str):
      """Replace all the '&' by '&amp;' in XML"""
      return re.sub(
          r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
      """Replace all the '&' by '&amp;' in XML"""
      return re.sub(
          r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
-        u'&amp;',
+        '&amp;',
          xml_str)
  
  
          xml_str)
  
  
@@ -1067,7 +1223,7 @@ def remove_end(s, end):
  
  def url_basename(url):
      path = compat_urlparse.urlparse(url).path
  
  def url_basename(url):
      path = compat_urlparse.urlparse(url).path
-    return path.strip(u'/').split(u'/')[-1]
+    return path.strip('/').split('/')[-1]
  
  
  class HEADRequest(compat_urllib_request.Request):
  
  
  class HEADRequest(compat_urllib_request.Request):
@@ -1092,7 +1248,7 @@ def str_to_int(int_str):
      """ A more relaxed version of int_or_none """
      if int_str is None:
          return None
      """ A more relaxed version of int_or_none """
      if int_str is None:
          return None
-    int_str = re.sub(r'[,\.\+]', u'', int_str)
+    int_str = re.sub(r'[,\.\+]', '', int_str)
      return int(int_str)
  
  
      return int(int_str)
  
  
@@ -1101,28 +1257,44 @@ def float_or_none(v, scale=1, invscale=1, default=None):
  
  
  def parse_duration(s):
  
  
  def parse_duration(s):
-    if s is None:
+    if not isinstance(s, basestring if sys.version_info < (3, 0) else compat_str):
          return None
  
      s = s.strip()
  
      m = re.match(
          return None
  
      s = s.strip()
  
      m = re.match(
-        r'(?i)(?:(?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?(?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?$', s)
+        r'''(?ix)(?:P?T)?
+        (?:
+            (?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*|
+            (?P<only_hours>[0-9.]+)\s*(?:hours?)|
+
+            (?:
+                (?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?
+                (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
+            )?
+            (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?
+        )$''', s)
      if not m:
          return None
      if not m:
          return None
-    res = int(m.group('secs'))
+    res = 0
+    if m.group('only_mins'):
+        return float_or_none(m.group('only_mins'), invscale=60)
+    if m.group('only_hours'):
+        return float_or_none(m.group('only_hours'), invscale=60 * 60)
+    if m.group('secs'):
+        res += int(m.group('secs'))
      if m.group('mins'):
          res += int(m.group('mins')) * 60
      if m.group('mins'):
          res += int(m.group('mins')) * 60
-        if m.group('hours'):
-            res += int(m.group('hours')) * 60 * 60
+    if m.group('hours'):
+        res += int(m.group('hours')) * 60 * 60
      if m.group('ms'):
          res += float(m.group('ms'))
      return res
  
  
  def prepend_extension(filename, ext):
      if m.group('ms'):
          res += float(m.group('ms'))
      return res
  
  
  def prepend_extension(filename, ext):
-    name, real_ext = os.path.splitext(filename) 
-    return u'{0}.{1}{2}'.format(name, ext, real_ext)
+    name, real_ext = os.path.splitext(filename)
+    return '{0}.{1}{2}'.format(name, ext, real_ext)
  
  
  def check_executable(exe, args=[]):
  
  
  def check_executable(exe, args=[]):
@@ -1136,18 +1308,25 @@ def check_executable(exe, args=[]):
  
  
  def get_exe_version(exe, args=['--version'],
  
  
  def get_exe_version(exe, args=['--version'],
-                    version_re=r'version\s+([0-9._-a-zA-Z]+)',
-                    unrecognized=u'present'):
+                    version_re=None, unrecognized='present'):
      """ Returns the version of the specified executable,
      or False if the executable is not present """
      try:
      """ Returns the version of the specified executable,
      or False if the executable is not present """
      try:
-        out, err = subprocess.Popen(
+        out, _ = subprocess.Popen(
              [exe] + args,
              stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
      except OSError:
          return False
              [exe] + args,
              stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
      except OSError:
          return False
-    firstline = out.partition(b'\n')[0].decode('ascii', 'ignore')
-    m = re.search(version_re, firstline)
+    if isinstance(out, bytes):  # Python 2.x
+        out = out.decode('ascii', 'ignore')
+    return detect_exe_version(out, version_re, unrecognized)
+
+
+def detect_exe_version(output, version_re=None, unrecognized='present'):
+    assert isinstance(output, compat_str)
+    if version_re is None:
+        version_re = r'version\s+([-0-9._a-zA-Z]+)'
+    m = re.search(version_re, output)
      if m:
          return m.group(1)
      else:
      if m:
          return m.group(1)
      else:
@@ -1258,7 +1437,7 @@ def escape_url(url):
      ).geturl()
  
  try:
      ).geturl()
  
  try:
-    struct.pack(u'!I', 0)
+    struct.pack('!I', 0)
  except TypeError:
      # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
      def struct_pack(spec, *args):
  except TypeError:
      # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
      def struct_pack(spec, *args):
@@ -1279,7 +1458,7 @@ def read_batch_urls(batch_fd):
      def fixup(url):
          if not isinstance(url, compat_str):
              url = url.decode('utf-8', 'replace')
      def fixup(url):
          if not isinstance(url, compat_str):
              url = url.decode('utf-8', 'replace')
-        BOM_UTF8 = u'\xef\xbb\xbf'
+        BOM_UTF8 = '\xef\xbb\xbf'
          if url.startswith(BOM_UTF8):
              url = url[len(BOM_UTF8):]
          url = url.strip()
          if url.startswith(BOM_UTF8):
              url = url[len(BOM_UTF8):]
          url = url.strip()
@@ -1335,7 +1514,8 @@ def parse_age_limit(s):
  
  
  def strip_jsonp(code):
  
  
  def strip_jsonp(code):
-    return re.sub(r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?\s*$', r'\1', code)
+    return re.sub(
+        r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)
  
  
  def js_to_json(code):
  
  
  def js_to_json(code):
@@ -1387,7 +1567,7 @@ def limit_length(s, length):
  
  
  def version_tuple(v):
  
  
  def version_tuple(v):
-    return [int(e) for e in v.split('.')]
+    return tuple(int(e) for e in re.split(r'[-.]', v))
  
  
  def is_outdated_version(version, limit, assume_new=True):
  
  
  def is_outdated_version(version, limit, assume_new=True):
@@ -1397,3 +1577,85 @@ def is_outdated_version(version, limit, assume_new=True):
          return version_tuple(version) < version_tuple(limit)
      except ValueError:
          return not assume_new
          return version_tuple(version) < version_tuple(limit)
      except ValueError:
          return not assume_new
+
+
+def ytdl_is_updateable():
+    """ Returns whether youtube-dl can be updated with -U.
+
+    That is the case when this module was loaded by a zipimporter or when
+    the interpreter exposes a sys.frozen attribute.
+    """
+    from zipimport import zipimporter
+
+    loaded_from_zip = isinstance(globals().get('__loader__'), zipimporter)
+    return loaded_from_zip or hasattr(sys, 'frozen')
+
+
+def args_to_str(args):
+    # Short string form of a subprocess command: every argument is passed
+    # through shlex_quote and the results are joined with single spaces
+    return ' '.join(map(shlex_quote, args))
+
+
+def urlhandle_detect_ext(url_handle):
+    """ Guess a file extension from the headers of an opened URL handle.
+
+    A quoted filename in an "attachment" Content-Disposition header wins
+    when determine_ext() finds an extension in it; otherwise the subtype
+    of the Content-Type header is used, with any ";"-separated parameters
+    removed.  Returns None when neither header yields anything usable.
+    """
+    try:
+        url_handle.headers
+        getheader = lambda h: url_handle.headers[h]
+    except AttributeError:  # Python < 3
+        getheader = url_handle.info().getheader
+
+    cd = getheader('Content-Disposition')
+    if cd:
+        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
+        if m:
+            e = determine_ext(m.group('filename'), default_ext=None)
+            if e:
+                return e
+
+    content_type = getheader('Content-Type')
+    if not content_type:
+        # The header is optional, so there may be nothing to split
+        return None
+    # Ignore parameters such as "; charset=utf-8" after the media type
+    mimetype = content_type.split(';')[0].strip()
+    if '/' not in mimetype:
+        return None
+    return mimetype.split('/')[1]
+
+
+def age_restricted(content_limit, age_limit):
+    """ Returns True iff the content should be blocked.
+
+    Content is blocked only when both limits are set and age_limit is
+    strictly lower than content_limit.  An age_limit of None means no
+    limit was set; a content_limit of None means the content is
+    available for everyone.
+    """
+    both_known = age_limit is not None and content_limit is not None
+    return both_known and age_limit < content_limit
+
+
+def is_html(first_bytes):
+    """ Detect whether a file contains HTML by examining its first bytes.
+
+    A leading byte order mark, if present, is stripped and selects the
+    encoding used for decoding; without one the bytes are decoded as
+    UTF-8.  Undecodable input is replaced rather than raising.  Returns
+    the match object (truthy) when the decoded text starts with optional
+    whitespace followed by '<', otherwise None.
+    """
+    encoding = 'utf-8'
+    payload = first_bytes
+    # UTF-32-LE must be tried before UTF-16-LE: its mark starts with the
+    # same two bytes
+    known_boms = (
+        (b'\xef\xbb\xbf', 'utf-8'),
+        (b'\x00\x00\xfe\xff', 'utf-32-be'),
+        (b'\xff\xfe\x00\x00', 'utf-32-le'),
+        (b'\xff\xfe', 'utf-16-le'),
+        (b'\xfe\xff', 'utf-16-be'),
+    )
+    for bom, bom_encoding in known_boms:
+        if first_bytes.startswith(bom):
+            encoding = bom_encoding
+            payload = first_bytes[len(bom):]
+            break
+
+    text = payload.decode(encoding, 'replace')
+    return re.match(r'^\s*<', text)
+
+
+def determine_protocol(info_dict):
+    """ Work out the protocol name for an info dict.
+
+    An explicit, non-None 'protocol' entry is returned unchanged.
+    Otherwise the decision is based on info_dict['url']: a URL starting
+    with 'rtmp', 'mms' or 'rtsp' yields that name, an extension of
+    'm3u8' or 'f4m' (as reported by determine_ext) yields that name, and
+    anything else yields the URL's scheme.
+    """
+    explicit = info_dict.get('protocol')
+    if explicit is not None:
+        return explicit
+
+    url = info_dict['url']
+    # Prefix checks, tried in this order
+    for streaming in ('rtmp', 'mms', 'rtsp'):
+        if url.startswith(streaming):
+            return streaming
+
+    ext = determine_ext(url)
+    for manifest in ('m3u8', 'f4m'):
+        if ext == manifest:
+            return manifest
+
+    return compat_urllib_parse_urlparse(url).scheme