_ Git - youtube-dl/blob - youtube_dl/utils.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 import datetime
   5 import email.utils
   6 import errno
   7 import gzip
   8 import io
   9 import json
  10 import locale
  11 import os
  12 import platform
  13 import re
  14 import socket
  15 import sys
  16 import traceback
  17 import zlib
  18
  19 try:
  20     import urllib.request as compat_urllib_request
  21 except ImportError: # Python 2
  22     import urllib2 as compat_urllib_request
  23
  24 try:
  25     import urllib.error as compat_urllib_error
  26 except ImportError: # Python 2
  27     import urllib2 as compat_urllib_error
  28
  29 try:
  30     import urllib.parse as compat_urllib_parse
  31 except ImportError: # Python 2
  32     import urllib as compat_urllib_parse
  33
  34 try:
  35     from urllib.parse import urlparse as compat_urllib_parse_urlparse
  36 except ImportError: # Python 2
  37     from urlparse import urlparse as compat_urllib_parse_urlparse
  38
  39 try:
  40     import urllib.parse as compat_urlparse
  41 except ImportError: # Python 2
  42     import urlparse as compat_urlparse
  43
  44 try:
  45     import http.cookiejar as compat_cookiejar
  46 except ImportError: # Python 2
  47     import cookielib as compat_cookiejar
  48
  49 try:
  50     import html.entities as compat_html_entities
  51 except ImportError: # Python 2
  52     import htmlentitydefs as compat_html_entities
  53
  54 try:
  55     import html.parser as compat_html_parser
  56 except ImportError: # Python 2
  57     import HTMLParser as compat_html_parser
  58
  59 try:
  60     import http.client as compat_http_client
  61 except ImportError: # Python 2
  62     import httplib as compat_http_client
  63
  64 try:
  65     from urllib.error import HTTPError as compat_HTTPError
  66 except ImportError:  # Python 2
  67     from urllib2 import HTTPError as compat_HTTPError
  68
# compat_subprocess_get_DEVNULL() returns an object suitable for discarding
# subprocess output: subprocess.DEVNULL when it can be imported, otherwise a
# file object opened on os.path.devnull for writing.
try:
    from subprocess import DEVNULL
    compat_subprocess_get_DEVNULL = lambda: DEVNULL
except ImportError:
    # NOTE(review): every call opens a new file object here; presumably the
    # caller is expected to close it -- confirm at the call sites.
    compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
  74
  75 try:
  76     from urllib.parse import parse_qs as compat_parse_qs
  77 except ImportError: # Python 2
  78     # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
  79     # Python 2's version is apparently totally broken
  80     def _unquote(string, encoding='utf-8', errors='replace'):
  81         if string == '':
  82             return string
  83         res = string.split('%')
  84         if len(res) == 1:
  85             return string
  86         if encoding is None:
  87             encoding = 'utf-8'
  88         if errors is None:
  89             errors = 'replace'
  90         # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
  91         pct_sequence = b''
  92         string = res[0]
  93         for item in res[1:]:
  94             try:
  95                 if not item:
  96                     raise ValueError
  97                 pct_sequence += item[:2].decode('hex')
  98                 rest = item[2:]
  99                 if not rest:
 100                     # This segment was just a single percent-encoded character.
 101                     # May be part of a sequence of code units, so delay decoding.
 102                     # (Stored in pct_sequence).
 103                     continue
 104             except ValueError:
 105                 rest = '%' + item
 106             # Encountered non-percent-encoded characters. Flush the current
 107             # pct_sequence.
 108             string += pct_sequence.decode(encoding, errors) + rest
 109             pct_sequence = b''
 110         if pct_sequence:
 111             # Flush the final pct_sequence
 112             string += pct_sequence.decode(encoding, errors)
 113         return string
 114
 115     def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
 116                 encoding='utf-8', errors='replace'):
 117         qs, _coerce_result = qs, unicode
 118         pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
 119         r = []
 120         for name_value in pairs:
 121             if not name_value and not strict_parsing:
 122                 continue
 123             nv = name_value.split('=', 1)
 124             if len(nv) != 2:
 125                 if strict_parsing:
 126                     raise ValueError("bad query field: %r" % (name_value,))
 127                 # Handle case of a control-name with no equal sign
 128                 if keep_blank_values:
 129                     nv.append('')
 130                 else:
 131                     continue
 132             if len(nv[1]) or keep_blank_values:
 133                 name = nv[0].replace('+', ' ')
 134                 name = _unquote(name, encoding=encoding, errors=errors)
 135                 name = _coerce_result(name)
 136                 value = nv[1].replace('+', ' ')
 137                 value = _unquote(value, encoding=encoding, errors=errors)
 138                 value = _coerce_result(value)
 139                 r.append((name, value))
 140         return r
 141
 142     def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
 143                 encoding='utf-8', errors='replace'):
 144         parsed_result = {}
 145         pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
 146                         encoding=encoding, errors=errors)
 147         for name, value in pairs:
 148             if name in parsed_result:
 149                 parsed_result[name].append(value)
 150             else:
 151                 parsed_result[name] = [value]
 152         return parsed_result
 153
# Text string type: the ``unicode`` builtin where it exists, otherwise ``str``.
try:
    compat_str = unicode # Python 2
except NameError:
    compat_str = str

# Code point -> one-character text string: the ``unichr`` builtin where it
# exists, otherwise ``chr``.
try:
    compat_chr = unichr # Python 2
except NameError:
    compat_chr = chr
 163
 164 def compat_ord(c):
 165     if type(c) is int: return c
 166     else: return ord(c)
 167
# This is not clearly defined otherwise
# (the type of compiled pattern objects, obtained from an actual pattern).
compiled_regex_type = type(re.compile(''))

# Default HTTP request headers. YoutubeDLHandler.http_request() adds every
# entry of this dict to each request it processes.
std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
 178
 179 def preferredencoding():
 180     """Get preferred encoding.
 181
 182     Returns the best encoding scheme for the system, based on
 183     locale.getpreferredencoding() and some further tweaks.
 184     """
 185     try:
 186         pref = locale.getpreferredencoding()
 187         u'TEST'.encode(pref)
 188     except:
 189         pref = 'UTF-8'
 190
 191     return pref
 192
 193 if sys.version_info < (3,0):
 194     def compat_print(s):
 195         print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
 196 else:
 197     def compat_print(s):
 198         assert type(s) == type(u'')
 199         print(s)
 200
 201 # In Python 2.x, json.dump expects a bytestream.
 202 # In Python 3.x, it writes to a character stream
 203 if sys.version_info < (3,0):
 204     def write_json_file(obj, fn):
 205         with open(fn, 'wb') as f:
 206             json.dump(obj, f)
 207 else:
 208     def write_json_file(obj, fn):
 209         with open(fn, 'w', encoding='utf-8') as f:
 210             json.dump(obj, f)
 211
 212 if sys.version_info >= (2,7):
 213     def find_xpath_attr(node, xpath, key, val):
 214         """ Find the xpath xpath[@key=val] """
 215         assert re.match(r'^[a-zA-Z]+$', key)
 216         assert re.match(r'^[a-zA-Z0-9@\s]*$', val)
 217         expr = xpath + u"[@%s='%s']" % (key, val)
 218         return node.find(expr)
 219 else:
 220     def find_xpath_attr(node, xpath, key, val):
 221         for f in node.findall(xpath):
 222             if f.attrib.get(key) == val:
 223                 return f
 224         return None
 225
 226 def htmlentity_transform(matchobj):
 227     """Transforms an HTML entity to a character.
 228
 229     This function receives a match object and is intended to be used with
 230     the re.sub() function.
 231     """
 232     entity = matchobj.group(1)
 233
 234     # Known non-numeric HTML entity
 235     if entity in compat_html_entities.name2codepoint:
 236         return compat_chr(compat_html_entities.name2codepoint[entity])
 237
 238     mobj = re.match(u'(?u)#(x?\\d+)', entity)
 239     if mobj is not None:
 240         numstr = mobj.group(1)
 241         if numstr.startswith(u'x'):
 242             base = 16
 243             numstr = u'0%s' % numstr
 244         else:
 245             base = 10
 246         return compat_chr(int(numstr, base))
 247
 248     # Unknown entity in name, return its literal representation
 249     return (u'&%s;' % entity)
 250
# Overwrite the parser module's ``locatestarttagend`` pattern (the regex for
# a start tag with its attributes) with a corrected version.
compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
 252 class BaseHTMLParser(compat_html_parser.HTMLParser):
 253     def __init(self):
 254         compat_html_parser.HTMLParser.__init__(self)
 255         self.html = None
 256
 257     def loads(self, html):
 258         self.html = html
 259         self.feed(html)
 260         self.close()
 261
class AttrParser(BaseHTMLParser):
    """Modified HTMLParser that isolates a tag with the specified attribute"""
    def __init__(self, attribute, value):
        # Attribute name/value identifying the element to isolate
        self.attribute = attribute
        self.value = value
        # None until a matching start tag is seen; then a list that grows to
        # [tag, start position, end position] (positions come from getpos())
        self.result = None
        # True while parsing inside the matched element
        self.started = False
        # Per tag name: number of currently open tags since the match
        self.depth = {}
        # True between the matching start tag and the next parser event
        self.watch_startpos = False
        self.error_count = 0
        BaseHTMLParser.__init__(self)

    def error(self, message):
        """Recover from a parse error by skipping a line and parsing on.

        Raises HTMLParseError instead once more than 10 errors have been
        recovered from, or if the error occurs inside the matched element.
        """
        if self.error_count > 10 or self.started:
            raise compat_html_parser.HTMLParseError(message, self.getpos())
        # getpos()[0] is the 1-based current line, so this keeps only the
        # lines after it
        self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
        self.error_count += 1
        self.goahead(1)

    def handle_starttag(self, tag, attrs):
        attrs = dict(attrs)
        if self.started:
            # A nested tag is the first event after the matching start tag:
            # record the start position now
            self.find_startpos(None)
        if self.attribute in attrs and attrs[self.attribute] == self.value:
            self.result = [tag]
            self.started = True
            self.watch_startpos = True
        if self.started:
            if not tag in self.depth: self.depth[tag] = 0
            self.depth[tag] += 1

    def handle_endtag(self, tag):
        if self.started:
            if tag in self.depth: self.depth[tag] -= 1
            # The matched element is closed once the count for its own tag
            # name drops back to zero
            if self.depth[self.result[0]] == 0:
                self.started = False
                self.result.append(self.getpos())

    def find_startpos(self, x):
        """Needed to put the start position of the result (self.result[1])
        after the opening tag with the requested id"""
        if self.watch_startpos:
            self.watch_startpos = False
            self.result.append(self.getpos())
    # Every other parser callback only serves to record the start position
    handle_entityref = handle_charref = handle_data = handle_comment = \
    handle_decl = handle_pi = unknown_decl = find_startpos

    def get_result(self):
        """Return the stripped text between the matched element's start and
        end positions, or None unless both positions were recorded."""
        if self.result is None:
            return None
        if len(self.result) != 3:
            return None
        lines = self.html.split('\n')
        # Positions are (line, column) with 1-based lines
        lines = lines[self.result[1][0]-1:self.result[2][0]]
        lines[0] = lines[0][self.result[1][1]:]
        if len(lines) == 1:
            # Start and end are on the same line: the end column must be
            # taken relative to the already-cut start
            lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
        lines[-1] = lines[-1][:self.result[2][1]]
        return '\n'.join(lines).strip()
# Hack for https://github.com/rg3/youtube-dl/issues/662
# On interpreters older than 2.7.3, AttrParser steps over the literal text
# </scr'+'ipt> as if it were an end tag of that length; any other input is
# handed to the stock HTMLParser.parse_endtag.
if sys.version_info < (2, 7, 3):
    AttrParser.parse_endtag = (lambda self, i:
        i + len("</scr'+'ipt>")
        if self.rawdata[i:].startswith("</scr'+'ipt>")
        else compat_html_parser.HTMLParser.parse_endtag(self, i))
 327
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document

    Shorthand for get_element_by_attribute("id", id, html); the return value
    is whatever that call returns.
    """
    return get_element_by_attribute("id", id, html)
 331
 332 def get_element_by_attribute(attribute, value, html):
 333     """Return the content of the tag with the specified attribute in the passed HTML document"""
 334     parser = AttrParser(attribute, value)
 335     try:
 336         parser.loads(html)
 337     except compat_html_parser.HTMLParseError:
 338         pass
 339     return parser.get_result()
 340
 341 class MetaParser(BaseHTMLParser):
 342     """
 343     Modified HTMLParser that isolates a meta tag with the specified name
 344     attribute.
 345     """
 346     def __init__(self, name):
 347         BaseHTMLParser.__init__(self)
 348         self.name = name
 349         self.content = None
 350         self.result = None
 351
 352     def handle_starttag(self, tag, attrs):
 353         if tag != 'meta':
 354             return
 355         attrs = dict(attrs)
 356         if attrs.get('name') == self.name:
 357             self.result = attrs.get('content')
 358
 359     def get_result(self):
 360         return self.result
 361
 362 def get_meta_content(name, html):
 363     """
 364     Return the content attribute from the meta tag with the given name attribute.
 365     """
 366     parser = MetaParser(name)
 367     try:
 368         parser.loads(html)
 369     except compat_html_parser.HTMLParseError:
 370         pass
 371     return parser.get_result()
 372
 373
 374 def clean_html(html):
 375     """Clean an HTML snippet into a readable string"""
 376     # Newline vs <br />
 377     html = html.replace('\n', ' ')
 378     html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
 379     html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
 380     # Strip html tags
 381     html = re.sub('<.*?>', '', html)
 382     # Replace html entities
 383     html = unescapeHTML(html)
 384     return html.strip()
 385
 386
 387 def sanitize_open(filename, open_mode):
 388     """Try to open the given filename, and slightly tweak it if this fails.
 389
 390     Attempts to open the given filename. If this fails, it tries to change
 391     the filename slightly, step by step, until it's either able to open it
 392     or it fails and raises a final exception, like the standard open()
 393     function.
 394
 395     It returns the tuple (stream, definitive_file_name).
 396     """
 397     try:
 398         if filename == u'-':
 399             if sys.platform == 'win32':
 400                 import msvcrt
 401                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
 402             return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
 403         stream = open(encodeFilename(filename), open_mode)
 404         return (stream, filename)
 405     except (IOError, OSError) as err:
 406         if err.errno in (errno.EACCES,):
 407             raise
 408
 409         # In case of error, try to remove win32 forbidden chars
 410         alt_filename = os.path.join(
 411                         re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', path_part)
 412                         for path_part in os.path.split(filename)
 413                        )
 414         if alt_filename == filename:
 415             raise
 416         else:
 417             # An exception here should be caught in the caller
 418             stream = open(encodeFilename(filename), open_mode)
 419             return (stream, alt_filename)
 420
 421
 422 def timeconvert(timestr):
 423     """Convert RFC 2822 defined time string into system timestamp"""
 424     timestamp = None
 425     timetuple = email.utils.parsedate_tz(timestr)
 426     if timetuple is not None:
 427         timestamp = email.utils.mktime_tz(timetuple)
 428     return timestamp
 429
 430 def sanitize_filename(s, restricted=False, is_id=False):
 431     """Sanitizes a string so it could be used as part of a filename.
 432     If restricted is set, use a stricter subset of allowed characters.
 433     Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
 434     """
 435     def replace_insane(char):
 436         if char == '?' or ord(char) < 32 or ord(char) == 127:
 437             return ''
 438         elif char == '"':
 439             return '' if restricted else '\''
 440         elif char == ':':
 441             return '_-' if restricted else ' -'
 442         elif char in '\\/|*<>':
 443             return '_'
 444         if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
 445             return '_'
 446         if restricted and ord(char) > 127:
 447             return '_'
 448         return char
 449
 450     result = u''.join(map(replace_insane, s))
 451     if not is_id:
 452         while '__' in result:
 453             result = result.replace('__', '_')
 454         result = result.strip('_')
 455         # Common case of "Foreign band name - English song title"
 456         if restricted and result.startswith('-_'):
 457             result = result[2:]
 458         if not result:
 459             result = '_'
 460     return result
 461
 462 def orderedSet(iterable):
 463     """ Remove all duplicates from the input iterable """
 464     res = []
 465     for el in iterable:
 466         if el not in res:
 467             res.append(el)
 468     return res
 469
 470 def unescapeHTML(s):
 471     """
 472     @param s a string
 473     """
 474     assert type(s) == type(u'')
 475
 476     result = re.sub(u'(?u)&(.+?);', htmlentity_transform, s)
 477     return result
 478
 479 def encodeFilename(s):
 480     """
 481     @param s The name of the file
 482     """
 483
 484     assert type(s) == type(u'')
 485
 486     # Python 3 has a Unicode API
 487     if sys.version_info >= (3, 0):
 488         return s
 489
 490     if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
 491         # Pass u'' directly to use Unicode APIs on Windows 2000 and up
 492         # (Detecting Windows NT 4 is tricky because 'major >= 4' would
 493         # match Windows 9x series as well. Besides, NT 4 is obsolete.)
 494         return s
 495     else:
 496         encoding = sys.getfilesystemencoding()
 497         if encoding is None:
 498             encoding = 'utf-8'
 499         return s.encode(encoding, 'ignore')
 500
 501 def decodeOption(optval):
 502     if optval is None:
 503         return optval
 504     if isinstance(optval, bytes):
 505         optval = optval.decode(preferredencoding())
 506
 507     assert isinstance(optval, compat_str)
 508     return optval
 509
 510 def formatSeconds(secs):
 511     if secs > 3600:
 512         return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
 513     elif secs > 60:
 514         return '%d:%02d' % (secs // 60, secs % 60)
 515     else:
 516         return '%d' % secs
 517
 518 def make_HTTPS_handler(opts):
 519     if sys.version_info < (3,2):
 520         # Python's 2.x handler is very simplistic
 521         return compat_urllib_request.HTTPSHandler()
 522     else:
 523         import ssl
 524         context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
 525         context.set_default_verify_paths()
 526
 527         context.verify_mode = (ssl.CERT_NONE
 528                                if opts.no_check_certificate
 529                                else ssl.CERT_REQUIRED)
 530         return compat_urllib_request.HTTPSHandler(context=context)
 531
 532 class ExtractorError(Exception):
 533     """Error during info extraction."""
 534     def __init__(self, msg, tb=None, expected=False, cause=None):
 535         """ tb, if given, is the original traceback (so that it can be printed out).
 536         If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
 537         """
 538
 539         if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
 540             expected = True
 541         if not expected:
 542             msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type  youtube-dl -U  to update.'
 543         super(ExtractorError, self).__init__(msg)
 544
 545         self.traceback = tb
 546         self.exc_info = sys.exc_info()  # preserve original exception
 547         self.cause = cause
 548
 549     def format_traceback(self):
 550         if self.traceback is None:
 551             return None
 552         return u''.join(traceback.format_tb(self.traceback))
 553
 554
 555 class DownloadError(Exception):
 556     """Download Error exception.
 557
 558     This exception may be thrown by FileDownloader objects if they are not
 559     configured to continue on errors. They will contain the appropriate
 560     error message.
 561     """
 562     def __init__(self, msg, exc_info=None):
 563         """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
 564         super(DownloadError, self).__init__(msg)
 565         self.exc_info = exc_info
 566
 567
 568 class SameFileError(Exception):
 569     """Same File exception.
 570
 571     This exception will be thrown by FileDownloader objects if they detect
 572     multiple files would have to be downloaded to the same file on disk.
 573     """
 574     pass
 575
 576
 577 class PostProcessingError(Exception):
 578     """Post Processing exception.
 579
 580     This exception may be raised by PostProcessor's .run() method to
 581     indicate an error in the postprocessing task.
 582     """
 583     def __init__(self, msg):
 584         self.msg = msg
 585
 586 class MaxDownloadsReached(Exception):
 587     """ --max-downloads limit has been reached. """
 588     pass
 589
 590
 591 class UnavailableVideoError(Exception):
 592     """Unavailable Format exception.
 593
 594     This exception will be thrown when a video is requested
 595     in a format that is not available for that video.
 596     """
 597     pass
 598
 599
 600 class ContentTooShortError(Exception):
 601     """Content Too Short exception.
 602
 603     This exception may be raised by FileDownloader objects when a file they
 604     download is too small for what the server announced first, indicating
 605     the connection was probably interrupted.
 606     """
 607     # Both in bytes
 608     downloaded = None
 609     expected = None
 610
 611     def __init__(self, downloaded, expected):
 612         self.downloaded = downloaded
 613         self.expected = expected
 614
 615 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
 616     """Handler for HTTP requests and responses.
 617
 618     This class, when installed with an OpenerDirector, automatically adds
 619     the standard headers to every HTTP request and handles gzipped and
 620     deflated responses from web servers. If compression is to be avoided in
 621     a particular request, the original request in the program code only has
 622     to include the HTTP header "Youtubedl-No-Compression", which will be
 623     removed before making the real request.
 624
 625     Part of this code was copied from:
 626
 627     http://techknack.net/python-urllib2-handlers/
 628
 629     Andrew Rowls, the author of that code, agreed to release it to the
 630     public domain.
 631     """
 632
 633     @staticmethod
 634     def deflate(data):
 635         try:
 636             return zlib.decompress(data, -zlib.MAX_WBITS)
 637         except zlib.error:
 638             return zlib.decompress(data)
 639
 640     @staticmethod
 641     def addinfourl_wrapper(stream, headers, url, code):
 642         if hasattr(compat_urllib_request.addinfourl, 'getcode'):
 643             return compat_urllib_request.addinfourl(stream, headers, url, code)
 644         ret = compat_urllib_request.addinfourl(stream, headers, url)
 645         ret.code = code
 646         return ret
 647
 648     def http_request(self, req):
 649         for h,v in std_headers.items():
 650             if h in req.headers:
 651                 del req.headers[h]
 652             req.add_header(h, v)
 653         if 'Youtubedl-no-compression' in req.headers:
 654             if 'Accept-encoding' in req.headers:
 655                 del req.headers['Accept-encoding']
 656             del req.headers['Youtubedl-no-compression']
 657         if 'Youtubedl-user-agent' in req.headers:
 658             if 'User-agent' in req.headers:
 659                 del req.headers['User-agent']
 660             req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
 661             del req.headers['Youtubedl-user-agent']
 662         return req
 663
 664     def http_response(self, req, resp):
 665         old_resp = resp
 666         # gzip
 667         if resp.headers.get('Content-encoding', '') == 'gzip':
 668             content = resp.read()
 669             gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
 670             try:
 671                 uncompressed = io.BytesIO(gz.read())
 672             except IOError as original_ioerror:
 673                 # There may be junk add the end of the file
 674                 # See http://stackoverflow.com/q/4928560/35070 for details
 675                 for i in range(1, 1024):
 676                     try:
 677                         gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
 678                         uncompressed = io.BytesIO(gz.read())
 679                     except IOError:
 680                         continue
 681                     break
 682                 else:
 683                     raise original_ioerror
 684             resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
 685             resp.msg = old_resp.msg
 686         # deflate
 687         if resp.headers.get('Content-encoding', '') == 'deflate':
 688             gz = io.BytesIO(self.deflate(resp.read()))
 689             resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
 690             resp.msg = old_resp.msg
 691         return resp
 692
 693     https_request = http_request
 694     https_response = http_response
 695
 696 def unified_strdate(date_str):
 697     """Return a string with the date in the format YYYYMMDD"""
 698     upload_date = None
 699     #Replace commas
 700     date_str = date_str.replace(',',' ')
 701     # %z (UTC offset) is only supported in python>=3.2
 702     date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
 703     format_expressions = [
 704         '%d %B %Y',
 705         '%B %d %Y',
 706         '%b %d %Y',
 707         '%Y-%m-%d',
 708         '%d/%m/%Y',
 709         '%Y/%m/%d %H:%M:%S',
 710         '%d.%m.%Y %H:%M',
 711         '%Y-%m-%dT%H:%M:%SZ',
 712     ]
 713     for expression in format_expressions:
 714         try:
 715             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
 716         except:
 717             pass
 718     return upload_date
 719
 720 def determine_ext(url, default_ext=u'unknown_video'):
 721     guess = url.partition(u'?')[0].rpartition(u'.')[2]
 722     if re.match(r'^[A-Za-z0-9]+$', guess):
 723         return guess
 724     else:
 725         return default_ext
 726
 727 def subtitles_filename(filename, sub_lang, sub_format):
 728     return filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
 729
 730 def date_from_str(date_str):
 731     """
 732     Return a datetime object from a string in the format YYYYMMDD or
 733     (now|today)[+-][0-9](day|week|month|year)(s)?"""
 734     today = datetime.date.today()
 735     if date_str == 'now'or date_str == 'today':
 736         return today
 737     match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
 738     if match is not None:
 739         sign = match.group('sign')
 740         time = int(match.group('time'))
 741         if sign == '-':
 742             time = -time
 743         unit = match.group('unit')
 744         #A bad aproximation?
 745         if unit == 'month':
 746             unit = 'day'
 747             time *= 30
 748         elif unit == 'year':
 749             unit = 'day'
 750             time *= 365
 751         unit += 's'
 752         delta = datetime.timedelta(**{unit: time})
 753         return today + delta
 754     return datetime.datetime.strptime(date_str, "%Y%m%d").date()
 755
 756 class DateRange(object):
 757     """Represents a time interval between two dates"""
 758     def __init__(self, start=None, end=None):
 759         """start and end must be strings in the format accepted by date"""
 760         if start is not None:
 761             self.start = date_from_str(start)
 762         else:
 763             self.start = datetime.datetime.min.date()
 764         if end is not None:
 765             self.end = date_from_str(end)
 766         else:
 767             self.end = datetime.datetime.max.date()
 768         if self.start > self.end:
 769             raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
 770     @classmethod
 771     def day(cls, day):
 772         """Returns a range that only contains the given day"""
 773         return cls(day,day)
 774     def __contains__(self, date):
 775         """Check if the date is in the range"""
 776         if not isinstance(date, datetime.date):
 777             date = date_from_str(date)
 778         return self.start <= date <= self.end
 779     def __str__(self):
 780         return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
 781
 782
 783 def platform_name():
 784     """ Returns the platform name as a compat_str """
 785     res = platform.platform()
 786     if isinstance(res, bytes):
 787         res = res.decode(preferredencoding())
 788
 789     assert isinstance(res, compat_str)
 790     return res
 791
 792
 793 def write_string(s, out=None):
 794     if out is None:
 795         out = sys.stderr
 796     assert type(s) == type(u'')
 797
 798     if ('b' in getattr(out, 'mode', '') or
 799             sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
 800         s = s.encode(preferredencoding(), 'ignore')
 801     out.write(s)
 802     out.flush()
 803
 804
 805 def bytes_to_intlist(bs):
 806     if not bs:
 807         return []
 808     if isinstance(bs[0], int):  # Python 3
 809         return list(bs)
 810     else:
 811         return [ord(c) for c in bs]
 812
 813
 814 def intlist_to_bytes(xs):
 815     if not xs:
 816         return b''
 817     if isinstance(chr(0), bytes):  # Python 2
 818         return ''.join([chr(x) for x in xs])
 819     else:
 820         return bytes(xs)