_ Git - youtube-dl/blob - youtube_dl/utils.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 import datetime
   5 import email.utils
   6 import errno
   7 import gzip
   8 import io
   9 import json
  10 import locale
  11 import os
  12 import platform
  13 import re
  14 import socket
  15 import sys
  16 import traceback
  17 import zlib
  18
  19 try:
  20     import urllib.request as compat_urllib_request
  21 except ImportError: # Python 2
  22     import urllib2 as compat_urllib_request
  23
  24 try:
  25     import urllib.error as compat_urllib_error
  26 except ImportError: # Python 2
  27     import urllib2 as compat_urllib_error
  28
  29 try:
  30     import urllib.parse as compat_urllib_parse
  31 except ImportError: # Python 2
  32     import urllib as compat_urllib_parse
  33
  34 try:
  35     from urllib.parse import urlparse as compat_urllib_parse_urlparse
  36 except ImportError: # Python 2
  37     from urlparse import urlparse as compat_urllib_parse_urlparse
  38
  39 try:
  40     import urllib.parse as compat_urlparse
  41 except ImportError: # Python 2
  42     import urlparse as compat_urlparse
  43
  44 try:
  45     import http.cookiejar as compat_cookiejar
  46 except ImportError: # Python 2
  47     import cookielib as compat_cookiejar
  48
  49 try:
  50     import html.entities as compat_html_entities
  51 except ImportError: # Python 2
  52     import htmlentitydefs as compat_html_entities
  53
  54 try:
  55     import html.parser as compat_html_parser
  56 except ImportError: # Python 2
  57     import HTMLParser as compat_html_parser
  58
  59 try:
  60     import http.client as compat_http_client
  61 except ImportError: # Python 2
  62     import httplib as compat_http_client
  63
  64 try:
  65     from urllib.error import HTTPError as compat_HTTPError
  66 except ImportError:  # Python 2
  67     from urllib2 import HTTPError as compat_HTTPError
  68
  69 try:
  70     from urllib.request import urlretrieve as compat_urlretrieve
  71 except ImportError:  # Python 2
  72     from urllib import urlretrieve as compat_urlretrieve
  73
  74
  75 try:
  76     from subprocess import DEVNULL
  77     compat_subprocess_get_DEVNULL = lambda: DEVNULL
  78 except ImportError:
  79     compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
  80
# compat_parse_qs: query-string parser with the Python 3 parse_qs behaviour.
# On Python 3 the stdlib function is used directly; the fallback below is only
# defined when urllib.parse cannot be imported.
try:
    from urllib.parse import parse_qs as compat_parse_qs
except ImportError: # Python 2
    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
    # Python 2's version is apparently totally broken
    def _unquote(string, encoding='utf-8', errors='replace'):
        """Replace %XX escapes in string by the characters they encode.

        Runs of consecutive escapes are collected as bytes and decoded
        together with the given encoding/errors policy (None selects
        'utf-8' / 'replace'). A '%' that is not followed by two hex digits
        is kept literally.
        """
        if string == '':
            return string
        res = string.split('%')
        if len(res) == 1:
            # No '%' at all: nothing to decode
            return string
        if encoding is None:
            encoding = 'utf-8'
        if errors is None:
            errors = 'replace'
        # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
        pct_sequence = b''
        string = res[0]
        for item in res[1:]:
            try:
                if not item:
                    raise ValueError
                # Python 2 only: str.decode('hex') turns two hex digits into one byte
                pct_sequence += item[:2].decode('hex')
                rest = item[2:]
                if not rest:
                    # This segment was just a single percent-encoded character.
                    # May be part of a sequence of code units, so delay decoding.
                    # (Stored in pct_sequence).
                    continue
            except ValueError:
                # Not a valid escape: put the '%' back and treat it as plain text
                rest = '%' + item
            # Encountered non-percent-encoded characters. Flush the current
            # pct_sequence.
            string += pct_sequence.decode(encoding, errors) + rest
            pct_sequence = b''
        if pct_sequence:
            # Flush the final pct_sequence
            string += pct_sequence.decode(encoding, errors)
        return string

    def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
                encoding='utf-8', errors='replace'):
        """Parse the query string qs into a list of (name, value) pairs.

        Fields are separated by '&' or ';'. Fields with an empty value are
        dropped unless keep_blank_values is set. With strict_parsing, a
        non-empty field without '=' raises ValueError; otherwise it is
        skipped (or kept with an empty value if keep_blank_values is set).
        """
        # Results are coerced with the Python 2 unicode type (this branch is
        # only defined when the Python 3 import above failed)
        qs, _coerce_result = qs, unicode
        pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
        r = []
        for name_value in pairs:
            if not name_value and not strict_parsing:
                continue
            nv = name_value.split('=', 1)
            if len(nv) != 2:
                if strict_parsing:
                    raise ValueError("bad query field: %r" % (name_value,))
                # Handle case of a control-name with no equal sign
                if keep_blank_values:
                    nv.append('')
                else:
                    continue
            if len(nv[1]) or keep_blank_values:
                # '+' stands for a space; it is translated before %XX decoding
                name = nv[0].replace('+', ' ')
                name = _unquote(name, encoding=encoding, errors=errors)
                name = _coerce_result(name)
                value = nv[1].replace('+', ' ')
                value = _unquote(value, encoding=encoding, errors=errors)
                value = _coerce_result(value)
                r.append((name, value))
        return r

    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
                encoding='utf-8', errors='replace'):
        """Parse the query string qs into a dict mapping each name to the
        list of its values, in the order they appear in qs.

        The keyword arguments are passed through to _parse_qsl.
        """
        parsed_result = {}
        pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
                        encoding=encoding, errors=errors)
        for name, value in pairs:
            if name in parsed_result:
                parsed_result[name].append(value)
            else:
                parsed_result[name] = [value]
        return parsed_result
 159
 160 try:
 161     compat_str = unicode # Python 2
 162 except NameError:
 163     compat_str = str
 164
 165 try:
 166     compat_chr = unichr # Python 2
 167 except NameError:
 168     compat_chr = chr
 169
 170 def compat_ord(c):
 171     if type(c) is int: return c
 172     else: return ord(c)
 173
# This is not clearly defined otherwise
# (obtained here by taking the type of a freshly compiled, empty pattern)
compiled_regex_type = type(re.compile(''))

# Default HTTP request headers. YoutubeDLHandler.http_request adds every entry
# of this dict to each request it processes.
std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
 184
 185 def preferredencoding():
 186     """Get preferred encoding.
 187
 188     Returns the best encoding scheme for the system, based on
 189     locale.getpreferredencoding() and some further tweaks.
 190     """
 191     try:
 192         pref = locale.getpreferredencoding()
 193         u'TEST'.encode(pref)
 194     except:
 195         pref = 'UTF-8'
 196
 197     return pref
 198
 199 if sys.version_info < (3,0):
 200     def compat_print(s):
 201         print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
 202 else:
 203     def compat_print(s):
 204         assert type(s) == type(u'')
 205         print(s)
 206
 207 # In Python 2.x, json.dump expects a bytestream.
 208 # In Python 3.x, it writes to a character stream
 209 if sys.version_info < (3,0):
 210     def write_json_file(obj, fn):
 211         with open(fn, 'wb') as f:
 212             json.dump(obj, f)
 213 else:
 214     def write_json_file(obj, fn):
 215         with open(fn, 'w', encoding='utf-8') as f:
 216             json.dump(obj, f)
 217
 218 if sys.version_info >= (2,7):
 219     def find_xpath_attr(node, xpath, key, val):
 220         """ Find the xpath xpath[@key=val] """
 221         assert re.match(r'^[a-zA-Z]+$', key)
 222         assert re.match(r'^[a-zA-Z0-9@\s]*$', val)
 223         expr = xpath + u"[@%s='%s']" % (key, val)
 224         return node.find(expr)
 225 else:
 226     def find_xpath_attr(node, xpath, key, val):
 227         for f in node.findall(xpath):
 228             if f.attrib.get(key) == val:
 229                 return f
 230         return None
 231
 232 def htmlentity_transform(matchobj):
 233     """Transforms an HTML entity to a character.
 234
 235     This function receives a match object and is intended to be used with
 236     the re.sub() function.
 237     """
 238     entity = matchobj.group(1)
 239
 240     # Known non-numeric HTML entity
 241     if entity in compat_html_entities.name2codepoint:
 242         return compat_chr(compat_html_entities.name2codepoint[entity])
 243
 244     mobj = re.match(u'(?u)#(x?\\d+)', entity)
 245     if mobj is not None:
 246         numstr = mobj.group(1)
 247         if numstr.startswith(u'x'):
 248             base = 16
 249             numstr = u'0%s' % numstr
 250         else:
 251             base = 10
 252         return compat_chr(int(numstr, base))
 253
 254     # Unknown entity in name, return its literal representation
 255     return (u'&%s;' % entity)
 256
# Overwrite the module-level ``locatestarttagend`` pattern of compat_html_parser
# with the expression below. re.VERBOSE makes the regex engine ignore
# unescaped whitespace inside the pattern.
# NOTE(review): presumably this is the pattern HTMLParser uses to find the end
# of a start tag - confirm against the stdlib version being targeted.
compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
 258 class BaseHTMLParser(compat_html_parser.HTMLParser):
 259     def __init(self):
 260         compat_html_parser.HTMLParser.__init__(self)
 261         self.html = None
 262
 263     def loads(self, html):
 264         self.html = html
 265         self.feed(html)
 266         self.close()
 267
class AttrParser(BaseHTMLParser):
    """Modified HTMLParser that isolates a tag with the specified attribute"""
    def __init__(self, attribute, value):
        # Attribute name and value that identify the wanted tag
        self.attribute = attribute
        self.value = value
        # None until a matching start tag is seen; then [tag], later extended
        # with the start position (find_startpos) and the end position
        # (handle_endtag), both as returned by getpos()
        self.result = None
        # True while the parser is inside the matched element
        self.started = False
        # Number of currently open tags per tag name, counted only while started
        self.depth = {}
        # True from the matching start tag until the next parser callback
        self.watch_startpos = False
        # How many parse errors have been skipped so far (see error())
        self.error_count = 0
        BaseHTMLParser.__init__(self)

    def error(self, message):
        """Parse-error hook: skip ahead and resume parsing, or give up.

        Raises HTMLParseError once error_count exceeds 10, or when the error
        happens inside the matched element.
        """
        if self.error_count > 10 or self.started:
            raise compat_html_parser.HTMLParseError(message, self.getpos())
        # Restart from the document text that follows the current line
        self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
        self.error_count += 1
        self.goahead(1)

    def handle_starttag(self, tag, attrs):
        """Start matching on the wanted tag and count tags opened while matching."""
        attrs = dict(attrs)
        if self.started:
            # First event after the matching start tag: record the start position
            self.find_startpos(None)
        if self.attribute in attrs and attrs[self.attribute] == self.value:
            self.result = [tag]
            self.started = True
            self.watch_startpos = True
        if self.started:
            if not tag in self.depth: self.depth[tag] = 0
            self.depth[tag] += 1

    def handle_endtag(self, tag):
        """Close the match once the count for the matched tag name is back to zero."""
        if self.started:
            if tag in self.depth: self.depth[tag] -= 1
            if self.depth[self.result[0]] == 0:
                self.started = False
                # End position of the matched content
                self.result.append(self.getpos())

    def find_startpos(self, x):
        """Needed to put the start position of the result (self.result[1])
        after the opening tag with the requested id"""
        if self.watch_startpos:
            self.watch_startpos = False
            self.result.append(self.getpos())
    # Every other parser callback only serves to record the start position
    handle_entityref = handle_charref = handle_data = handle_comment = \
    handle_decl = handle_pi = unknown_decl = find_startpos

    def get_result(self):
        """Return the stripped text between the recorded start and end
        positions, or None unless both positions were recorded."""
        if self.result is None:
            return None
        if len(self.result) != 3:
            return None
        # result[1] / result[2] are getpos() pairs; index 0 is used as a
        # 1-based line number and index 1 as a column offset
        lines = self.html.split('\n')
        lines = lines[self.result[1][0]-1:self.result[2][0]]
        lines[0] = lines[0][self.result[1][1]:]
        if len(lines) == 1:
            # Start and end are on the same line: the end column has to be
            # taken relative to the start column that was just cut off
            lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
        lines[-1] = lines[-1][:self.result[2][1]]
        return '\n'.join(lines).strip()
 327 # Hack for https://github.com/rg3/youtube-dl/issues/662
 328 if sys.version_info < (2, 7, 3):
 329     AttrParser.parse_endtag = (lambda self, i:
 330         i + len("</scr'+'ipt>")
 331         if self.rawdata[i:].startswith("</scr'+'ipt>")
 332         else compat_html_parser.HTMLParser.parse_endtag(self, i))
 333
 334 def get_element_by_id(id, html):
 335     """Return the content of the tag with the specified ID in the passed HTML document"""
 336     return get_element_by_attribute("id", id, html)
 337
 338 def get_element_by_attribute(attribute, value, html):
 339     """Return the content of the tag with the specified attribute in the passed HTML document"""
 340     parser = AttrParser(attribute, value)
 341     try:
 342         parser.loads(html)
 343     except compat_html_parser.HTMLParseError:
 344         pass
 345     return parser.get_result()
 346
 347 class MetaParser(BaseHTMLParser):
 348     """
 349     Modified HTMLParser that isolates a meta tag with the specified name
 350     attribute.
 351     """
 352     def __init__(self, name):
 353         BaseHTMLParser.__init__(self)
 354         self.name = name
 355         self.content = None
 356         self.result = None
 357
 358     def handle_starttag(self, tag, attrs):
 359         if tag != 'meta':
 360             return
 361         attrs = dict(attrs)
 362         if attrs.get('name') == self.name:
 363             self.result = attrs.get('content')
 364
 365     def get_result(self):
 366         return self.result
 367
 368 def get_meta_content(name, html):
 369     """
 370     Return the content attribute from the meta tag with the given name attribute.
 371     """
 372     parser = MetaParser(name)
 373     try:
 374         parser.loads(html)
 375     except compat_html_parser.HTMLParseError:
 376         pass
 377     return parser.get_result()
 378
 379
 380 def clean_html(html):
 381     """Clean an HTML snippet into a readable string"""
 382     # Newline vs <br />
 383     html = html.replace('\n', ' ')
 384     html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
 385     html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
 386     # Strip html tags
 387     html = re.sub('<.*?>', '', html)
 388     # Replace html entities
 389     html = unescapeHTML(html)
 390     return html.strip()
 391
 392
 393 def sanitize_open(filename, open_mode):
 394     """Try to open the given filename, and slightly tweak it if this fails.
 395
 396     Attempts to open the given filename. If this fails, it tries to change
 397     the filename slightly, step by step, until it's either able to open it
 398     or it fails and raises a final exception, like the standard open()
 399     function.
 400
 401     It returns the tuple (stream, definitive_file_name).
 402     """
 403     try:
 404         if filename == u'-':
 405             if sys.platform == 'win32':
 406                 import msvcrt
 407                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
 408             return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
 409         stream = open(encodeFilename(filename), open_mode)
 410         return (stream, filename)
 411     except (IOError, OSError) as err:
 412         if err.errno in (errno.EACCES,):
 413             raise
 414
 415         # In case of error, try to remove win32 forbidden chars
 416         alt_filename = os.path.join(
 417                         re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', path_part)
 418                         for path_part in os.path.split(filename)
 419                        )
 420         if alt_filename == filename:
 421             raise
 422         else:
 423             # An exception here should be caught in the caller
 424             stream = open(encodeFilename(filename), open_mode)
 425             return (stream, alt_filename)
 426
 427
 428 def timeconvert(timestr):
 429     """Convert RFC 2822 defined time string into system timestamp"""
 430     timestamp = None
 431     timetuple = email.utils.parsedate_tz(timestr)
 432     if timetuple is not None:
 433         timestamp = email.utils.mktime_tz(timetuple)
 434     return timestamp
 435
 436 def sanitize_filename(s, restricted=False, is_id=False):
 437     """Sanitizes a string so it could be used as part of a filename.
 438     If restricted is set, use a stricter subset of allowed characters.
 439     Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
 440     """
 441     def replace_insane(char):
 442         if char == '?' or ord(char) < 32 or ord(char) == 127:
 443             return ''
 444         elif char == '"':
 445             return '' if restricted else '\''
 446         elif char == ':':
 447             return '_-' if restricted else ' -'
 448         elif char in '\\/|*<>':
 449             return '_'
 450         if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
 451             return '_'
 452         if restricted and ord(char) > 127:
 453             return '_'
 454         return char
 455
 456     result = u''.join(map(replace_insane, s))
 457     if not is_id:
 458         while '__' in result:
 459             result = result.replace('__', '_')
 460         result = result.strip('_')
 461         # Common case of "Foreign band name - English song title"
 462         if restricted and result.startswith('-_'):
 463             result = result[2:]
 464         if not result:
 465             result = '_'
 466     return result
 467
 468 def orderedSet(iterable):
 469     """ Remove all duplicates from the input iterable """
 470     res = []
 471     for el in iterable:
 472         if el not in res:
 473             res.append(el)
 474     return res
 475
 476 def unescapeHTML(s):
 477     """
 478     @param s a string
 479     """
 480     assert type(s) == type(u'')
 481
 482     result = re.sub(u'(?u)&(.+?);', htmlentity_transform, s)
 483     return result
 484
 485 def encodeFilename(s):
 486     """
 487     @param s The name of the file
 488     """
 489
 490     assert type(s) == type(u'')
 491
 492     # Python 3 has a Unicode API
 493     if sys.version_info >= (3, 0):
 494         return s
 495
 496     if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
 497         # Pass u'' directly to use Unicode APIs on Windows 2000 and up
 498         # (Detecting Windows NT 4 is tricky because 'major >= 4' would
 499         # match Windows 9x series as well. Besides, NT 4 is obsolete.)
 500         return s
 501     else:
 502         encoding = sys.getfilesystemencoding()
 503         if encoding is None:
 504             encoding = 'utf-8'
 505         return s.encode(encoding, 'ignore')
 506
 507 def decodeOption(optval):
 508     if optval is None:
 509         return optval
 510     if isinstance(optval, bytes):
 511         optval = optval.decode(preferredencoding())
 512
 513     assert isinstance(optval, compat_str)
 514     return optval
 515
 516 def formatSeconds(secs):
 517     if secs > 3600:
 518         return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
 519     elif secs > 60:
 520         return '%d:%02d' % (secs // 60, secs % 60)
 521     else:
 522         return '%d' % secs
 523
 524 def make_HTTPS_handler(opts):
 525     if sys.version_info < (3,2):
 526         # Python's 2.x handler is very simplistic
 527         return compat_urllib_request.HTTPSHandler()
 528     else:
 529         import ssl
 530         context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
 531         context.set_default_verify_paths()
 532
 533         context.verify_mode = (ssl.CERT_NONE
 534                                if opts.no_check_certificate
 535                                else ssl.CERT_REQUIRED)
 536         return compat_urllib_request.HTTPSHandler(context=context)
 537
 538 class ExtractorError(Exception):
 539     """Error during info extraction."""
 540     def __init__(self, msg, tb=None, expected=False, cause=None):
 541         """ tb, if given, is the original traceback (so that it can be printed out).
 542         If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
 543         """
 544
 545         if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
 546             expected = True
 547         if not expected:
 548             msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type  youtube-dl -U  to update.'
 549         super(ExtractorError, self).__init__(msg)
 550
 551         self.traceback = tb
 552         self.exc_info = sys.exc_info()  # preserve original exception
 553         self.cause = cause
 554
 555     def format_traceback(self):
 556         if self.traceback is None:
 557             return None
 558         return u''.join(traceback.format_tb(self.traceback))
 559
 560
 561 class DownloadError(Exception):
 562     """Download Error exception.
 563
 564     This exception may be thrown by FileDownloader objects if they are not
 565     configured to continue on errors. They will contain the appropriate
 566     error message.
 567     """
 568     def __init__(self, msg, exc_info=None):
 569         """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
 570         super(DownloadError, self).__init__(msg)
 571         self.exc_info = exc_info
 572
 573
 574 class SameFileError(Exception):
 575     """Same File exception.
 576
 577     This exception will be thrown by FileDownloader objects if they detect
 578     multiple files would have to be downloaded to the same file on disk.
 579     """
 580     pass
 581
 582
 583 class PostProcessingError(Exception):
 584     """Post Processing exception.
 585
 586     This exception may be raised by PostProcessor's .run() method to
 587     indicate an error in the postprocessing task.
 588     """
 589     def __init__(self, msg):
 590         self.msg = msg
 591
 592 class MaxDownloadsReached(Exception):
 593     """ --max-downloads limit has been reached. """
 594     pass
 595
 596
 597 class UnavailableVideoError(Exception):
 598     """Unavailable Format exception.
 599
 600     This exception will be thrown when a video is requested
 601     in a format that is not available for that video.
 602     """
 603     pass
 604
 605
 606 class ContentTooShortError(Exception):
 607     """Content Too Short exception.
 608
 609     This exception may be raised by FileDownloader objects when a file they
 610     download is too small for what the server announced first, indicating
 611     the connection was probably interrupted.
 612     """
 613     # Both in bytes
 614     downloaded = None
 615     expected = None
 616
 617     def __init__(self, downloaded, expected):
 618         self.downloaded = downloaded
 619         self.expected = expected
 620
 621 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
 622     """Handler for HTTP requests and responses.
 623
 624     This class, when installed with an OpenerDirector, automatically adds
 625     the standard headers to every HTTP request and handles gzipped and
 626     deflated responses from web servers. If compression is to be avoided in
 627     a particular request, the original request in the program code only has
 628     to include the HTTP header "Youtubedl-No-Compression", which will be
 629     removed before making the real request.
 630
 631     Part of this code was copied from:
 632
 633     http://techknack.net/python-urllib2-handlers/
 634
 635     Andrew Rowls, the author of that code, agreed to release it to the
 636     public domain.
 637     """
 638
 639     @staticmethod
 640     def deflate(data):
 641         try:
 642             return zlib.decompress(data, -zlib.MAX_WBITS)
 643         except zlib.error:
 644             return zlib.decompress(data)
 645
 646     @staticmethod
 647     def addinfourl_wrapper(stream, headers, url, code):
 648         if hasattr(compat_urllib_request.addinfourl, 'getcode'):
 649             return compat_urllib_request.addinfourl(stream, headers, url, code)
 650         ret = compat_urllib_request.addinfourl(stream, headers, url)
 651         ret.code = code
 652         return ret
 653
 654     def http_request(self, req):
 655         for h,v in std_headers.items():
 656             if h in req.headers:
 657                 del req.headers[h]
 658             req.add_header(h, v)
 659         if 'Youtubedl-no-compression' in req.headers:
 660             if 'Accept-encoding' in req.headers:
 661                 del req.headers['Accept-encoding']
 662             del req.headers['Youtubedl-no-compression']
 663         if 'Youtubedl-user-agent' in req.headers:
 664             if 'User-agent' in req.headers:
 665                 del req.headers['User-agent']
 666             req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
 667             del req.headers['Youtubedl-user-agent']
 668         return req
 669
 670     def http_response(self, req, resp):
 671         old_resp = resp
 672         # gzip
 673         if resp.headers.get('Content-encoding', '') == 'gzip':
 674             content = resp.read()
 675             gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
 676             try:
 677                 uncompressed = io.BytesIO(gz.read())
 678             except IOError as original_ioerror:
 679                 # There may be junk add the end of the file
 680                 # See http://stackoverflow.com/q/4928560/35070 for details
 681                 for i in range(1, 1024):
 682                     try:
 683                         gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
 684                         uncompressed = io.BytesIO(gz.read())
 685                     except IOError:
 686                         continue
 687                     break
 688                 else:
 689                     raise original_ioerror
 690             resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
 691             resp.msg = old_resp.msg
 692         # deflate
 693         if resp.headers.get('Content-encoding', '') == 'deflate':
 694             gz = io.BytesIO(self.deflate(resp.read()))
 695             resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
 696             resp.msg = old_resp.msg
 697         return resp
 698
 699     https_request = http_request
 700     https_response = http_response
 701
 702 def unified_strdate(date_str):
 703     """Return a string with the date in the format YYYYMMDD"""
 704     upload_date = None
 705     #Replace commas
 706     date_str = date_str.replace(',',' ')
 707     # %z (UTC offset) is only supported in python>=3.2
 708     date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
 709     format_expressions = [
 710         '%d %B %Y',
 711         '%B %d %Y',
 712         '%b %d %Y',
 713         '%Y-%m-%d',
 714         '%d/%m/%Y',
 715         '%Y/%m/%d %H:%M:%S',
 716         '%d.%m.%Y %H:%M',
 717         '%Y-%m-%dT%H:%M:%SZ',
 718     ]
 719     for expression in format_expressions:
 720         try:
 721             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
 722         except:
 723             pass
 724     return upload_date
 725
 726 def determine_ext(url, default_ext=u'unknown_video'):
 727     guess = url.partition(u'?')[0].rpartition(u'.')[2]
 728     if re.match(r'^[A-Za-z0-9]+$', guess):
 729         return guess
 730     else:
 731         return default_ext
 732
 733 def subtitles_filename(filename, sub_lang, sub_format):
 734     return filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
 735
 736 def date_from_str(date_str):
 737     """
 738     Return a datetime object from a string in the format YYYYMMDD or
 739     (now|today)[+-][0-9](day|week|month|year)(s)?"""
 740     today = datetime.date.today()
 741     if date_str == 'now'or date_str == 'today':
 742         return today
 743     match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
 744     if match is not None:
 745         sign = match.group('sign')
 746         time = int(match.group('time'))
 747         if sign == '-':
 748             time = -time
 749         unit = match.group('unit')
 750         #A bad aproximation?
 751         if unit == 'month':
 752             unit = 'day'
 753             time *= 30
 754         elif unit == 'year':
 755             unit = 'day'
 756             time *= 365
 757         unit += 's'
 758         delta = datetime.timedelta(**{unit: time})
 759         return today + delta
 760     return datetime.datetime.strptime(date_str, "%Y%m%d").date()
 761
 762 class DateRange(object):
 763     """Represents a time interval between two dates"""
 764     def __init__(self, start=None, end=None):
 765         """start and end must be strings in the format accepted by date"""
 766         if start is not None:
 767             self.start = date_from_str(start)
 768         else:
 769             self.start = datetime.datetime.min.date()
 770         if end is not None:
 771             self.end = date_from_str(end)
 772         else:
 773             self.end = datetime.datetime.max.date()
 774         if self.start > self.end:
 775             raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
 776     @classmethod
 777     def day(cls, day):
 778         """Returns a range that only contains the given day"""
 779         return cls(day,day)
 780     def __contains__(self, date):
 781         """Check if the date is in the range"""
 782         if not isinstance(date, datetime.date):
 783             date = date_from_str(date)
 784         return self.start <= date <= self.end
 785     def __str__(self):
 786         return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
 787
 788
 789 def platform_name():
 790     """ Returns the platform name as a compat_str """
 791     res = platform.platform()
 792     if isinstance(res, bytes):
 793         res = res.decode(preferredencoding())
 794
 795     assert isinstance(res, compat_str)
 796     return res
 797
 798
 799 def write_string(s, out=None):
 800     if out is None:
 801         out = sys.stderr
 802     assert type(s) == type(u'')
 803
 804     if ('b' in getattr(out, 'mode', '') or
 805             sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
 806         s = s.encode(preferredencoding(), 'ignore')
 807     out.write(s)
 808     out.flush()
 809
 810
 811 def bytes_to_intlist(bs):
 812     if not bs:
 813         return []
 814     if isinstance(bs[0], int):  # Python 3
 815         return list(bs)
 816     else:
 817         return [ord(c) for c in bs]
 818
 819
 820 def intlist_to_bytes(xs):
 821     if not xs:
 822         return b''
 823     if isinstance(chr(0), bytes):  # Python 2
 824         return ''.join([chr(x) for x in xs])
 825     else:
 826         return bytes(xs)