_ Git - youtube-dl/blob - youtube_dl/utils.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 import datetime
   5 import email.utils
   6 import errno
   7 import gzip
   8 import io
   9 import json
  10 import locale
  11 import os
  12 import pipes
  13 import platform
  14 import re
  15 import socket
  16 import sys
  17 import traceback
  18 import zlib
  19
  20 try:
  21     import urllib.request as compat_urllib_request
  22 except ImportError: # Python 2
  23     import urllib2 as compat_urllib_request
  24
  25 try:
  26     import urllib.error as compat_urllib_error
  27 except ImportError: # Python 2
  28     import urllib2 as compat_urllib_error
  29
  30 try:
  31     import urllib.parse as compat_urllib_parse
  32 except ImportError: # Python 2
  33     import urllib as compat_urllib_parse
  34
  35 try:
  36     from urllib.parse import urlparse as compat_urllib_parse_urlparse
  37 except ImportError: # Python 2
  38     from urlparse import urlparse as compat_urllib_parse_urlparse
  39
  40 try:
  41     import urllib.parse as compat_urlparse
  42 except ImportError: # Python 2
  43     import urlparse as compat_urlparse
  44
  45 try:
  46     import http.cookiejar as compat_cookiejar
  47 except ImportError: # Python 2
  48     import cookielib as compat_cookiejar
  49
  50 try:
  51     import html.entities as compat_html_entities
  52 except ImportError: # Python 2
  53     import htmlentitydefs as compat_html_entities
  54
  55 try:
  56     import html.parser as compat_html_parser
  57 except ImportError: # Python 2
  58     import HTMLParser as compat_html_parser
  59
  60 try:
  61     import http.client as compat_http_client
  62 except ImportError: # Python 2
  63     import httplib as compat_http_client
  64
  65 try:
  66     from urllib.error import HTTPError as compat_HTTPError
  67 except ImportError:  # Python 2
  68     from urllib2 import HTTPError as compat_HTTPError
  69
  70 try:
  71     from urllib.request import urlretrieve as compat_urlretrieve
  72 except ImportError:  # Python 2
  73     from urllib import urlretrieve as compat_urlretrieve
  74
  75
  76 try:
  77     from subprocess import DEVNULL
  78     compat_subprocess_get_DEVNULL = lambda: DEVNULL
  79 except ImportError:
  80     compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
  81
  82 try:
  83     from urllib.parse import parse_qs as compat_parse_qs
  84 except ImportError: # Python 2
  85     # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
  86     # Python 2's version is apparently totally broken
  87     def _unquote(string, encoding='utf-8', errors='replace'):
  88         if string == '':
  89             return string
  90         res = string.split('%')
  91         if len(res) == 1:
  92             return string
  93         if encoding is None:
  94             encoding = 'utf-8'
  95         if errors is None:
  96             errors = 'replace'
  97         # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
  98         pct_sequence = b''
  99         string = res[0]
 100         for item in res[1:]:
 101             try:
 102                 if not item:
 103                     raise ValueError
 104                 pct_sequence += item[:2].decode('hex')
 105                 rest = item[2:]
 106                 if not rest:
 107                     # This segment was just a single percent-encoded character.
 108                     # May be part of a sequence of code units, so delay decoding.
 109                     # (Stored in pct_sequence).
 110                     continue
 111             except ValueError:
 112                 rest = '%' + item
 113             # Encountered non-percent-encoded characters. Flush the current
 114             # pct_sequence.
 115             string += pct_sequence.decode(encoding, errors) + rest
 116             pct_sequence = b''
 117         if pct_sequence:
 118             # Flush the final pct_sequence
 119             string += pct_sequence.decode(encoding, errors)
 120         return string
 121
 122     def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
 123                 encoding='utf-8', errors='replace'):
 124         qs, _coerce_result = qs, unicode
 125         pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
 126         r = []
 127         for name_value in pairs:
 128             if not name_value and not strict_parsing:
 129                 continue
 130             nv = name_value.split('=', 1)
 131             if len(nv) != 2:
 132                 if strict_parsing:
 133                     raise ValueError("bad query field: %r" % (name_value,))
 134                 # Handle case of a control-name with no equal sign
 135                 if keep_blank_values:
 136                     nv.append('')
 137                 else:
 138                     continue
 139             if len(nv[1]) or keep_blank_values:
 140                 name = nv[0].replace('+', ' ')
 141                 name = _unquote(name, encoding=encoding, errors=errors)
 142                 name = _coerce_result(name)
 143                 value = nv[1].replace('+', ' ')
 144                 value = _unquote(value, encoding=encoding, errors=errors)
 145                 value = _coerce_result(value)
 146                 r.append((name, value))
 147         return r
 148
 149     def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
 150                 encoding='utf-8', errors='replace'):
 151         parsed_result = {}
 152         pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
 153                         encoding=encoding, errors=errors)
 154         for name, value in pairs:
 155             if name in parsed_result:
 156                 parsed_result[name].append(value)
 157             else:
 158                 parsed_result[name] = [value]
 159         return parsed_result
 160
 161 try:
 162     compat_str = unicode # Python 2
 163 except NameError:
 164     compat_str = str
 165
 166 try:
 167     compat_chr = unichr # Python 2
 168 except NameError:
 169     compat_chr = chr
 170
 171 def compat_ord(c):
 172     if type(c) is int: return c
 173     else: return ord(c)
 174
 175 # This is not clearly defined otherwise
 176 compiled_regex_type = type(re.compile(''))
 177
 178 std_headers = {
 179     'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
 180     'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
 181     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
 182     'Accept-Encoding': 'gzip, deflate',
 183     'Accept-Language': 'en-us,en;q=0.5',
 184 }
 185
 186 def preferredencoding():
 187     """Get preferred encoding.
 188
 189     Returns the best encoding scheme for the system, based on
 190     locale.getpreferredencoding() and some further tweaks.
 191     """
 192     try:
 193         pref = locale.getpreferredencoding()
 194         u'TEST'.encode(pref)
 195     except:
 196         pref = 'UTF-8'
 197
 198     return pref
 199
 200 if sys.version_info < (3,0):
 201     def compat_print(s):
 202         print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
 203 else:
 204     def compat_print(s):
 205         assert type(s) == type(u'')
 206         print(s)
 207
 208 # In Python 2.x, json.dump expects a bytestream.
 209 # In Python 3.x, it writes to a character stream
 210 if sys.version_info < (3,0):
 211     def write_json_file(obj, fn):
 212         with open(fn, 'wb') as f:
 213             json.dump(obj, f)
 214 else:
 215     def write_json_file(obj, fn):
 216         with open(fn, 'w', encoding='utf-8') as f:
 217             json.dump(obj, f)
 218
 219 if sys.version_info >= (2,7):
 220     def find_xpath_attr(node, xpath, key, val):
 221         """ Find the xpath xpath[@key=val] """
 222         assert re.match(r'^[a-zA-Z]+$', key)
 223         assert re.match(r'^[a-zA-Z0-9@\s]*$', val)
 224         expr = xpath + u"[@%s='%s']" % (key, val)
 225         return node.find(expr)
 226 else:
 227     def find_xpath_attr(node, xpath, key, val):
 228         for f in node.findall(xpath):
 229             if f.attrib.get(key) == val:
 230                 return f
 231         return None
 232
 233 def htmlentity_transform(matchobj):
 234     """Transforms an HTML entity to a character.
 235
 236     This function receives a match object and is intended to be used with
 237     the re.sub() function.
 238     """
 239     entity = matchobj.group(1)
 240
 241     # Known non-numeric HTML entity
 242     if entity in compat_html_entities.name2codepoint:
 243         return compat_chr(compat_html_entities.name2codepoint[entity])
 244
 245     mobj = re.match(u'(?u)#(x?\\d+)', entity)
 246     if mobj is not None:
 247         numstr = mobj.group(1)
 248         if numstr.startswith(u'x'):
 249             base = 16
 250             numstr = u'0%s' % numstr
 251         else:
 252             base = 10
 253         return compat_chr(int(numstr, base))
 254
 255     # Unknown entity in name, return its literal representation
 256     return (u'&%s;' % entity)
 257
 258 compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
 259 class BaseHTMLParser(compat_html_parser.HTMLParser):
 260     def __init(self):
 261         compat_html_parser.HTMLParser.__init__(self)
 262         self.html = None
 263
 264     def loads(self, html):
 265         self.html = html
 266         self.feed(html)
 267         self.close()
 268
 269 class AttrParser(BaseHTMLParser):
 270     """Modified HTMLParser that isolates a tag with the specified attribute"""
 271     def __init__(self, attribute, value):
 272         self.attribute = attribute
 273         self.value = value
 274         self.result = None
 275         self.started = False
 276         self.depth = {}
 277         self.watch_startpos = False
 278         self.error_count = 0
 279         BaseHTMLParser.__init__(self)
 280
 281     def error(self, message):
 282         if self.error_count > 10 or self.started:
 283             raise compat_html_parser.HTMLParseError(message, self.getpos())
 284         self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
 285         self.error_count += 1
 286         self.goahead(1)
 287
 288     def handle_starttag(self, tag, attrs):
 289         attrs = dict(attrs)
 290         if self.started:
 291             self.find_startpos(None)
 292         if self.attribute in attrs and attrs[self.attribute] == self.value:
 293             self.result = [tag]
 294             self.started = True
 295             self.watch_startpos = True
 296         if self.started:
 297             if not tag in self.depth: self.depth[tag] = 0
 298             self.depth[tag] += 1
 299
 300     def handle_endtag(self, tag):
 301         if self.started:
 302             if tag in self.depth: self.depth[tag] -= 1
 303             if self.depth[self.result[0]] == 0:
 304                 self.started = False
 305                 self.result.append(self.getpos())
 306
 307     def find_startpos(self, x):
 308         """Needed to put the start position of the result (self.result[1])
 309         after the opening tag with the requested id"""
 310         if self.watch_startpos:
 311             self.watch_startpos = False
 312             self.result.append(self.getpos())
 313     handle_entityref = handle_charref = handle_data = handle_comment = \
 314     handle_decl = handle_pi = unknown_decl = find_startpos
 315
 316     def get_result(self):
 317         if self.result is None:
 318             return None
 319         if len(self.result) != 3:
 320             return None
 321         lines = self.html.split('\n')
 322         lines = lines[self.result[1][0]-1:self.result[2][0]]
 323         lines[0] = lines[0][self.result[1][1]:]
 324         if len(lines) == 1:
 325             lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
 326         lines[-1] = lines[-1][:self.result[2][1]]
 327         return '\n'.join(lines).strip()
 328 # Hack for https://github.com/rg3/youtube-dl/issues/662
 329 if sys.version_info < (2, 7, 3):
 330     AttrParser.parse_endtag = (lambda self, i:
 331         i + len("</scr'+'ipt>")
 332         if self.rawdata[i:].startswith("</scr'+'ipt>")
 333         else compat_html_parser.HTMLParser.parse_endtag(self, i))
 334
 335 def get_element_by_id(id, html):
 336     """Return the content of the tag with the specified ID in the passed HTML document"""
 337     return get_element_by_attribute("id", id, html)
 338
 339 def get_element_by_attribute(attribute, value, html):
 340     """Return the content of the tag with the specified attribute in the passed HTML document"""
 341     parser = AttrParser(attribute, value)
 342     try:
 343         parser.loads(html)
 344     except compat_html_parser.HTMLParseError:
 345         pass
 346     return parser.get_result()
 347
 348 class MetaParser(BaseHTMLParser):
 349     """
 350     Modified HTMLParser that isolates a meta tag with the specified name
 351     attribute.
 352     """
 353     def __init__(self, name):
 354         BaseHTMLParser.__init__(self)
 355         self.name = name
 356         self.content = None
 357         self.result = None
 358
 359     def handle_starttag(self, tag, attrs):
 360         if tag != 'meta':
 361             return
 362         attrs = dict(attrs)
 363         if attrs.get('name') == self.name:
 364             self.result = attrs.get('content')
 365
 366     def get_result(self):
 367         return self.result
 368
 369 def get_meta_content(name, html):
 370     """
 371     Return the content attribute from the meta tag with the given name attribute.
 372     """
 373     parser = MetaParser(name)
 374     try:
 375         parser.loads(html)
 376     except compat_html_parser.HTMLParseError:
 377         pass
 378     return parser.get_result()
 379
 380
 381 def clean_html(html):
 382     """Clean an HTML snippet into a readable string"""
 383     # Newline vs <br />
 384     html = html.replace('\n', ' ')
 385     html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
 386     html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
 387     # Strip html tags
 388     html = re.sub('<.*?>', '', html)
 389     # Replace html entities
 390     html = unescapeHTML(html)
 391     return html.strip()
 392
 393
 394 def sanitize_open(filename, open_mode):
 395     """Try to open the given filename, and slightly tweak it if this fails.
 396
 397     Attempts to open the given filename. If this fails, it tries to change
 398     the filename slightly, step by step, until it's either able to open it
 399     or it fails and raises a final exception, like the standard open()
 400     function.
 401
 402     It returns the tuple (stream, definitive_file_name).
 403     """
 404     try:
 405         if filename == u'-':
 406             if sys.platform == 'win32':
 407                 import msvcrt
 408                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
 409             return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
 410         stream = open(encodeFilename(filename), open_mode)
 411         return (stream, filename)
 412     except (IOError, OSError) as err:
 413         if err.errno in (errno.EACCES,):
 414             raise
 415
 416         # In case of error, try to remove win32 forbidden chars
 417         alt_filename = os.path.join(
 418                         re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', path_part)
 419                         for path_part in os.path.split(filename)
 420                        )
 421         if alt_filename == filename:
 422             raise
 423         else:
 424             # An exception here should be caught in the caller
 425             stream = open(encodeFilename(filename), open_mode)
 426             return (stream, alt_filename)
 427
 428
 429 def timeconvert(timestr):
 430     """Convert RFC 2822 defined time string into system timestamp"""
 431     timestamp = None
 432     timetuple = email.utils.parsedate_tz(timestr)
 433     if timetuple is not None:
 434         timestamp = email.utils.mktime_tz(timetuple)
 435     return timestamp
 436
 437 def sanitize_filename(s, restricted=False, is_id=False):
 438     """Sanitizes a string so it could be used as part of a filename.
 439     If restricted is set, use a stricter subset of allowed characters.
 440     Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
 441     """
 442     def replace_insane(char):
 443         if char == '?' or ord(char) < 32 or ord(char) == 127:
 444             return ''
 445         elif char == '"':
 446             return '' if restricted else '\''
 447         elif char == ':':
 448             return '_-' if restricted else ' -'
 449         elif char in '\\/|*<>':
 450             return '_'
 451         if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
 452             return '_'
 453         if restricted and ord(char) > 127:
 454             return '_'
 455         return char
 456
 457     result = u''.join(map(replace_insane, s))
 458     if not is_id:
 459         while '__' in result:
 460             result = result.replace('__', '_')
 461         result = result.strip('_')
 462         # Common case of "Foreign band name - English song title"
 463         if restricted and result.startswith('-_'):
 464             result = result[2:]
 465         if not result:
 466             result = '_'
 467     return result
 468
 469 def orderedSet(iterable):
 470     """ Remove all duplicates from the input iterable """
 471     res = []
 472     for el in iterable:
 473         if el not in res:
 474             res.append(el)
 475     return res
 476
 477 def unescapeHTML(s):
 478     """
 479     @param s a string
 480     """
 481     assert type(s) == type(u'')
 482
 483     result = re.sub(u'(?u)&(.+?);', htmlentity_transform, s)
 484     return result
 485
 486 def encodeFilename(s):
 487     """
 488     @param s The name of the file
 489     """
 490
 491     assert type(s) == type(u'')
 492
 493     # Python 3 has a Unicode API
 494     if sys.version_info >= (3, 0):
 495         return s
 496
 497     if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
 498         # Pass u'' directly to use Unicode APIs on Windows 2000 and up
 499         # (Detecting Windows NT 4 is tricky because 'major >= 4' would
 500         # match Windows 9x series as well. Besides, NT 4 is obsolete.)
 501         return s
 502     else:
 503         encoding = sys.getfilesystemencoding()
 504         if encoding is None:
 505             encoding = 'utf-8'
 506         return s.encode(encoding, 'ignore')
 507
 508 def decodeOption(optval):
 509     if optval is None:
 510         return optval
 511     if isinstance(optval, bytes):
 512         optval = optval.decode(preferredencoding())
 513
 514     assert isinstance(optval, compat_str)
 515     return optval
 516
 517 def formatSeconds(secs):
 518     if secs > 3600:
 519         return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
 520     elif secs > 60:
 521         return '%d:%02d' % (secs // 60, secs % 60)
 522     else:
 523         return '%d' % secs
 524
 525 def make_HTTPS_handler(opts):
 526     if sys.version_info < (3,2):
 527         # Python's 2.x handler is very simplistic
 528         return compat_urllib_request.HTTPSHandler()
 529     else:
 530         import ssl
 531         context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
 532         context.set_default_verify_paths()
 533
 534         context.verify_mode = (ssl.CERT_NONE
 535                                if opts.no_check_certificate
 536                                else ssl.CERT_REQUIRED)
 537         return compat_urllib_request.HTTPSHandler(context=context)
 538
 539 class ExtractorError(Exception):
 540     """Error during info extraction."""
 541     def __init__(self, msg, tb=None, expected=False, cause=None):
 542         """ tb, if given, is the original traceback (so that it can be printed out).
 543         If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
 544         """
 545
 546         if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
 547             expected = True
 548         if not expected:
 549             msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type  youtube-dl -U  to update.'
 550         super(ExtractorError, self).__init__(msg)
 551
 552         self.traceback = tb
 553         self.exc_info = sys.exc_info()  # preserve original exception
 554         self.cause = cause
 555
 556     def format_traceback(self):
 557         if self.traceback is None:
 558             return None
 559         return u''.join(traceback.format_tb(self.traceback))
 560
 561
 562 class DownloadError(Exception):
 563     """Download Error exception.
 564
 565     This exception may be thrown by FileDownloader objects if they are not
 566     configured to continue on errors. They will contain the appropriate
 567     error message.
 568     """
 569     def __init__(self, msg, exc_info=None):
 570         """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
 571         super(DownloadError, self).__init__(msg)
 572         self.exc_info = exc_info
 573
 574
 575 class SameFileError(Exception):
 576     """Same File exception.
 577
 578     This exception will be thrown by FileDownloader objects if they detect
 579     multiple files would have to be downloaded to the same file on disk.
 580     """
 581     pass
 582
 583
 584 class PostProcessingError(Exception):
 585     """Post Processing exception.
 586
 587     This exception may be raised by PostProcessor's .run() method to
 588     indicate an error in the postprocessing task.
 589     """
 590     def __init__(self, msg):
 591         self.msg = msg
 592
 593 class MaxDownloadsReached(Exception):
 594     """ --max-downloads limit has been reached. """
 595     pass
 596
 597
 598 class UnavailableVideoError(Exception):
 599     """Unavailable Format exception.
 600
 601     This exception will be thrown when a video is requested
 602     in a format that is not available for that video.
 603     """
 604     pass
 605
 606
 607 class ContentTooShortError(Exception):
 608     """Content Too Short exception.
 609
 610     This exception may be raised by FileDownloader objects when a file they
 611     download is too small for what the server announced first, indicating
 612     the connection was probably interrupted.
 613     """
 614     # Both in bytes
 615     downloaded = None
 616     expected = None
 617
 618     def __init__(self, downloaded, expected):
 619         self.downloaded = downloaded
 620         self.expected = expected
 621
 622 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
 623     """Handler for HTTP requests and responses.
 624
 625     This class, when installed with an OpenerDirector, automatically adds
 626     the standard headers to every HTTP request and handles gzipped and
 627     deflated responses from web servers. If compression is to be avoided in
 628     a particular request, the original request in the program code only has
 629     to include the HTTP header "Youtubedl-No-Compression", which will be
 630     removed before making the real request.
 631
 632     Part of this code was copied from:
 633
 634     http://techknack.net/python-urllib2-handlers/
 635
 636     Andrew Rowls, the author of that code, agreed to release it to the
 637     public domain.
 638     """
 639
 640     @staticmethod
 641     def deflate(data):
 642         try:
 643             return zlib.decompress(data, -zlib.MAX_WBITS)
 644         except zlib.error:
 645             return zlib.decompress(data)
 646
 647     @staticmethod
 648     def addinfourl_wrapper(stream, headers, url, code):
 649         if hasattr(compat_urllib_request.addinfourl, 'getcode'):
 650             return compat_urllib_request.addinfourl(stream, headers, url, code)
 651         ret = compat_urllib_request.addinfourl(stream, headers, url)
 652         ret.code = code
 653         return ret
 654
 655     def http_request(self, req):
 656         for h,v in std_headers.items():
 657             if h in req.headers:
 658                 del req.headers[h]
 659             req.add_header(h, v)
 660         if 'Youtubedl-no-compression' in req.headers:
 661             if 'Accept-encoding' in req.headers:
 662                 del req.headers['Accept-encoding']
 663             del req.headers['Youtubedl-no-compression']
 664         if 'Youtubedl-user-agent' in req.headers:
 665             if 'User-agent' in req.headers:
 666                 del req.headers['User-agent']
 667             req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
 668             del req.headers['Youtubedl-user-agent']
 669         return req
 670
 671     def http_response(self, req, resp):
 672         old_resp = resp
 673         # gzip
 674         if resp.headers.get('Content-encoding', '') == 'gzip':
 675             content = resp.read()
 676             gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
 677             try:
 678                 uncompressed = io.BytesIO(gz.read())
 679             except IOError as original_ioerror:
 680                 # There may be junk add the end of the file
 681                 # See http://stackoverflow.com/q/4928560/35070 for details
 682                 for i in range(1, 1024):
 683                     try:
 684                         gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
 685                         uncompressed = io.BytesIO(gz.read())
 686                     except IOError:
 687                         continue
 688                     break
 689                 else:
 690                     raise original_ioerror
 691             resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
 692             resp.msg = old_resp.msg
 693         # deflate
 694         if resp.headers.get('Content-encoding', '') == 'deflate':
 695             gz = io.BytesIO(self.deflate(resp.read()))
 696             resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
 697             resp.msg = old_resp.msg
 698         return resp
 699
 700     https_request = http_request
 701     https_response = http_response
 702
 703 def unified_strdate(date_str):
 704     """Return a string with the date in the format YYYYMMDD"""
 705     upload_date = None
 706     #Replace commas
 707     date_str = date_str.replace(',',' ')
 708     # %z (UTC offset) is only supported in python>=3.2
 709     date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
 710     format_expressions = [
 711         '%d %B %Y',
 712         '%B %d %Y',
 713         '%b %d %Y',
 714         '%Y-%m-%d',
 715         '%d/%m/%Y',
 716         '%Y/%m/%d %H:%M:%S',
 717         '%d.%m.%Y %H:%M',
 718         '%Y-%m-%dT%H:%M:%SZ',
 719         '%Y-%m-%dT%H:%M:%S',
 720     ]
 721     for expression in format_expressions:
 722         try:
 723             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
 724         except:
 725             pass
 726     return upload_date
 727
 728 def determine_ext(url, default_ext=u'unknown_video'):
 729     guess = url.partition(u'?')[0].rpartition(u'.')[2]
 730     if re.match(r'^[A-Za-z0-9]+$', guess):
 731         return guess
 732     else:
 733         return default_ext
 734
 735 def subtitles_filename(filename, sub_lang, sub_format):
 736     return filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
 737
 738 def date_from_str(date_str):
 739     """
 740     Return a datetime object from a string in the format YYYYMMDD or
 741     (now|today)[+-][0-9](day|week|month|year)(s)?"""
 742     today = datetime.date.today()
 743     if date_str == 'now'or date_str == 'today':
 744         return today
 745     match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
 746     if match is not None:
 747         sign = match.group('sign')
 748         time = int(match.group('time'))
 749         if sign == '-':
 750             time = -time
 751         unit = match.group('unit')
 752         #A bad aproximation?
 753         if unit == 'month':
 754             unit = 'day'
 755             time *= 30
 756         elif unit == 'year':
 757             unit = 'day'
 758             time *= 365
 759         unit += 's'
 760         delta = datetime.timedelta(**{unit: time})
 761         return today + delta
 762     return datetime.datetime.strptime(date_str, "%Y%m%d").date()
 763
 764 class DateRange(object):
 765     """Represents a time interval between two dates"""
 766     def __init__(self, start=None, end=None):
 767         """start and end must be strings in the format accepted by date"""
 768         if start is not None:
 769             self.start = date_from_str(start)
 770         else:
 771             self.start = datetime.datetime.min.date()
 772         if end is not None:
 773             self.end = date_from_str(end)
 774         else:
 775             self.end = datetime.datetime.max.date()
 776         if self.start > self.end:
 777             raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
 778     @classmethod
 779     def day(cls, day):
 780         """Returns a range that only contains the given day"""
 781         return cls(day,day)
 782     def __contains__(self, date):
 783         """Check if the date is in the range"""
 784         if not isinstance(date, datetime.date):
 785             date = date_from_str(date)
 786         return self.start <= date <= self.end
 787     def __str__(self):
 788         return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
 789
 790
 791 def platform_name():
 792     """ Returns the platform name as a compat_str """
 793     res = platform.platform()
 794     if isinstance(res, bytes):
 795         res = res.decode(preferredencoding())
 796
 797     assert isinstance(res, compat_str)
 798     return res
 799
 800
 801 def write_string(s, out=None):
 802     if out is None:
 803         out = sys.stderr
 804     assert type(s) == type(u'')
 805
 806     if ('b' in getattr(out, 'mode', '') or
 807             sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
 808         s = s.encode(preferredencoding(), 'ignore')
 809     out.write(s)
 810     out.flush()
 811
 812
 813 def bytes_to_intlist(bs):
 814     if not bs:
 815         return []
 816     if isinstance(bs[0], int):  # Python 3
 817         return list(bs)
 818     else:
 819         return [ord(c) for c in bs]
 820
 821
 822 def intlist_to_bytes(xs):
 823     if not xs:
 824         return b''
 825     if isinstance(chr(0), bytes):  # Python 2
 826         return ''.join([chr(x) for x in xs])
 827     else:
 828         return bytes(xs)
 829
 830
 831 def get_cachedir(params={}):
 832     cache_root = os.environ.get('XDG_CACHE_HOME',
 833                                 os.path.expanduser('~/.cache'))
 834     return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))
 835
 836
 837 # Cross-platform file locking
 838 if sys.platform == 'win32':
 839     import ctypes.wintypes
 840     import msvcrt
 841
 842     class OVERLAPPED(ctypes.Structure):
 843         _fields_ = [
 844             ('Internal', ctypes.wintypes.LPVOID),
 845             ('InternalHigh', ctypes.wintypes.LPVOID),
 846             ('Offset', ctypes.wintypes.DWORD),
 847             ('OffsetHigh', ctypes.wintypes.DWORD),
 848             ('hEvent', ctypes.wintypes.HANDLE),
 849         ]
 850
 851     kernel32 = ctypes.windll.kernel32
 852     LockFileEx = kernel32.LockFileEx
 853     LockFileEx.argtypes = [
 854         ctypes.wintypes.HANDLE,     # hFile
 855         ctypes.wintypes.DWORD,      # dwFlags
 856         ctypes.wintypes.DWORD,      # dwReserved
 857         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
 858         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
 859         ctypes.POINTER(OVERLAPPED)  # Overlapped
 860     ]
 861     LockFileEx.restype = ctypes.wintypes.BOOL
 862     UnlockFileEx = kernel32.UnlockFileEx
 863     UnlockFileEx.argtypes = [
 864         ctypes.wintypes.HANDLE,     # hFile
 865         ctypes.wintypes.DWORD,      # dwReserved
 866         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
 867         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
 868         ctypes.POINTER(OVERLAPPED)  # Overlapped
 869     ]
 870     UnlockFileEx.restype = ctypes.wintypes.BOOL
 871     whole_low = 0xffffffff
 872     whole_high = 0x7fffffff
 873
 874     def _lock_file(f, exclusive):
 875         overlapped = OVERLAPPED()
 876         overlapped.Offset = 0
 877         overlapped.OffsetHigh = 0
 878         overlapped.hEvent = 0
 879         f._lock_file_overlapped_p = ctypes.pointer(overlapped)
 880         handle = msvcrt.get_osfhandle(f.fileno())
 881         if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
 882                           whole_low, whole_high, f._lock_file_overlapped_p):
 883             raise OSError('Locking file failed: %r' % ctypes.FormatError())
 884
 885     def _unlock_file(f):
 886         assert f._lock_file_overlapped_p
 887         handle = msvcrt.get_osfhandle(f.fileno())
 888         if not UnlockFileEx(handle, 0,
 889                             whole_low, whole_high, f._lock_file_overlapped_p):
 890             raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
 891
 892 else:
 893     import fcntl
 894
 895     def _lock_file(f, exclusive):
 896         fcntl.lockf(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
 897
 898     def _unlock_file(f):
 899         fcntl.lockf(f, fcntl.LOCK_UN)
 900
 901
 902 class locked_file(object):
 903     def __init__(self, filename, mode, encoding=None):
 904         assert mode in ['r', 'a', 'w']
 905         self.f = io.open(filename, mode, encoding=encoding)
 906         self.mode = mode
 907
 908     def __enter__(self):
 909         exclusive = self.mode != 'r'
 910         try:
 911             _lock_file(self.f, exclusive)
 912         except IOError:
 913             self.f.close()
 914             raise
 915         return self
 916
 917     def __exit__(self, etype, value, traceback):
 918         try:
 919             _unlock_file(self.f)
 920         finally:
 921             self.f.close()
 922
 923     def __iter__(self):
 924         return iter(self.f)
 925
 926     def write(self, *args):
 927         return self.f.write(*args)
 928
 929     def read(self, *args):
 930         return self.f.read(*args)
 931
 932
 933 def shell_quote(args):
 934     return ' '.join(map(pipes.quote, args))