_ Git - youtube-dl/blob - youtube_dl/utils.py

   1 #!/usr/bin/env python
   2 # coding: utf-8
   3
   4 from __future__ import unicode_literals
   5
   6 import base64
   7 import binascii
   8 import calendar
   9 import codecs
  10 import contextlib
  11 import ctypes
  12 import datetime
  13 import email.utils
  14 import email.header
  15 import errno
  16 import functools
  17 import gzip
  18 import io
  19 import itertools
  20 import json
  21 import locale
  22 import math
  23 import operator
  24 import os
  25 import platform
  26 import random
  27 import re
  28 import socket
  29 import ssl
  30 import subprocess
  31 import sys
  32 import tempfile
  33 import traceback
  34 import xml.etree.ElementTree
  35 import zlib
  36
  37 from .compat import (
  38     compat_HTMLParseError,
  39     compat_HTMLParser,
  40     compat_basestring,
  41     compat_chr,
  42     compat_etree_fromstring,
  43     compat_expanduser,
  44     compat_html_entities,
  45     compat_html_entities_html5,
  46     compat_http_client,
  47     compat_kwargs,
  48     compat_os_name,
  49     compat_parse_qs,
  50     compat_shlex_quote,
  51     compat_socket_create_connection,
  52     compat_str,
  53     compat_struct_pack,
  54     compat_struct_unpack,
  55     compat_urllib_error,
  56     compat_urllib_parse,
  57     compat_urllib_parse_urlencode,
  58     compat_urllib_parse_urlparse,
  59     compat_urllib_parse_unquote_plus,
  60     compat_urllib_request,
  61     compat_urlparse,
  62     compat_xpath,
  63 )
  64
  65 from .socks import (
  66     ProxyType,
  67     sockssocket,
  68 )
  69
  70
  71 def register_socks_protocols():
  72     # "Register" SOCKS protocols
  73     # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
  74     # URLs with protocols not in urlparse.uses_netloc are not handled correctly
  75     for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
  76         if scheme not in compat_urlparse.uses_netloc:
  77             compat_urlparse.uses_netloc.append(scheme)
  78
  79
# The class of compiled pattern objects is not clearly defined otherwise,
# so it is taken from an actual compiled (empty) pattern.
compiled_regex_type = type(re.compile(''))
  82
# Baseline HTTP request headers.
# NOTE(review): presumably the "standard headers" that the YoutubeDLHandler
# docstring says are added to every HTTP request -- the code applying them is
# not visible here; confirm against the handler's http_request().
std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/47.0 (Chrome)',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
  90
  91
# Alternative User-Agent strings, keyed by the browser they imitate.
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}
  95
  96
# Unique sentinel used as the default value of `default` parameters (see the
# xpath_* helpers) so that "no default supplied" can be told apart from None.
NO_DEFAULT = object()
  98
# Full English month names, in calendar order (January first).
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Month names per language code, each list in calendar order.
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}
 109
# File extensions (without the leading dot) of recognised media container,
# audio, video and manifest formats.
# NOTE(review): the consumers of this tuple are outside this chunk.
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')
 124
# needed for sanitizing filenames in restricted mode
# Maps each accented character to its ASCII replacement; the replacement is
# usually one letter, but ligature-like characters map to two ('AE', 'OE',
# 'ss', 'ae', 'oe'), which is why those are passed as one-element lists.
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUYP', ['ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuypy')))
 129
 130 DATE_FORMATS = (
 131     '%d %B %Y',
 132     '%d %b %Y',
 133     '%B %d %Y',
 134     '%B %dst %Y',
 135     '%B %dnd %Y',
 136     '%B %dth %Y',
 137     '%b %d %Y',
 138     '%b %dst %Y',
 139     '%b %dnd %Y',
 140     '%b %dth %Y',
 141     '%b %dst %Y %I:%M',
 142     '%b %dnd %Y %I:%M',
 143     '%b %dth %Y %I:%M',
 144     '%Y %m %d',
 145     '%Y-%m-%d',
 146     '%Y/%m/%d',
 147     '%Y/%m/%d %H:%M',
 148     '%Y/%m/%d %H:%M:%S',
 149     '%Y-%m-%d %H:%M',
 150     '%Y-%m-%d %H:%M:%S',
 151     '%Y-%m-%d %H:%M:%S.%f',
 152     '%d.%m.%Y %H:%M',
 153     '%d.%m.%Y %H.%M',
 154     '%Y-%m-%dT%H:%M:%SZ',
 155     '%Y-%m-%dT%H:%M:%S.%fZ',
 156     '%Y-%m-%dT%H:%M:%S.%f0Z',
 157     '%Y-%m-%dT%H:%M:%S',
 158     '%Y-%m-%dT%H:%M:%S.%f',
 159     '%Y-%m-%dT%H:%M',
 160     '%b %d %Y at %H:%M',
 161     '%b %d %Y at %H:%M:%S',
 162 )
 163
 164 DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
 165 DATE_FORMATS_DAY_FIRST.extend([
 166     '%d-%m-%Y',
 167     '%d.%m.%Y',
 168     '%d.%m.%y',
 169     '%d/%m/%Y',
 170     '%d/%m/%y',
 171     '%d/%m/%Y %H:%M:%S',
 172 ])
 173
 174 DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
 175 DATE_FORMATS_MONTH_FIRST.extend([
 176     '%m-%d-%Y',
 177     '%m.%d.%Y',
 178     '%m/%d/%Y',
 179     '%m/%d/%y',
 180     '%m/%d/%Y %H:%M:%S',
 181 ])
 182
# Matches a call tail of the form }('<payload>',<int>,<int>,'<words>'.split('|')
# and captures those four arguments in order.
# NOTE(review): looks like the argument list of packer-obfuscated JavaScript;
# the code that consumes the groups is outside this chunk -- confirm there.
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
 184
 185
 186 def preferredencoding():
 187     """Get preferred encoding.
 188
 189     Returns the best encoding scheme for the system, based on
 190     locale.getpreferredencoding() and some further tweaks.
 191     """
 192     try:
 193         pref = locale.getpreferredencoding()
 194         'TEST'.encode(pref)
 195     except Exception:
 196         pref = 'UTF-8'
 197
 198     return pref
 199
 200
 201 def write_json_file(obj, fn):
 202     """ Encode obj as JSON and write it to fn, atomically if possible """
 203
 204     fn = encodeFilename(fn)
 205     if sys.version_info < (3, 0) and sys.platform != 'win32':
 206         encoding = get_filesystem_encoding()
 207         # os.path.basename returns a bytes object, but NamedTemporaryFile
 208         # will fail if the filename contains non ascii characters unless we
 209         # use a unicode object
 210         path_basename = lambda f: os.path.basename(fn).decode(encoding)
 211         # the same for os.path.dirname
 212         path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
 213     else:
 214         path_basename = os.path.basename
 215         path_dirname = os.path.dirname
 216
 217     args = {
 218         'suffix': '.tmp',
 219         'prefix': path_basename(fn) + '.',
 220         'dir': path_dirname(fn),
 221         'delete': False,
 222     }
 223
 224     # In Python 2.x, json.dump expects a bytestream.
 225     # In Python 3.x, it writes to a character stream
 226     if sys.version_info < (3, 0):
 227         args['mode'] = 'wb'
 228     else:
 229         args.update({
 230             'mode': 'w',
 231             'encoding': 'utf-8',
 232         })
 233
 234     tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))
 235
 236     try:
 237         with tf:
 238             json.dump(obj, tf)
 239         if sys.platform == 'win32':
 240             # Need to remove existing file on Windows, else os.rename raises
 241             # WindowsError or FileExistsError.
 242             try:
 243                 os.unlink(fn)
 244             except OSError:
 245                 pass
 246         os.rename(tf.name, fn)
 247     except Exception:
 248         try:
 249             os.remove(tf.name)
 250         except OSError:
 251             pass
 252         raise
 253
 254
 255 if sys.version_info >= (2, 7):
 256     def find_xpath_attr(node, xpath, key, val=None):
 257         """ Find the xpath xpath[@key=val] """
 258         assert re.match(r'^[a-zA-Z_-]+$', key)
 259         expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
 260         return node.find(expr)
 261 else:
 262     def find_xpath_attr(node, xpath, key, val=None):
 263         for f in node.findall(compat_xpath(xpath)):
 264             if key not in f.attrib:
 265                 continue
 266             if val is None or f.attrib.get(key) == val:
 267                 return f
 268         return None
 269
 270 # On python2.6 the xml.etree.ElementTree.Element methods don't support
 271 # the namespace parameter
 272
 273
 274 def xpath_with_ns(path, ns_map):
 275     components = [c.split(':') for c in path.split('/')]
 276     replaced = []
 277     for c in components:
 278         if len(c) == 1:
 279             replaced.append(c[0])
 280         else:
 281             ns, tag = c
 282             replaced.append('{%s}%s' % (ns_map[ns], tag))
 283     return '/'.join(replaced)
 284
 285
 286 def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
 287     def _find_xpath(xpath):
 288         return node.find(compat_xpath(xpath))
 289
 290     if isinstance(xpath, (str, compat_str)):
 291         n = _find_xpath(xpath)
 292     else:
 293         for xp in xpath:
 294             n = _find_xpath(xp)
 295             if n is not None:
 296                 break
 297
 298     if n is None:
 299         if default is not NO_DEFAULT:
 300             return default
 301         elif fatal:
 302             name = xpath if name is None else name
 303             raise ExtractorError('Could not find XML element %s' % name)
 304         else:
 305             return None
 306     return n
 307
 308
 309 def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
 310     n = xpath_element(node, xpath, name, fatal=fatal, default=default)
 311     if n is None or n == default:
 312         return n
 313     if n.text is None:
 314         if default is not NO_DEFAULT:
 315             return default
 316         elif fatal:
 317             name = xpath if name is None else name
 318             raise ExtractorError('Could not find XML element\'s text %s' % name)
 319         else:
 320             return None
 321     return n.text
 322
 323
 324 def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
 325     n = find_xpath_attr(node, xpath, key)
 326     if n is None:
 327         if default is not NO_DEFAULT:
 328             return default
 329         elif fatal:
 330             name = '%s[@%s]' % (xpath, key) if name is None else name
 331             raise ExtractorError('Could not find XML attribute %s' % name)
 332         else:
 333             return None
 334     return n.attrib[key]
 335
 336
 337 def get_element_by_id(id, html):
 338     """Return the content of the tag with the specified ID in the passed HTML document"""
 339     return get_element_by_attribute('id', id, html)
 340
 341
 342 def get_element_by_class(class_name, html):
 343     """Return the content of the first tag with the specified class in the passed HTML document"""
 344     retval = get_elements_by_class(class_name, html)
 345     return retval[0] if retval else None
 346
 347
 348 def get_element_by_attribute(attribute, value, html, escape_value=True):
 349     retval = get_elements_by_attribute(attribute, value, html, escape_value)
 350     return retval[0] if retval else None
 351
 352
 353 def get_elements_by_class(class_name, html):
 354     """Return the content of all tags with the specified class in the passed HTML document as a list"""
 355     return get_elements_by_attribute(
 356         'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
 357         html, escape_value=False)
 358
 359
 360 def get_elements_by_attribute(attribute, value, html, escape_value=True):
 361     """Return the content of the tag with the specified attribute in the passed HTML document"""
 362
 363     value = re.escape(value) if escape_value else value
 364
 365     retlist = []
 366     for m in re.finditer(r'''(?xs)
 367         <([a-zA-Z0-9:._-]+)
 368          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
 369          \s+%s=['"]?%s['"]?
 370          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
 371         \s*>
 372         (?P<content>.*?)
 373         </\1>
 374     ''' % (re.escape(attribute), value), html):
 375         res = m.group('content')
 376
 377         if res.startswith('"') or res.startswith("'"):
 378             res = res[1:-1]
 379
 380         retlist.append(unescapeHTML(res))
 381
 382     return retlist
 383
 384
 385 class HTMLAttributeParser(compat_HTMLParser):
 386     """Trivial HTML parser to gather the attributes for a single element"""
 387     def __init__(self):
 388         self.attrs = {}
 389         compat_HTMLParser.__init__(self)
 390
 391     def handle_starttag(self, tag, attrs):
 392         self.attrs = dict(attrs)
 393
 394
 395 def extract_attributes(html_element):
 396     """Given a string for an HTML element such as
 397     <el
 398          a="foo" B="bar" c="&98;az" d=boz
 399          empty= noval entity="&amp;"
 400          sq='"' dq="'"
 401     >
 402     Decode and return a dictionary of attributes.
 403     {
 404         'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
 405         'empty': '', 'noval': None, 'entity': '&',
 406         'sq': '"', 'dq': '\''
 407     }.
 408     NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
 409     but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
 410     """
 411     parser = HTMLAttributeParser()
 412     try:
 413         parser.feed(html_element)
 414         parser.close()
 415     # Older Python may throw HTMLParseError in case of malformed HTML
 416     except compat_HTMLParseError:
 417         pass
 418     return parser.attrs
 419
 420
 421 def clean_html(html):
 422     """Clean an HTML snippet into a readable string"""
 423
 424     if html is None:  # Convenience for sanitizing descriptions etc.
 425         return html
 426
 427     # Newline vs <br />
 428     html = html.replace('\n', ' ')
 429     html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
 430     html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
 431     # Strip html tags
 432     html = re.sub('<.*?>', '', html)
 433     # Replace html entities
 434     html = unescapeHTML(html)
 435     return html.strip()
 436
 437
 438 def sanitize_open(filename, open_mode):
 439     """Try to open the given filename, and slightly tweak it if this fails.
 440
 441     Attempts to open the given filename. If this fails, it tries to change
 442     the filename slightly, step by step, until it's either able to open it
 443     or it fails and raises a final exception, like the standard open()
 444     function.
 445
 446     It returns the tuple (stream, definitive_file_name).
 447     """
 448     try:
 449         if filename == '-':
 450             if sys.platform == 'win32':
 451                 import msvcrt
 452                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
 453             return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
 454         stream = open(encodeFilename(filename), open_mode)
 455         return (stream, filename)
 456     except (IOError, OSError) as err:
 457         if err.errno in (errno.EACCES,):
 458             raise
 459
 460         # In case of error, try to remove win32 forbidden chars
 461         alt_filename = sanitize_path(filename)
 462         if alt_filename == filename:
 463             raise
 464         else:
 465             # An exception here should be caught in the caller
 466             stream = open(encodeFilename(alt_filename), open_mode)
 467             return (stream, alt_filename)
 468
 469
 470 def timeconvert(timestr):
 471     """Convert RFC 2822 defined time string into system timestamp"""
 472     timestamp = None
 473     timetuple = email.utils.parsedate_tz(timestr)
 474     if timetuple is not None:
 475         timestamp = email.utils.mktime_tz(timetuple)
 476     return timestamp
 477
 478
 479 def sanitize_filename(s, restricted=False, is_id=False):
 480     """Sanitizes a string so it could be used as part of a filename.
 481     If restricted is set, use a stricter subset of allowed characters.
 482     Set is_id if this is not an arbitrary string, but an ID that should be kept
 483     if possible.
 484     """
 485     def replace_insane(char):
 486         if restricted and char in ACCENT_CHARS:
 487             return ACCENT_CHARS[char]
 488         if char == '?' or ord(char) < 32 or ord(char) == 127:
 489             return ''
 490         elif char == '"':
 491             return '' if restricted else '\''
 492         elif char == ':':
 493             return '_-' if restricted else ' -'
 494         elif char in '\\/|*<>':
 495             return '_'
 496         if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
 497             return '_'
 498         if restricted and ord(char) > 127:
 499             return '_'
 500         return char
 501
 502     # Handle timestamps
 503     s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
 504     result = ''.join(map(replace_insane, s))
 505     if not is_id:
 506         while '__' in result:
 507             result = result.replace('__', '_')
 508         result = result.strip('_')
 509         # Common case of "Foreign band name - English song title"
 510         if restricted and result.startswith('-_'):
 511             result = result[2:]
 512         if result.startswith('-'):
 513             result = '_' + result[len('-'):]
 514         result = result.lstrip('.')
 515         if not result:
 516             result = '_'
 517     return result
 518
 519
 520 def sanitize_path(s):
 521     """Sanitizes and normalizes path on Windows"""
 522     if sys.platform != 'win32':
 523         return s
 524     drive_or_unc, _ = os.path.splitdrive(s)
 525     if sys.version_info < (2, 7) and not drive_or_unc:
 526         drive_or_unc, _ = os.path.splitunc(s)
 527     norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
 528     if drive_or_unc:
 529         norm_path.pop(0)
 530     sanitized_path = [
 531         path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
 532         for path_part in norm_path]
 533     if drive_or_unc:
 534         sanitized_path.insert(0, drive_or_unc + os.path.sep)
 535     return os.path.join(*sanitized_path)
 536
 537
 538 # Prepend protocol-less URLs with `http:` scheme in order to mitigate the number of
 539 # unwanted failures due to missing protocol
 540 def sanitize_url(url):
 541     return 'http:%s' % url if url.startswith('//') else url
 542
 543
 544 def sanitized_Request(url, *args, **kwargs):
 545     return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
 546
 547
 548 def expand_path(s):
 549     """Expand shell variables and ~"""
 550     return os.path.expandvars(compat_expanduser(s))
 551
 552
 553 def orderedSet(iterable):
 554     """ Remove all duplicates from the input iterable """
 555     res = []
 556     for el in iterable:
 557         if el not in res:
 558             res.append(el)
 559     return res
 560
 561
 562 def _htmlentity_transform(entity_with_semicolon):
 563     """Transforms an HTML entity to a character."""
 564     entity = entity_with_semicolon[:-1]
 565
 566     # Known non-numeric HTML entity
 567     if entity in compat_html_entities.name2codepoint:
 568         return compat_chr(compat_html_entities.name2codepoint[entity])
 569
 570     # TODO: HTML5 allows entities without a semicolon. For example,
 571     # '&Eacuteric' should be decoded as 'Éric'.
 572     if entity_with_semicolon in compat_html_entities_html5:
 573         return compat_html_entities_html5[entity_with_semicolon]
 574
 575     mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
 576     if mobj is not None:
 577         numstr = mobj.group(1)
 578         if numstr.startswith('x'):
 579             base = 16
 580             numstr = '0%s' % numstr
 581         else:
 582             base = 10
 583         # See https://github.com/rg3/youtube-dl/issues/7518
 584         try:
 585             return compat_chr(int(numstr, base))
 586         except ValueError:
 587             pass
 588
 589     # Unknown entity in name, return its literal representation
 590     return '&%s;' % entity
 591
 592
 593 def unescapeHTML(s):
 594     if s is None:
 595         return None
 596     assert type(s) == compat_str
 597
 598     return re.sub(
 599         r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
 600
 601
 602 def get_subprocess_encoding():
 603     if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
 604         # For subprocess calls, encode with locale encoding
 605         # Refer to http://stackoverflow.com/a/9951851/35070
 606         encoding = preferredencoding()
 607     else:
 608         encoding = sys.getfilesystemencoding()
 609     if encoding is None:
 610         encoding = 'utf-8'
 611     return encoding
 612
 613
 614 def encodeFilename(s, for_subprocess=False):
 615     """
 616     @param s The name of the file
 617     """
 618
 619     assert type(s) == compat_str
 620
 621     # Python 3 has a Unicode API
 622     if sys.version_info >= (3, 0):
 623         return s
 624
 625     # Pass '' directly to use Unicode APIs on Windows 2000 and up
 626     # (Detecting Windows NT 4 is tricky because 'major >= 4' would
 627     # match Windows 9x series as well. Besides, NT 4 is obsolete.)
 628     if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
 629         return s
 630
 631     # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
 632     if sys.platform.startswith('java'):
 633         return s
 634
 635     return s.encode(get_subprocess_encoding(), 'ignore')
 636
 637
 638 def decodeFilename(b, for_subprocess=False):
 639
 640     if sys.version_info >= (3, 0):
 641         return b
 642
 643     if not isinstance(b, bytes):
 644         return b
 645
 646     return b.decode(get_subprocess_encoding(), 'ignore')
 647
 648
 649 def encodeArgument(s):
 650     if not isinstance(s, compat_str):
 651         # Legacy code that uses byte strings
 652         # Uncomment the following line after fixing all post processors
 653         # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
 654         s = s.decode('ascii')
 655     return encodeFilename(s, True)
 656
 657
 658 def decodeArgument(b):
 659     return decodeFilename(b, True)
 660
 661
 662 def decodeOption(optval):
 663     if optval is None:
 664         return optval
 665     if isinstance(optval, bytes):
 666         optval = optval.decode(preferredencoding())
 667
 668     assert isinstance(optval, compat_str)
 669     return optval
 670
 671
 672 def formatSeconds(secs):
 673     if secs > 3600:
 674         return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
 675     elif secs > 60:
 676         return '%d:%02d' % (secs // 60, secs % 60)
 677     else:
 678         return '%d' % secs
 679
 680
 681 def make_HTTPS_handler(params, **kwargs):
 682     opts_no_check_certificate = params.get('nocheckcertificate', False)
 683     if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
 684         context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
 685         if opts_no_check_certificate:
 686             context.check_hostname = False
 687             context.verify_mode = ssl.CERT_NONE
 688         try:
 689             return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
 690         except TypeError:
 691             # Python 2.7.8
 692             # (create_default_context present but HTTPSHandler has no context=)
 693             pass
 694
 695     if sys.version_info < (3, 2):
 696         return YoutubeDLHTTPSHandler(params, **kwargs)
 697     else:  # Python < 3.4
 698         context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
 699         context.verify_mode = (ssl.CERT_NONE
 700                                if opts_no_check_certificate
 701                                else ssl.CERT_REQUIRED)
 702         context.set_default_verify_paths()
 703         return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
 704
 705
 706 def bug_reports_message():
 707     if ytdl_is_updateable():
 708         update_cmd = 'type  youtube-dl -U  to update'
 709     else:
 710         update_cmd = 'see  https://yt-dl.org/update  on how to update'
 711     msg = '; please report this issue on https://yt-dl.org/bug .'
 712     msg += ' Make sure you are using the latest version; %s.' % update_cmd
 713     msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
 714     return msg
 715
 716
 717 class YoutubeDLError(Exception):
 718     """Base exception for YoutubeDL errors."""
 719     pass
 720
 721
 722 class ExtractorError(YoutubeDLError):
 723     """Error during info extraction."""
 724
 725     def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
 726         """ tb, if given, is the original traceback (so that it can be printed out).
 727         If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
 728         """
 729
 730         if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
 731             expected = True
 732         if video_id is not None:
 733             msg = video_id + ': ' + msg
 734         if cause:
 735             msg += ' (caused by %r)' % cause
 736         if not expected:
 737             msg += bug_reports_message()
 738         super(ExtractorError, self).__init__(msg)
 739
 740         self.traceback = tb
 741         self.exc_info = sys.exc_info()  # preserve original exception
 742         self.cause = cause
 743         self.video_id = video_id
 744
 745     def format_traceback(self):
 746         if self.traceback is None:
 747             return None
 748         return ''.join(traceback.format_tb(self.traceback))
 749
 750
 751 class UnsupportedError(ExtractorError):
 752     def __init__(self, url):
 753         super(UnsupportedError, self).__init__(
 754             'Unsupported URL: %s' % url, expected=True)
 755         self.url = url
 756
 757
 758 class RegexNotFoundError(ExtractorError):
 759     """Error when a regex didn't match"""
 760     pass
 761
 762
 763 class GeoRestrictedError(ExtractorError):
 764     """Geographic restriction Error exception.
 765
 766     This exception may be thrown when a video is not available from your
 767     geographic location due to geographic restrictions imposed by a website.
 768     """
 769     def __init__(self, msg, countries=None):
 770         super(GeoRestrictedError, self).__init__(msg, expected=True)
 771         self.msg = msg
 772         self.countries = countries
 773
 774
 775 class DownloadError(YoutubeDLError):
 776     """Download Error exception.
 777
 778     This exception may be thrown by FileDownloader objects if they are not
 779     configured to continue on errors. They will contain the appropriate
 780     error message.
 781     """
 782
 783     def __init__(self, msg, exc_info=None):
 784         """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
 785         super(DownloadError, self).__init__(msg)
 786         self.exc_info = exc_info
 787
 788
 789 class SameFileError(YoutubeDLError):
 790     """Same File exception.
 791
 792     This exception will be thrown by FileDownloader objects if they detect
 793     multiple files would have to be downloaded to the same file on disk.
 794     """
 795     pass
 796
 797
 798 class PostProcessingError(YoutubeDLError):
 799     """Post Processing exception.
 800
 801     This exception may be raised by PostProcessor's .run() method to
 802     indicate an error in the postprocessing task.
 803     """
 804
 805     def __init__(self, msg):
 806         super(PostProcessingError, self).__init__(msg)
 807         self.msg = msg
 808
 809
 810 class MaxDownloadsReached(YoutubeDLError):
 811     """ --max-downloads limit has been reached. """
 812     pass
 813
 814
 815 class UnavailableVideoError(YoutubeDLError):
 816     """Unavailable Format exception.
 817
 818     This exception will be thrown when a video is requested
 819     in a format that is not available for that video.
 820     """
 821     pass
 822
 823
 824 class ContentTooShortError(YoutubeDLError):
 825     """Content Too Short exception.
 826
 827     This exception may be raised by FileDownloader objects when a file they
 828     download is too small for what the server announced first, indicating
 829     the connection was probably interrupted.
 830     """
 831
 832     def __init__(self, downloaded, expected):
 833         super(ContentTooShortError, self).__init__(
 834             'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
 835         )
 836         # Both in bytes
 837         self.downloaded = downloaded
 838         self.expected = expected
 839
 840
 841 class XAttrMetadataError(YoutubeDLError):
 842     def __init__(self, code=None, msg='Unknown error'):
 843         super(XAttrMetadataError, self).__init__(msg)
 844         self.code = code
 845         self.msg = msg
 846
 847         # Parsing code and msg
 848         if (self.code in (errno.ENOSPC, errno.EDQUOT) or
 849                 'No space left' in self.msg or 'Disk quota excedded' in self.msg):
 850             self.reason = 'NO_SPACE'
 851         elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
 852             self.reason = 'VALUE_TOO_LONG'
 853         else:
 854             self.reason = 'NOT_SUPPORTED'
 855
 856
 857 class XAttrUnavailableError(YoutubeDLError):
 858     pass
 859
 860
 861 def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
 862     # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
 863     # expected HTTP responses to meet HTTP/1.0 or later (see also
 864     # https://github.com/rg3/youtube-dl/issues/6727)
 865     if sys.version_info < (3, 0):
 866         kwargs[b'strict'] = True
 867     hc = http_class(*args, **kwargs)
 868     source_address = ydl_handler._params.get('source_address')
 869     if source_address is not None:
 870         sa = (source_address, 0)
 871         if hasattr(hc, 'source_address'):  # Python 2.7+
 872             hc.source_address = sa
 873         else:  # Python 2.6
 874             def _hc_connect(self, *args, **kwargs):
 875                 sock = compat_socket_create_connection(
 876                     (self.host, self.port), self.timeout, sa)
 877                 if is_https:
 878                     self.sock = ssl.wrap_socket(
 879                         sock, self.key_file, self.cert_file,
 880                         ssl_version=ssl.PROTOCOL_TLSv1)
 881                 else:
 882                     self.sock = sock
 883             hc.connect = functools.partial(_hc_connect, hc)
 884
 885     return hc
 886
 887
 888 def handle_youtubedl_headers(headers):
 889     filtered_headers = headers
 890
 891     if 'Youtubedl-no-compression' in filtered_headers:
 892         filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
 893         del filtered_headers['Youtubedl-no-compression']
 894
 895     return filtered_headers
 896
 897
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        """Store ``params`` and forward the remaining arguments to HTTPHandler."""
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        """Open ``req``, going through a SOCKS connection class if requested.

        The internal "Ytdl-socks-proxy" header selects the proxy and is
        removed from the request before it is sent.
        """
        conn_class = compat_http_client.HTTPConnection

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        """Decompress ``data`` as a raw deflate stream, else as a zlib stream."""
        try:
            # Negative wbits: no zlib header/trailer expected
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    def http_request(self, req):
        """Prepare an outgoing request and return it (possibly a new object).

        Percent-encodes the URL, adds any missing entries of ``std_headers``,
        applies handle_youtubedl_headers() and, on Python 2.6, strips the
        URL fragment.
        """
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        """Decode compressed bodies and sanitize redirect targets.

        A "Content-encoding" of gzip or deflate is decompressed into a new
        addinfourl response (the header is dropped afterwards).  For 3xx
        responses the "Location" header is percent-encoded when escape_url()
        changes it.  Returns the response to hand to the next handler.
        """
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with 1..1023 trailing bytes cut off; if no attempt
                # succeeds, re-raise the error from the first attempt
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/rg3/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    # Python 2 gets the header back as a UTF-8 byte string
                    if sys.version_info < (3, 0):
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp

    # HTTPS traffic gets exactly the same request/response processing
    https_request = http_request
    https_response = http_response
1019
1020
def make_socks_conn_class(base_class, socks_proxy):
    """Create a ``base_class`` subclass that connects through a SOCKS proxy.

    ``base_class`` must be (a subclass of) HTTPConnection or HTTPSConnection.
    ``socks_proxy`` is a proxy URL whose scheme selects the protocol
    (``socks5``, ``socks``/``socks4`` or ``socks4a``); the port defaults to
    1080 and any username/password are URL-unquoted before use.

    Raises ValueError for any other scheme (previously this surfaced as an
    UnboundLocalError further down).
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A
    else:
        raise ValueError(
            'Unsupported SOCKS proxy scheme: %r' % url_components.scheme)

    def unquote_if_non_empty(s):
        # Leave None / '' untouched so that missing credentials stay missing
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            # Only plain numeric timeouts are applied to the socket
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
1062
1063
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler with source address and SOCKS proxy support.

    ``https_conn_class`` overrides the connection class (HTTPSConnection by
    default); ``params`` is stored for _create_http_connection().
    """

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        """Open ``req`` over HTTPS, via SOCKS when "Ytdl-socks-proxy" is set."""
        open_kwargs = {}
        # Pass on whichever TLS settings the base handler of this Python
        # version keeps: _context (python > 2.6), _check_hostname (python 3.x)
        for attr, option in (('_context', 'context'),
                             ('_check_hostname', 'check_hostname')):
            if hasattr(self, attr):
                open_kwargs[option] = getattr(self, attr)

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(self._https_conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']
        else:
            conn_class = self._https_conn_class

        connection_factory = functools.partial(
            _create_http_connection, self, conn_class, True)
        return self.do_open(connection_factory, req, **open_kwargs)
1087
1088
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that also handles HTTPS requests and responses.

    All work is delegated to HTTPCookieProcessor; the Set-Cookie escaping
    workaround in http_response() is currently disabled (commented out).
    """

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        """Hand the response to HTTPCookieProcessor.http_response() unchanged."""
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/rg3/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # NOTE: the workaround below is disabled; it is kept for reference only.
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    # Reuse the HTTP hooks for HTTPS traffic
    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
1111
1112
def extract_timezone(date_str):
    """Split a trailing UTC offset off ``date_str``.

    Recognizes a final "Z" or "+HH:MM" / "-HHMM" style offset (optionally
    preceded by one space) after at least eight other characters.

    Returns a ``(timedelta, date_str)`` tuple: the offset (zero for "Z" or
    when nothing was recognized) and the string without the offset suffix.
    """
    match = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if not match:
        return datetime.timedelta(), date_str

    remainder = date_str[:-len(match.group('tz'))]
    sign = match.group('sign')
    if not sign:
        # Plain "Z": UTC, nothing to add
        return datetime.timedelta(), remainder

    factor = 1 if sign == '+' else -1
    offset = datetime.timedelta(
        hours=factor * int(match.group('hours')),
        minutes=factor * int(match.group('minutes')))
    return offset, remainder
1129
1130
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """Return a UNIX timestamp from the given date.

    ``date_str`` is expected as "YYYY-MM-DD<delimiter>HH:MM:SS"; fractional
    seconds are dropped.  ``timezone`` is the UTC offset as a timedelta; when
    omitted it is taken from the string via extract_timezone().

    Returns None for a None input or a string that does not parse.
    """
    if date_str is None:
        return None

    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
    try:
        utc_time = datetime.datetime.strptime(date_str, date_format) - timezone
        return calendar.timegm(utc_time.timetuple())
    except ValueError:
        return None
1148
1149
def date_formats(day_first=True):
    """Return the strptime format list for day-first or month-first dates."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
1152
1153
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD.

    Commas, AM/PM markers (with a following timezone word) and a trailing
    UTC offset are removed first.  Every format from date_formats() is tried
    and the last one that parses wins; if none does, the string is parsed as
    an RFC 2822 date.  Returns None when ``date_str`` is None or unparsable.
    """
    if date_str is None:
        return None

    # Replace commas
    cleaned = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    cleaned = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', cleaned)
    cleaned = extract_timezone(cleaned)[1]

    result = None
    for date_format in date_formats(day_first):
        try:
            result = datetime.datetime.strptime(cleaned, date_format).strftime('%Y%m%d')
        except ValueError:
            continue

    if result is None:
        parsed = email.utils.parsedate_tz(cleaned)
        if parsed:
            try:
                result = datetime.datetime(*parsed[:6]).strftime('%Y%m%d')
            except ValueError:
                pass

    return None if result is None else compat_str(result)
1180
1181
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp parsed from a loosely formatted date string.

    The first format from date_formats() that parses is used, falling back
    to RFC 2822 parsing.  A "PM" marker anywhere in the string adds twelve
    hours.  Returns None when ``date_str`` is None or cannot be parsed.
    """
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    pm_delta = 0
    if re.search(r'(?i)PM', date_str):
        pm_delta = 12
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    tz_match = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if tz_match:
        date_str = date_str[:-len(tz_match.group('tz'))]

    for date_format in date_formats(day_first):
        try:
            moment = datetime.datetime.strptime(date_str, date_format) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(moment.timetuple())
        except ValueError:
            continue

    parsed = email.utils.parsedate_tz(date_str)
    if not parsed:
        return None
    return calendar.timegm(parsed) + pm_delta * 3600
1208
1209
def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension from ``url``, else return ``default_ext``.

    Looks at the text after the last "." of the part before any "?".  It is
    accepted when purely alphanumeric, or when it is a known extension
    followed only by slashes.
    """
    if url is None:
        return default_ext

    without_query = url.partition('?')[0]
    candidate = without_query.rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate

    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    trimmed = candidate.rstrip('/')
    if trimmed in KNOWN_EXTENSIONS:
        return trimmed
    return default_ext
1221
1222
def subtitles_filename(filename, sub_lang, sub_format):
    """Return ``filename`` with its last extension replaced by "<lang>.<format>"."""
    stem = filename.rsplit('.', 1)[0]
    return '.'.join((stem, sub_lang, sub_format))
1225
1226
def date_from_str(date_str):
    """
    Return a date object from a string in the format YYYYMMDD,
    "now", "today", "yesterday" or
    (now|today)[+-][0-9](day|week|month|year)(s)?

    Months count as 30 days and years as 365 days.  Raises ValueError (from
    strptime) for anything else."""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)

    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is None:
        return datetime.datetime.strptime(date_str, '%Y%m%d').date()

    amount = int(match.group('time'))
    if match.group('sign') == '-':
        amount = -amount

    # timedelta knows neither months nor years: approximate them in days
    unit = match.group('unit')
    days_per_unit = {'month': 30, 'year': 365}
    if unit in days_per_unit:
        amount *= days_per_unit[unit]
        unit = 'day'
    return today + datetime.timedelta(**{unit + 's': amount})
1254
1255
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format.

    Strings that are not exactly eight digits are returned unchanged."""
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if match is None:
        return date_str
    year, month, day = match.groups()
    return '-'.join((year, month, day))
1264
1265
class DateRange(object):
    """Represents a time interval between two dates (both ends inclusive)"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by
        date_from_str; a missing bound means the earliest/latest possible date.

        Raises ValueError when start lies after end."""
        self.start = (
            datetime.datetime.min.date() if start is None
            else date_from_str(start))
        self.end = (
            datetime.datetime.max.date() if end is None
            else date_from_str(end))
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date (a date object or a date_from_str string) is in the range"""
        if isinstance(date, datetime.date):
            candidate = date
        else:
            candidate = date_from_str(date)
        return self.start <= candidate <= self.end

    def __str__(self):
        return ' - '.join((self.start.isoformat(), self.end.isoformat()))
1295
1296
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    # A byte string result is decoded with the preferred encoding
    if isinstance(name, bytes):
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
1305
1306
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out.

    The special method is the Win32 WriteConsoleW call; it is only used when
    ``out`` is file descriptor 1 or 2 and that descriptor's standard handle
    is an actual console.  Raises OSError when WriteConsoleW fails."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # File descriptor -> argument for GetStdHandle
    # (-11 is STD_OUTPUT_HANDLE, -12 is STD_ERROR_HANDLE)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = ctypes.WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        (b'GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)((b'WriteConsoleW', ctypes.windll.kernel32))
    # Receives the number of characters written by each WriteConsoleW call
    written = ctypes.wintypes.DWORD(0)

    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)((b'GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        (b'GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # True for an invalid handle, a non-character file type, or a handle
        # for which GetConsoleMode fails
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or
                GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character above U+FFFF, or len(s) if there is none
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        # Write at most 1024 characters at a time, stopping before the next
        # non-BMP character
        count = min(next_nonbmp_pos(s), 1024)

        # count == 0 means s starts with a non-BMP character, which is
        # written on its own with a length of 2
        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            # Continue after whatever was actually written
            s = s[written.value:]
    return True
1380
1381
def write_string(s, out=None, encoding=None):
    """Write the text ``s`` to ``out`` (sys.stderr by default) and flush it.

    On Windows, when no ``encoding`` is forced, the console API is tried
    first.  Otherwise the text is encoded (unencodable characters are
    dropped) for binary streams, for Python 2 and for streams exposing a
    ``buffer``; any other stream receives the text as is.
    """
    out = sys.stderr if out is None else out
    assert type(s) == compat_str

    may_use_console = (
        sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'))
    if may_use_console and _windows_write_string(s, out):
        return

    # Python 2 lies about mode of sys.stderr, hence the version check
    if 'b' in getattr(out, 'mode', '') or sys.version_info[0] < 3:
        out.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(out, 'buffer'):
        stream_encoding = (
            encoding or getattr(out, 'encoding', None) or preferredencoding())
        out.buffer.write(s.encode(stream_encoding, 'ignore'))
    else:
        out.write(s)
    out.flush()
1402
1403
def bytes_to_intlist(bs):
    """Return the byte values of ``bs`` as a list of ints ([] when empty)."""
    if not bs:
        return []
    # Indexing bytes gives ints on Python 3 but 1-char strings on Python 2
    first_is_int = isinstance(bs[0], int)
    return list(bs) if first_is_int else list(map(ord, bs))
1411
1412
def intlist_to_bytes(xs):
    """Pack a sequence of byte values into a byte string (b'' when empty)."""
    if xs:
        # One unsigned char ("B") per element
        return compat_struct_pack('%dB' % len(xs), *xs)
    return b''
1417
1418
# Cross-platform file locking
# Each branch below defines the same pair of helpers:
#   _lock_file(f, exclusive) - lock the open file f (shared unless exclusive)
#   _unlock_file(f)          - release that lock again
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        # ctypes layout of the OVERLAPPED argument taken by LockFileEx/UnlockFileEx
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low and high halves of the byte count passed to lock/unlock
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Kept on the file object so that _unlock_file() can pass the same
        # structure again
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # dwFlags 0x2 is LOCKFILE_EXCLUSIVE_LOCK; 0x0 requests a shared lock
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, are missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        # Without fcntl both helpers always fail with IOError
        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
1492
1493
class locked_file(object):
    """Context manager around a file that stays locked while the block runs.

    The file is opened immediately with ``io.open(filename, mode,
    encoding=encoding)``; ``mode`` must be 'r', 'a' or 'w'.  Entering the
    context takes a shared lock for 'r' and an exclusive lock otherwise;
    leaving it unlocks and closes the file.
    """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, exclusive)
        except (IOError, OSError):
            # The Windows implementation reports failures as OSError, which
            # is not an IOError on Python 2: close the file in both cases so
            # the handle is not leaked, then let the error propagate
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            # Close the file even when unlocking fails
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
1523
1524
def get_filesystem_encoding():
    """Return sys.getfilesystemencoding(), or 'utf-8' when that is None."""
    encoding = sys.getfilesystemencoding()
    if encoding is None:
        return 'utf-8'
    return encoding
1528
1529
def shell_quote(args):
    """Return ``args`` as one space-separated, shell-quoted command string."""
    encoding = get_filesystem_encoding()

    def as_text(arg):
        # We may get a filename encoded with 'encodeFilename'
        if isinstance(arg, bytes):
            return arg.decode(encoding)
        return arg

    return ' '.join([compat_shlex_quote(as_text(arg)) for arg in args])
1539
1540
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use.

    ``data`` is JSON-encoded into a "__youtubedl_smuggle" fragment appended
    to ``url``.  Data already smuggled in ``url`` is merged in and takes
    precedence over ``data``.  The caller's ``data`` dict is left untouched.
    """

    url, idata = unsmuggle_url(url, {})
    # Merge into a copy: updating ``data`` in place would silently modify
    # the caller's dict
    payload = dict(data)
    payload.update(idata)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(payload)})
    return url + '#' + sdata
1549
1550
def unsmuggle_url(smug_url, default=None):
    """Split a URL produced by smuggle_url() into ``(url, data)``.

    URLs without smuggled data are returned as ``(smug_url, default)``.
    """
    if '#__youtubedl_smuggle' in smug_url:
        plain_url, _, fragment = smug_url.rpartition('#')
        encoded = compat_parse_qs(fragment)['__youtubedl_smuggle'][0]
        return plain_url, json.loads(encoded)
    return smug_url, default
1558
1559
def format_bytes(bytes):
    """Format a byte count with a binary unit suffix, e.g. '1.50KiB'.

    ``bytes`` may be a number, a numeric string, or None (giving 'N/A').
    The unit is clamped to the range B..YiB, so tiny fractions are shown in
    B and values beyond the table in YiB.
    """
    if bytes is None:
        return 'N/A'
    if isinstance(bytes, str):
        bytes = float(bytes)

    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
        # Without clamping, values below 1/1024 produce a negative exponent
        # that indexes the table from its end, and values of 1024 ** 9 or
        # more run past it (IndexError)
        exponent = max(0, min(exponent, len(suffixes) - 1))
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffixes[exponent])
1572
1573
def lookup_unit_table(unit_table, s):
    """Parse "<number> <unit>" at the start of ``s`` and scale the number.

    ``unit_table`` maps unit strings to multipliers.  The number may use
    "," or "." as decimal separator.  Returns the product truncated to an
    int, or None when ``s`` does not start with a number and a known unit.
    """
    alternatives = '|'.join(map(re.escape, unit_table))
    pattern = r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % alternatives
    found = re.match(pattern, s)
    if found is None:
        return None
    number = float(found.group('num').replace(',', '.'))
    return int(number * unit_table[found.group('unit')])
1583
1584
def parse_filesize(s):
    """Parse a human-readable file size such as "1.5 MiB" into bytes.

    Returns an int, or None when ``s`` is None or not understood.  Per
    magnitude the following spellings are accepted (shown for kilo):
    KiB, kB and kibibytes are binary (1024); KB, Kb, kb and kilobytes are
    decimal (1000).
    """
    if s is None:
        return None

    # (symbol, decimal word prefix, binary word prefix), ascending magnitude
    magnitudes = (
        ('K', 'kilo', 'kibi'),
        ('M', 'mega', 'mebi'),
        ('G', 'giga', 'gibi'),
        ('T', 'tera', 'tebi'),
        ('P', 'peta', 'pebi'),
        ('E', 'exa', 'exbi'),
        ('Z', 'zetta', 'zebi'),
        ('Y', 'yotta', 'yobi'),
    )

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    unit_table = {'B': 1, 'b': 1, 'bytes': 1}
    for power, (symbol, decimal_word, binary_word) in enumerate(magnitudes, 1):
        decimal = 1000 ** power
        binary = 1024 ** power
        lower = symbol.lower()
        unit_table[symbol + 'iB'] = binary
        unit_table[symbol + 'B'] = decimal
        unit_table[lower + 'B'] = binary
        unit_table[symbol + 'b'] = decimal
        unit_table[lower + 'b'] = decimal
        unit_table[decimal_word + 'bytes'] = decimal
        unit_table[binary_word + 'bytes'] = binary

    return lookup_unit_table(unit_table, s)
1654
1655
def parse_count(s):
    """Parse a count such as "1,234", "1.2k" or "3 M" into an int.

    Returns None when ``s`` is None or has no recognized number and unit.
    """
    if s is None:
        return None

    text = s.strip()

    # Digits with separators only: no unit to interpret
    if re.match(r'^[\d,.]+$', text):
        return str_to_int(text)

    thousand = 1000
    million = 1000 ** 2
    multipliers = {
        'k': thousand,
        'K': thousand,
        'm': million,
        'M': million,
        'kk': million,
        'KK': million,
    }
    return lookup_unit_table(multipliers, text)
1675
1676
def month_by_name(name, lang='en'):
    """ Return the number (1-12) of a month by its full name in ``lang``

    Languages missing from MONTH_NAMES fall back to English; an unknown
    month name yields None. """

    month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])

    for number, month in enumerate(month_names, 1):
        if month == name:
            return number
    return None
1686
1687
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
        abbreviations (the first three letters of the month name), or None
        for an unknown abbreviation """

    for number, month in enumerate(ENGLISH_MONTH_NAMES, 1):
        if month[:3] == abbrev:
            return number
    return None
1696
1697
def fix_xml_ampersands(xml_str):
    """Replace every bare '&' by '&amp;' in XML.

    Ampersands that start one of the five predefined entities or a numeric
    character reference (1-6 hex digits or 1-7 decimal digits, enough for
    every Unicode code point) are left alone.
    """
    # The digit counts used to be {,4}, which re-escaped valid references
    # above U+FFFF such as '&#x1F600;' and treated the malformed '&#;' as a
    # reference
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{1,6};|#[0-9]{1,7};)',
        '&amp;',
        xml_str)
1704
1705
def setproctitle(title):
    """Set the process name to ``title`` via libc's prctl, if possible.

    Does nothing on Jython, when libc.so.6 cannot be loaded, or when the
    loaded libc has no ``prctl``.
    """
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    title_bytes = title.encode('utf-8')
    # Initialising the buffer from the bytes makes it one byte larger than
    # the title, so the name handed to prctl is always NUL-terminated.  A
    # buffer of exactly len(title_bytes) has no room for the terminator.
    buf = ctypes.create_string_buffer(title_bytes)
    try:
        # 15 is PR_SET_NAME
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
1730
1731
def remove_start(s, start):
    """Return ``s`` without the prefix ``start``; None and non-matches pass through."""
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]
1734
1735
def remove_end(s, end):
    """Return ``s`` without the suffix ``end``; None and non-matches pass through.

    An empty ``end`` leaves ``s`` unchanged.
    """
    if s is None or not s.endswith(end):
        return s
    # Slice by absolute position: s[:-len(end)] turns into s[:0] (an empty
    # string) when end is empty
    return s[:len(s) - len(end)]
1738
1739
def remove_quotes(s):
    """Strip one pair of matching single or double quotes surrounding ``s``.

    None, strings shorter than two characters and strings without a
    matching pair are returned unchanged.
    """
    if s is None or len(s) < 2:
        return s
    first, last = s[0], s[-1]
    if first == last and first in ('"', "'"):
        return s[1:-1]
    return s
1747
1748
def url_basename(url):
    """Return the last segment of the path of ``url`` (ignoring outer slashes)."""
    trimmed_path = compat_urlparse.urlparse(url).path.strip('/')
    return trimmed_path.rpartition('/')[2]
1752
1753
def base_url(url):
    """Return the http(s) URL prefix of url up to and including the last
    '/' that precedes any '?', '#' or '&'.

    Raises AttributeError when url does not match that shape.
    """
    mobj = re.match(r'https?://[^?#&]+/', url)
    return mobj.group()
1756
1757
def urljoin(base, path):
    """Join path onto base, returning None when that is not possible.

    Both arguments may be text or UTF-8 encoded bytes. A path that is
    already absolute (http://, https:// or protocol-relative //) is
    returned as is. None is returned when path is empty or not a string,
    or when base is not an absolute http(s)/protocol-relative URL.
    """
    ABSOLUTE_URL_RE = r'^(?:https?:)?//'

    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not isinstance(path, compat_str) or not path:
        return None
    if re.match(ABSOLUTE_URL_RE, path):
        return path

    if isinstance(base, bytes):
        base = base.decode('utf-8')
    if not isinstance(base, compat_str):
        return None
    if not re.match(ABSOLUTE_URL_RE, base):
        return None
    return compat_urlparse.urljoin(base, path)
1771
1772
class HEADRequest(compat_urllib_request.Request):
    """Request whose get_method() always reports 'HEAD'."""

    def get_method(self):
        return 'HEAD'
1776
1777
class PUTRequest(compat_urllib_request.Request):
    """Request whose get_method() always reports 'PUT'."""

    def get_method(self):
        return 'PUT'
1781
1782
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Convert v to an int, returning default when that is not possible.

    v:        value to convert; None and '' yield default.
    scale:    the converted value is floor-divided by this.
    default:  returned when v is missing or cannot be converted.
    get_attr: when set and v is not None, the attribute of that name is
              read from v (None if absent) and converted instead of v.
    invscale: the converted value is multiplied by this before scaling.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v is None or v == '':
        return default
    try:
        number = int(v)
    except (ValueError, TypeError, OverflowError):
        # ValueError: non-numeric string or NaN; TypeError: unsupported
        # type such as list or dict; OverflowError: infinite float.
        return default
    return number * invscale // scale
1795
1796
def str_or_none(v, default=None):
    """Return v converted with compat_str, or default when v is None."""
    if v is None:
        return default
    return compat_str(v)
1799
1800
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if int_str is None:
        return None
    # Drop thousands separators (',' or '.') and '+' signs before parsing
    digits = re.sub(r'[,\.\+]', '', int_str)
    return int(digits)
1807
1808
def float_or_none(v, scale=1, invscale=1, default=None):
    """Convert v to a float, returning default when that is not possible.

    v:        value to convert; None yields default.
    scale:    the converted value is divided by this.
    invscale: the converted value is multiplied by this before scaling.
    default:  returned when v is None or cannot be converted.
    """
    if v is None:
        return default
    try:
        number = float(v)
    except (ValueError, TypeError):
        # ValueError: non-numeric string; TypeError: unsupported type such
        # as list or dict.
        return default
    return number * invscale / scale
1816
1817
def bool_or_none(v, default=None):
    """Return v when it is a bool instance, default otherwise."""
    if isinstance(v, bool):
        return v
    return default
1820
1821
def strip_or_none(v):
    """Return v.strip(), or None when v is None."""
    if v is None:
        return None
    return v.strip()
1824
1825
def parse_duration(s):
    """Parse a textual duration into a number of seconds.

    Three notations are tried in order:
      1. colon separated fields, [[[days:]hours:]mins:]secs[.fraction]
      2. unit suffixed fields (d/days, h/hours, m/min/minutes,
         s/sec/seconds), optionally prefixed by 'PT' or 'T'
      3. a single, possibly fractional, amount of hours or minutes
    All of them accept a trailing 'Z'.

    Returns None when s is not a string or matches none of the notations.
    """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    days, hours, mins, secs, ms = [None] * 5
    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
    if m is None:
        m = re.match(
            r'''(?ix)(?:P?T)?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)

    if m is not None:
        # Both of the first two notations expose the same five groups
        days, hours, mins, secs, ms = m.groups()
    else:
        m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
        if m is None:
            return None
        hours, mins = m.groups()

    # (captured text, seconds per unit); ms already holds a '.xxx'
    # fraction of a second, hence its factor of 1.
    components = (
        (secs, 1),
        (mins, 60),
        (hours, 60 * 60),
        (days, 24 * 60 * 60),
        (ms, 1),
    )
    duration = 0
    for text, unit_seconds in components:
        if text:
            duration += float(text) * unit_seconds
    return duration
1872
1873
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert ext in front of the real extension of filename.

    When expected_real_ext is given and the real extension differs from
    it, ext is appended after the whole filename instead.
    """
    stem, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(stem, ext, real_ext)
1880
1881
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the real extension of filename with ext.

    When expected_real_ext is given and the real extension differs from
    it, ext is appended after the whole filename instead.
    """
    stem, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        stem = filename
    return '{0}.{1}'.format(stem, ext)
1887
1888
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    try:
        process = subprocess.Popen(
            [exe] + args,
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # Drain both pipes and wait for the process; the output is not used
        process.communicate()
    except OSError:
        return False
    else:
        return exe
1897
1898
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if youtube-dl is run in the background.
        # See https://github.com/rg3/youtube-dl/issues/955#issuecomment-209789656
        process = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        output, _ = process.communicate()
    except OSError:
        return False
    if isinstance(output, bytes):  # Python 2.x
        output = output.decode('ascii', 'ignore')
    # stderr was merged into stdout above, so a banner printed on either
    # stream is searched for the version.
    return detect_exe_version(output, version_re, unrecognized)
1916
1917
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version string from the text output of an executable.

    version_re is searched in output and its first group is returned; it
    defaults to a pattern matching 'version <token>'. unrecognized is
    returned when the pattern is not found.
    """
    assert isinstance(output, compat_str)
    pattern = r'version\s+([-0-9._a-zA-Z]+)' if version_re is None else version_re
    mobj = re.search(pattern, output)
    return mobj.group(1) if mobj else unrecognized
1927
1928
class PagedList(object):
    """Base class for paged lists; subclasses supply getslice()."""

    def __len__(self):
        # This is only useful for tests
        entries = self.getslice()
        return len(entries)
1933
1934
class OnDemandPagedList(PagedList):
    """Paged list that requests pages one at a time while slicing.

    pagefunc(pagenum) must return an iterable with the entries of the
    given 0-based page, pagesize is the number of entries of a full page.
    With use_cache set, fetched pages are kept in a per-instance dict.
    """

    def __init__(self, pagefunc, pagesize, use_cache=False):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        if use_cache:
            self._cache = {}

    def getslice(self, start=0, end=None):
        """Return the entries in [start, end) as a list (end=None: all)."""
        pagesize = self._pagesize
        collected = []
        for pagenum in itertools.count(start // pagesize):
            page_first = pagenum * pagesize
            next_page_first = page_first + pagesize
            if start >= next_page_first:
                continue

            page = self._cache.get(pagenum) if self._use_cache else None
            if page is None:
                page = list(self._pagefunc(pagenum))
            if self._use_cache:
                self._cache[pagenum] = page

            # Offset of the first wanted entry inside this page
            if page_first <= start < next_page_first:
                offset = start % pagesize
            else:
                offset = 0

            # Exclusive in-page limit when the slice ends on this page
            if end is not None and page_first <= end <= next_page_first:
                limit = ((end - 1) % pagesize) + 1
            else:
                limit = None

            if offset != 0 or limit is not None:
                page = page[offset:limit]
            collected.extend(page)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page) + offset < pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == next_page_first:
                break
        return collected
1985
1986
class InAdvancePagedList(PagedList):
    """Paged list for which the total number of pages is known up front.

    pagefunc(pagenum) must return an iterable with the entries of the
    given 0-based page; pagecount is the number of pages and pagesize the
    number of entries per page.
    """

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Return the entries in [start, end) as a list (end=None: all)."""
        pagesize = self._pagesize
        first_page = start // pagesize
        if end is None:
            stop_page = self._pagecount
            remaining = None
        else:
            stop_page = end // pagesize + 1
            remaining = end - start
        # Entries to drop from the beginning of the first fetched page
        to_skip = start - first_page * pagesize

        collected = []
        for pagenum in range(first_page, stop_page):
            page = list(self._pagefunc(pagenum))
            if to_skip:
                page = page[to_skip:]
                to_skip = None
            if remaining is not None:
                if len(page) >= remaining:
                    # This page completes the requested slice
                    collected.extend(page[:remaining])
                    break
                remaining -= len(page)
            collected.extend(page)
        return collected
2014
2015
def uppercase_escape(s):
    """Decode every literal \\UXXXXXXXX escape sequence found in s."""
    decode = codecs.getdecoder('unicode_escape')

    def unescape(mobj):
        # The decoder returns (decoded text, consumed length)
        return decode(mobj.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', unescape, s)
2022
2023
def lowercase_escape(s):
    """Decode every literal \\uXXXX escape sequence found in s."""
    decode = codecs.getdecoder('unicode_escape')

    def unescape(mobj):
        # The decoder returns (decoded text, consumed length)
        return decode(mobj.group(0))[0]

    return re.sub(r'\\u[0-9a-fA-F]{4}', unescape, s)
2030
2031
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    on_python2 = sys.version_info < (3, 0)
    if on_python2 and isinstance(s, compat_str):
        # On Python 2 a text string is passed to quote() as UTF-8 bytes
        s = s.encode('utf-8')
    # Characters listed here are left unescaped by quote()
    safe_chars = b"%/;:@&=+$,!~*'()?#[]"
    return compat_urllib_parse.quote(s, safe_chars)
2037
2038
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parts = compat_urllib_parse_urlparse(url)
    # The host is IDNA-encoded; every other component that may carry
    # non-ASCII text goes through escape_rfc3986().
    escaped_parts = parts._replace(
        netloc=parts.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(parts.path),
        params=escape_rfc3986(parts.params),
        query=escape_rfc3986(parts.query),
        fragment=escape_rfc3986(parts.fragment)
    )
    return escaped_parts.geturl()
2049
2050
def read_batch_urls(batch_fd):
    """Read the URLs listed in a batch file object and close it.

    Each line is decoded as UTF-8 when it is not already text, stripped of
    a leading byte order mark and surrounding whitespace, and dropped when
    it is empty or starts with '#', ';' or ']'.

    Returns the remaining lines as a list.
    """
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        # '\ufeff' is what the UTF-8 BOM turns into once the line has been
        # decoded as UTF-8 (as done just above); '\xef\xbb\xbf' is the same
        # BOM seen through a Latin-1 style decoding, kept for inputs that
        # were opened that way.
        for bom in ('\xef\xbb\xbf', '\ufeff'):
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
2065
2066
def urlencode_postdata(*args, **kargs):
    """URL-encode the given arguments and return the result as ASCII bytes.

    All arguments are forwarded to compat_urllib_parse_urlencode().
    """
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
2069
2070
def update_url_query(url, query):
    """Return url with its query string updated from the mapping query.

    Parameters already present in url are overridden by entries of query
    that share their name. url is returned untouched when query is empty.
    """
    if not query:
        return url
    parts = compat_urlparse.urlparse(url)
    params = compat_parse_qs(parts.query)
    params.update(query)
    # doseq=True: list values are emitted as repeated parameters
    new_query = compat_urllib_parse_urlencode(params, True)
    return compat_urlparse.urlunparse(parts._replace(query=new_query))
2079
2080
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Build a new request from req with some of its parts replaced.

    url and data replace those of req when truthy, headers are merged on
    top of the headers of req and query is merged into the query string
    of the resulting URL. HEAD and PUT requests keep their method; the
    timeout attribute of req is carried over when present.
    """
    merged_headers = req.headers.copy()
    merged_headers.update(headers)
    body = data or req.data
    target_url = update_url_query(url or req.get_full_url(), query)
    # Any method other than HEAD/PUT falls back to the plain Request class
    request_class = {
        'HEAD': HEADRequest,
        'PUT': PUTRequest,
    }.get(req.get_method(), compat_urllib_request.Request)
    new_req = request_class(
        target_url, data=body, headers=merged_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
2099
2100
def _multipart_encode_impl(data, boundary):
    """Encode the dict data as multipart/form-data using boundary.

    Returns a (body bytes, Content-Type header value) tuple.
    Raises ValueError when boundary occurs inside one of the encoded
    fields.
    """
    content_type = 'multipart/form-data; boundary=%s' % boundary
    boundary_bytes = boundary.encode('ascii')

    chunks = []
    for name, value in data.items():
        chunks.append(b'--' + boundary_bytes + b'\r\n')
        if isinstance(name, compat_str):
            name = name.encode('utf-8')
        if isinstance(value, compat_str):
            value = value.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        field = b'Content-Disposition: form-data; name="' + name + b'"\r\n\r\n' + value + b'\r\n'
        if boundary_bytes in field:
            raise ValueError('Boundary overlaps with data')
        chunks.append(field)

    chunks.append(b'--' + boundary_bytes + b'--\r\n')

    return b''.join(chunks), content_type
2121
2122
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    if boundary is not None:
        # A caller supplied boundary is used as is: a clash with the data
        # surfaces as the ValueError raised by the implementation.
        return _multipart_encode_impl(data, boundary)

    while True:
        candidate = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            return _multipart_encode_impl(data, candidate)
        except ValueError:
            # The random boundary occurs in the data, draw another one
            continue
2151
2152
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up one key, or the first usable key out of several, in d.

    With a single key this is d.get(key, default). With a list or tuple of
    keys, the value of the first key that is present and not None is
    returned; falsy values are skipped as well unless skip_false_values is
    disabled. default is returned when no key qualifies.
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        if key not in d:
            continue
        value = d[key]
        if value is None or (skip_false_values and not value):
            continue
        return value
    return default
2161
2162
def try_get(src, getter, expected_type=None):
    """Apply getter (or each getter of a list/tuple in turn) to src.

    The first result obtained without an AttributeError, KeyError,
    TypeError or IndexError that also is an instance of expected_type
    (when given) is returned. None is returned when no getter succeeds.
    """
    getters = getter if isinstance(getter, (list, tuple)) else [getter]
    for candidate in getters:
        try:
            value = candidate(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(value, expected_type):
            return value
    return None
2174
2175
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return string as text, decoding it with encoding when necessary.

    The default encoding is the value preferredencoding() returned when
    this function was defined.
    """
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
2178
2179
# Age limit associated with each US rating label; parse_age_limit() returns
# these values for strings that match a key exactly.
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
2187
2188
# Age limit associated with each TV Parental Guidelines label;
# parse_age_limit() falls back to this table after US_RATINGS.
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
2197
2198
def parse_age_limit(s):
    """Convert an age rating into a numeric age limit.

    Accepts a plain int in the range 0-21, a string made of one or two
    digits with an optional trailing '+', or a label listed in US_RATINGS
    or TV_PARENTAL_GUIDELINES. Anything else yields None.
    """
    if type(s) is int:
        if 0 <= s <= 21:
            return s
        return None
    if not isinstance(s, compat_basestring):
        return None
    mobj = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if mobj is not None:
        return int(mobj.group('age'))
    # US ratings take precedence over the TV guidelines
    return US_RATINGS.get(s, TV_PARENTAL_GUIDELINES.get(s))
2210
2211
def strip_jsonp(code):
    """Strip a JSONP wrapper and return the callback payload.

    Handles 'cb(...)', 'window.cb(...)' and 'cb && cb(...)' style calls,
    with an optional trailing ';' and trailing '//' comments. Input that
    does not look like such a call is returned unchanged.
    """
    jsonp_rex = re.compile(
        r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]+)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$''')
    return jsonp_rex.sub(r'\g<callback_data>', code)
2220
2221
def js_to_json(code):
    """Convert a JavaScript object/array literal into JSON text.

    Single quoted strings and bare identifiers become double quoted
    strings, comments and trailing commas are removed, and hexadecimal or
    octal integers are rewritten in decimal.
    """
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    # (pattern, base) pairs for integer notations JSON does not support
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )
    # Rewrites applied inside string literals; any other escape sequence
    # is kept as it is.
    STRING_FIXES = {
        '"': '\\"',
        "\\'": "'",
        '\\\n': '',
        '\\x': '\\u00',
    }
    TOKEN_RE = r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        [a-zA-Z_][.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)
        '''.format(comment=COMMENT_RE, skip=SKIP_RE)

    def fix_string_part(mobj):
        part = mobj.group(0)
        return STRING_FIXES.get(part, part)

    def fix_kv(mobj):
        token = mobj.group(0)
        if token in ('true', 'false', 'null'):
            return token
        if token.startswith('/*') or token.startswith('//') or token == ',':
            # Comments and trailing commas are dropped
            return ""

        if token[0] in ("'", '"'):
            token = re.sub(r'(?s)\\.|"', fix_string_part, token[1:-1])

        for regex, base in INTEGER_TABLE:
            int_mobj = re.match(regex, token)
            if int_mobj:
                number = int(int_mobj.group(1), base)
                if token.endswith(':'):
                    # An integer used as object key must become a string
                    return '"%d":' % number
                return '%d' % number

        return '"%s"' % token

    return re.sub(TOKEN_RE, fix_kv, code)
2261
2262
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def q(qid):
        # The position in quality_ids is the quality value; unknown ids
        # rank below every known one.
        if qid in quality_ids:
            return quality_ids.index(qid)
        return -1
    return q
2271
2272
# %-style template built from the 'title', 'id' and 'ext' fields.
# NOTE(review): presumably the default output filename template - confirm
# against the callers.
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
2274
2275
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) <= length:
        return s
    # Leave room for the ellipses so the result is length characters long
    return s[:length - len(ELLIPSES)] + ELLIPSES
2284
2285
def version_tuple(v):
    """Split a version string on '.' and '-' into a tuple of ints.

    Raises ValueError when a component is not an integer.
    """
    components = re.split(r'[-.]', v)
    return tuple(map(int, components))
2288
2289
def is_outdated_version(version, limit, assume_new=True):
    """Tell whether version is older than limit.

    When version is empty or one of the two versions cannot be parsed,
    the answer is derived from assume_new: an unknown version counts as
    outdated only if assume_new is false.
    """
    unknown_result = not assume_new
    if not version:
        return unknown_result
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return unknown_result
2297
2298
def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U """
    from zipimport import zipimporter

    # True for a frozen executable (sys.frozen is set) or when this module
    # was loaded from a zip archive.
    if hasattr(sys, 'frozen'):
        return True
    return isinstance(globals().get('__loader__'), zipimporter)
2304
2305
def args_to_str(args):
    """Return a short, shell-quoted string for a subprocess command."""
    quoted_args = [compat_shlex_quote(arg) for arg in args]
    return ' '.join(quoted_args)
2309
2310
def error_to_compat_str(err):
    """Return the message of the exception err as a text string."""
    err_str = str(err)
    if sys.version_info[0] >= 3:
        return err_str
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    return err_str.decode(preferredencoding())
2318
2319
def mimetype2ext(mt):
    """Map a MIME type to a file extension.

    Full types with a dedicated mapping are resolved first. Otherwise the
    subtype (with any ';parameter' removed, lowercased) is looked up in
    the subtype table and returned unchanged when it is not listed there.
    Returns None when mt is None.
    """
    if mt is None:
        return None

    FULL_TYPE_MAP = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
    }
    SUBTYPE_MAP = {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
    }

    known_ext = FULL_TYPE_MAP.get(mt)
    if known_ext is not None:
        return known_ext

    subtype = mt.rsplit('/', 1)[-1]
    subtype = subtype.split(';')[0].strip().lower()
    return SUBTYPE_MAP.get(subtype, subtype)
2354
2355
def parse_codecs(codecs_str):
    """Split a 'codecs' attribute value into video and audio codecs.

    codecs_str is a comma separated codec list (see
    http://tools.ietf.org/html/rfc6381). The first entry recognised as a
    video codec and the first recognised as an audio codec are reported;
    a warning is written to stderr for every unrecognised entry.

    Returns a dict with 'vcodec' and 'acodec' keys, where 'none' stands
    for a missing stream, or an empty dict when nothing can be told:
      * at least one codec recognised: the recognised codec(s), 'none'
        for the other kind;
      * nothing recognised but exactly two entries: they are assumed to
        be video and audio, in that order;
      * otherwise (including empty input): {}.
    """
    if not codecs_str:
        return {}
    splited_codecs = [
        codec for codec in (
            part.strip() for part in codecs_str.strip().strip(',').split(','))
        if codec]
    vcodec, acodec = None, None
    for full_codec in splited_codecs:
        codec = full_codec.split('.')[0]
        if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v'):
            if not vcodec:
                vcodec = full_codec
        elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
            if not acodec:
                acodec = full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
    if vcodec or acodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    # Nothing was recognised, so vcodec and acodec are both None here and
    # must not be reported; fall back to the raw entries instead.
    if len(splited_codecs) == 2:
        return {
            'vcodec': splited_codecs[0],
            'acodec': splited_codecs[1],
        }
    return {}
2390
2391
def urlhandle_detect_ext(url_handle):
    """Guess a file extension from the response headers of url_handle.

    The filename of an 'attachment' Content-Disposition header is
    preferred; the Content-Type header is used otherwise.
    """
    headers = url_handle.headers

    disposition = headers.get('Content-Disposition')
    if disposition:
        mobj = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition)
        if mobj:
            ext = determine_ext(mobj.group('filename'), default_ext=None)
            if ext:
                return ext

    return mimetype2ext(headers.get('Content-Type'))
2404
2405
def encode_data_uri(data, mime_type):
    """Return a base64 'data:' URI carrying the bytes data as mime_type."""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
2408
2409
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """

    # Nothing is blocked when no limit is set (age_limit is None) or when
    # the content is available for everyone (content_limit is None).
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
2418
2419
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """

    # The UTF-32 marks must be tested before the UTF-16 ones: the UTF-32-LE
    # mark starts with the UTF-16-LE one.
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    # Without a byte order mark the data is taken to be UTF-8
    encoding, payload = 'utf-8', first_bytes
    for bom, bom_encoding in BOMS:
        if first_bytes.startswith(bom):
            encoding, payload = bom_encoding, first_bytes[len(bom):]
            break
    text = payload.decode(encoding, 'replace')

    # Match object (truthy) when the first non-blank character is '<'
    return re.match(r'^\s*<', text)
2438
2439
def determine_protocol(info_dict):
    """Return the protocol to use for the format described by info_dict.

    An explicit 'protocol' entry wins. Otherwise the protocol is derived
    from info_dict['url']: from its rtmp/mms/rtsp prefix, then from an
    m3u8/f4m extension, and finally from the URL scheme.
    """
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        # Manifest formats are named after their extension
        return ext

    return compat_urllib_parse_urlparse(url).scheme
2460
2461
def render_table(header_row, data):
    """ Render a list of rows, each as a list of values """
    rows = [header_row] + data
    # Width of a column is the length of its longest cell
    widths = [
        max(len(compat_str(cell)) for cell in column)
        for column in zip(*rows)]
    # Every column but the last is left-aligned and padded to its width
    # plus one; the last one is printed as is.
    padded_specs = ['%-' + compat_str(width + 1) + 's' for width in widths[:-1]]
    row_format = ' '.join(padded_specs) + '%s'
    return '\n'.join(row_format % tuple(row) for row in rows)
2468
2469
2470 def _match_one(filter_part, dct):
2471     COMPARISON_OPERATORS = {
2472         '<': operator.lt,
2473         '<=': operator.le,
2474         '>': operator.gt,
2475         '>=': operator.ge,
2476         '=': operator.eq,
2477         '!=': operator.ne,
2478     }
2479     operator_rex = re.compile(r'''(?x)\s*
2480         (?P<key>[a-z_]+)
2481         \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
2482         (?:
2483             (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
2484             (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
2485             (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
2486         )
2487         \s*$
2488         ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
2489     m = operator_rex.search(filter_part)
2490     if m:
2491         op = COMPARISON_OPERATORS[m.group('op')]
2492         actual_value = dct.get(m.group('key'))
2493         if (m.group('quotedstrval') is not None or
2494             m.group('strval') is not None or
2495             # If the original field is a string and matching comparisonvalue is
2496             # a number we should respect the origin of the original field
2497             # and process comparison value as a string (see
2498             # https://github.com/rg3/youtube-dl/issues/11082).
2499             actual_value is not None and m.group('intval') is not None and
2500                 isinstance(actual_value, compat_str)):
2501             if m.group('op') not in ('=', '!='):
2502                 raise ValueError(
2503                     'Operator %s does not support string values!' % m.group('op'))
2504             comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
2505             quote = m.group('quote')
2506             if quote is not None:
2507                 comparison_value = comparison_value.replace(r'\%s' % quote, quote)
2508         else:
2509             try:
2510                 comparison_value = int(m.group('intval'))
2511             except ValueError:
2512                 comparison_value = parse_filesize(m.group('intval'))
2513                 if comparison_value is None:
2514                     comparison_value = parse_filesize(m.group('intval') + 'B')
2515                 if comparison_value is None:
2516                     raise ValueError(
2517                         'Invalid integer value %r in filter part %r' % (
2518                             m.group('intval'), filter_part))
2519         if actual_value is None:
2520             return m.group('none_inclusive')
2521         return op(actual_value, comparison_value)
2522
2523     UNARY_OPERATORS = {
2524         '': lambda v: v is not None,
2525         '!': lambda v: v is None,
2526     }
2527     operator_rex = re.compile(r'''(?x)\s*
2528         (?P<op>%s)\s*(?P<key>[a-z_]+)
2529         \s*$
2530         ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
2531     m = operator_rex.search(filter_part)
2532     if m:
2533         op = UNARY_OPERATORS[m.group('op')]
2534         actual_value = dct.get(m.group('key'))
2535         return op(actual_value)
2536
2537     raise ValueError('Invalid filter part %r' % filter_part)
2538
2539
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """

    # The '&' separated parts must all pass; evaluation stops at the first
    # part that does not.
    for filter_part in filter_str.split('&'):
        if not _match_one(filter_part, dct):
            return False
    return True
2545
2546
def match_filter_func(filter_str):
    """Build a filter callback for filter_str.

    The returned function takes an info dict and returns None when it
    passes the filter, or a message explaining why it is skipped.
    """
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        # Name the video by its title, falling back to its id
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
2555
2556
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP/TTML time expression into seconds (float).

    Supported are an offset in seconds with an optional 's' suffix
    ('12.5', '12.5s') and clock times 'H:MM:SS' whose fraction may be
    separated by '.' or ':'. Returns None for empty or unsupported input.
    """
    if not time_expr:
        return None

    offset_mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if offset_mobj:
        return float(offset_mobj.group('time_offset'))

    clock_mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if clock_mobj:
        hours, minutes, seconds = clock_mobj.groups()
        # 'SS:fff' denotes the same instant as 'SS.fff'
        return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))

    return None
2568
2569
def srt_subtitles_timecode(seconds):
    """Format a number of seconds as an SRT timecode, 'HH:MM:SS,mmm'."""
    hours = seconds / 3600
    minutes = (seconds % 3600) / 60
    secs = seconds % 60
    millis = (seconds % 1) * 1000
    # %d formatting truncates each component to its integer part
    return '%02d:%02d:%02d,%03d' % (hours, minutes, secs, millis)
2572
2573
2574 def dfxp2srt(dfxp_data):
2575     LEGACY_NAMESPACES = (
2576         ('http://www.w3.org/ns/ttml', [
2577             'http://www.w3.org/2004/11/ttaf1',
2578             'http://www.w3.org/2006/04/ttaf1',
2579             'http://www.w3.org/2006/10/ttaf1',
2580         ]),
2581         ('http://www.w3.org/ns/ttml#styling', [
2582             'http://www.w3.org/ns/ttml#style',
2583         ]),
2584     )
2585
2586     SUPPORTED_STYLING = [
2587         'color',
2588         'fontFamily',
2589         'fontSize',
2590         'fontStyle',
2591         'fontWeight',
2592         'textDecoration'
2593     ]
2594
2595     _x = functools.partial(xpath_with_ns, ns_map={
2596         'ttml': 'http://www.w3.org/ns/ttml',
2597         'tts': 'http://www.w3.org/ns/ttml#styling',
2598     })
2599
2600     styles = {}
2601     default_style = {}
2602
2603     class TTMLPElementParser(object):
2604         _out = ''
2605         _unclosed_elements = []
2606         _applied_styles = []
2607
2608         def start(self, tag, attrib):
2609             if tag in (_x('ttml:br'), 'br'):
2610                 self._out += '\n'
2611             else:
2612                 unclosed_elements = []
2613                 style = {}
2614                 element_style_id = attrib.get('style')
2615                 if default_style:
2616                     style.update(default_style)
2617                 if element_style_id:
2618                     style.update(styles.get(element_style_id, {}))
2619                 for prop in SUPPORTED_STYLING:
2620                     prop_val = attrib.get(_x('tts:' + prop))
2621                     if prop_val:
2622                         style[prop] = prop_val
2623                 if style:
2624                     font = ''
2625                     for k, v in sorted(style.items()):
2626                         if self._applied_styles and self._applied_styles[-1].get(k) == v:
2627                             continue
2628                         if k == 'color':
2629                             font += ' color="%s"' % v
2630                         elif k == 'fontSize':
2631                             font += ' size="%s"' % v
2632                         elif k == 'fontFamily':
2633                             font += ' face="%s"' % v
2634                         elif k == 'fontWeight' and v == 'bold':
2635                             self._out += '<b>'
2636                             unclosed_elements.append('b')
2637                         elif k == 'fontStyle' and v == 'italic':
2638                             self._out += '<i>'
2639                             unclosed_elements.append('i')
2640                         elif k == 'textDecoration' and v == 'underline':
2641                             self._out += '<u>'
2642                             unclosed_elements.append('u')
2643                     if font:
2644                         self._out += '<font' + font + '>'
2645                         unclosed_elements.append('font')
2646                     applied_style = {}
2647                     if self._applied_styles:
2648                         applied_style.update(self._applied_styles[-1])
2649                     applied_style.update(style)
2650                     self._applied_styles.append(applied_style)
2651                 self._unclosed_elements.append(unclosed_elements)
2652
2653         def end(self, tag):
2654             if tag not in (_x('ttml:br'), 'br'):
2655                 unclosed_elements = self._unclosed_elements.pop()
2656                 for element in reversed(unclosed_elements):
2657                     self._out += '</%s>' % element
2658                 if unclosed_elements and self._applied_styles:
2659                     self._applied_styles.pop()
2660
2661         def data(self, data):
2662             self._out += data
2663
2664         def close(self):
2665             return self._out.strip()
2666
2667     def parse_node(node):
2668         target = TTMLPElementParser()
2669         parser = xml.etree.ElementTree.XMLParser(target=target)
2670         parser.feed(xml.etree.ElementTree.tostring(node))
2671         return parser.close()
2672
2673     for k, v in LEGACY_NAMESPACES:
2674         for ns in v:
2675             dfxp_data = dfxp_data.replace(ns, k)
2676
2677     dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8'))
2678     out = []
2679     paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
2680
2681     if not paras:
2682         raise ValueError('Invalid dfxp/TTML subtitle')
2683
2684     repeat = False
2685     while True:
2686         for style in dfxp.findall(_x('.//ttml:style')):
2687             style_id = style.get('id')
2688             parent_style_id = style.get('style')
2689             if parent_style_id:
2690                 if parent_style_id not in styles:
2691                     repeat = True
2692                     continue
2693                 styles[style_id] = styles[parent_style_id].copy()
2694             for prop in SUPPORTED_STYLING:
2695                 prop_val = style.get(_x('tts:' + prop))
2696                 if prop_val:
2697                     styles.setdefault(style_id, {})[prop] = prop_val
2698         if repeat:
2699             repeat = False
2700         else:
2701             break
2702
2703     for p in ('body', 'div'):
2704         ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
2705         if ele is None:
2706             continue
2707         style = styles.get(ele.get('style'))
2708         if not style:
2709             continue
2710         default_style.update(style)
2711
2712     for para, index in zip(paras, itertools.count(1)):
2713         begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
2714         end_time = parse_dfxp_time_expr(para.attrib.get('end'))
2715         dur = parse_dfxp_time_expr(para.attrib.get('dur'))
2716         if begin_time is None:
2717             continue
2718         if not end_time:
2719             if not dur:
2720                 continue
2721             end_time = begin_time + dur
2722         out.append('%d\n%s --> %s\n%s\n\n' % (
2723             index,
2724             srt_subtitles_timecode(begin_time),
2725             srt_subtitles_timecode(end_time),
2726             parse_node(para)))
2727
2728     return ''.join(out)
2729
2730
def cli_option(params, command_option, param):
    """Build the argv fragment for an option that takes a value.

    Looks up ``param`` in ``params``.  A missing or ``None`` value yields an
    empty list; otherwise the result is ``[command_option, value]``, where a
    truthy value is first converted with ``compat_str`` and a falsy one
    (e.g. ``0`` or ``''``) is passed through unchanged.
    """
    value = params.get(param)
    if value is None:
        return []
    if value:
        value = compat_str(value)
    return [command_option, value]
2736
2737
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Build the argv fragment for an option carrying a boolean value.

    Returns an empty list when ``param`` is absent (or ``None``) in
    ``params``.  Otherwise the stored value must be a ``bool``; it is rendered
    as ``true_value`` or ``false_value`` and emitted either as two separate
    arguments or, when ``separator`` is given, as a single joined argument.
    """
    flag = params.get(param)
    if flag is None:
        return []
    assert isinstance(flag, bool)
    rendered = true_value if flag else false_value
    if not separator:
        return [command_option, rendered]
    return [command_option + separator + rendered]
2746
2747
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return ``[command_option]`` when ``params[param]`` equals
    ``expected_value``, and an empty list otherwise (including when the
    parameter is missing)."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
2751
2752
def cli_configuration_args(params, param, default=None):
    """Fetch a list of extra command line arguments from ``params``.

    @param params   mapping of option names to values
    @param param    key whose value (a list) holds the extra arguments
    @param default  value returned when the key is missing or ``None``;
                    when omitted, a fresh empty list is returned
    @returns        the stored list, or the default

    The default used to be a shared ``[]`` literal in the signature, so a
    caller that mutated the returned list silently changed what every later
    call received.  A new list is now created on each call instead.
    """
    ex_args = params.get(param)
    if ex_args is None:
        # Never hand out a shared list object as the implicit default
        return [] if default is None else default
    assert isinstance(ex_args, list)
    return ex_args
2759
2760
class ISO639Utils(object):
    """Conversions between two-letter and three-letter language codes."""

    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    # Keys are ISO 639-1 codes, values the matching ISO 639-2/T codes
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T

        Only the first two characters of ``code`` are looked up, so a longer
        tag such as 'en-US' resolves through its 'en' prefix.  Returns None
        for an unknown code.
        """
        two_letter = code[:2]
        return cls._lang_map.get(two_letter)

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1

        Returns None when no entry of the table maps to ``code``.
        """
        candidates = (
            short_name
            for short_name, long_name in cls._lang_map.items()
            if long_name == code)
        return next(candidates, None)
2961
2962
class ISO3166Utils(object):
    """Lookup of country names by two-letter country code."""

    # From http://data.okfn.org/data/core/country-list
    # Keys are upper-case two-letter codes, values the country names
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert a two-letter country code to the corresponding full name

        The lookup is case-insensitive (the code is upper-cased first);
        None is returned for a code that is not in the table.
        """
        normalized = code.upper()
        return cls._country_map.get(normalized)
3221
3222
class GeoUtils(object):
    """Helpers for picking an IPv4 address inside a country's address block."""

    # Major IPv4 address blocks per country
    # (two-letter code -> one block in CIDR notation)
    _country_ip_map = {
        'AD': '85.94.160.0/19',
        'AE': '94.200.0.0/13',
        'AF': '149.54.0.0/17',
        'AG': '209.59.64.0/18',
        'AI': '204.14.248.0/21',
        'AL': '46.99.0.0/16',
        'AM': '46.70.0.0/15',
        'AO': '105.168.0.0/13',
        'AP': '159.117.192.0/21',
        'AR': '181.0.0.0/12',
        'AS': '202.70.112.0/20',
        'AT': '84.112.0.0/13',
        'AU': '1.128.0.0/11',
        'AW': '181.41.0.0/18',
        'AZ': '5.191.0.0/16',
        'BA': '31.176.128.0/17',
        'BB': '65.48.128.0/17',
        'BD': '114.130.0.0/16',
        'BE': '57.0.0.0/8',
        'BF': '129.45.128.0/17',
        'BG': '95.42.0.0/15',
        'BH': '37.131.0.0/17',
        'BI': '154.117.192.0/18',
        'BJ': '137.255.0.0/16',
        'BL': '192.131.134.0/24',
        'BM': '196.12.64.0/18',
        'BN': '156.31.0.0/16',
        'BO': '161.56.0.0/16',
        'BQ': '161.0.80.0/20',
        'BR': '152.240.0.0/12',
        'BS': '24.51.64.0/18',
        'BT': '119.2.96.0/19',
        'BW': '168.167.0.0/16',
        'BY': '178.120.0.0/13',
        'BZ': '179.42.192.0/18',
        'CA': '99.224.0.0/11',
        'CD': '41.243.0.0/16',
        'CF': '196.32.200.0/21',
        'CG': '197.214.128.0/17',
        'CH': '85.0.0.0/13',
        'CI': '154.232.0.0/14',
        'CK': '202.65.32.0/19',
        'CL': '152.172.0.0/14',
        'CM': '165.210.0.0/15',
        'CN': '36.128.0.0/10',
        'CO': '181.240.0.0/12',
        'CR': '201.192.0.0/12',
        'CU': '152.206.0.0/15',
        'CV': '165.90.96.0/19',
        'CW': '190.88.128.0/17',
        'CY': '46.198.0.0/15',
        'CZ': '88.100.0.0/14',
        'DE': '53.0.0.0/8',
        'DJ': '197.241.0.0/17',
        'DK': '87.48.0.0/12',
        'DM': '192.243.48.0/20',
        'DO': '152.166.0.0/15',
        'DZ': '41.96.0.0/12',
        'EC': '186.68.0.0/15',
        'EE': '90.190.0.0/15',
        'EG': '156.160.0.0/11',
        'ER': '196.200.96.0/20',
        'ES': '88.0.0.0/11',
        'ET': '196.188.0.0/14',
        'EU': '2.16.0.0/13',
        'FI': '91.152.0.0/13',
        'FJ': '144.120.0.0/16',
        'FM': '119.252.112.0/20',
        'FO': '88.85.32.0/19',
        'FR': '90.0.0.0/9',
        'GA': '41.158.0.0/15',
        'GB': '25.0.0.0/8',
        'GD': '74.122.88.0/21',
        'GE': '31.146.0.0/16',
        'GF': '161.22.64.0/18',
        'GG': '62.68.160.0/19',
        'GH': '45.208.0.0/14',
        'GI': '85.115.128.0/19',
        'GL': '88.83.0.0/19',
        'GM': '160.182.0.0/15',
        'GN': '197.149.192.0/18',
        'GP': '104.250.0.0/19',
        'GQ': '105.235.224.0/20',
        'GR': '94.64.0.0/13',
        'GT': '168.234.0.0/16',
        'GU': '168.123.0.0/16',
        'GW': '197.214.80.0/20',
        'GY': '181.41.64.0/18',
        'HK': '113.252.0.0/14',
        'HN': '181.210.0.0/16',
        'HR': '93.136.0.0/13',
        'HT': '148.102.128.0/17',
        'HU': '84.0.0.0/14',
        'ID': '39.192.0.0/10',
        'IE': '87.32.0.0/12',
        'IL': '79.176.0.0/13',
        'IM': '5.62.80.0/20',
        'IN': '117.192.0.0/10',
        'IO': '203.83.48.0/21',
        'IQ': '37.236.0.0/14',
        'IR': '2.176.0.0/12',
        'IS': '82.221.0.0/16',
        'IT': '79.0.0.0/10',
        'JE': '87.244.64.0/18',
        'JM': '72.27.0.0/17',
        'JO': '176.29.0.0/16',
        'JP': '126.0.0.0/8',
        'KE': '105.48.0.0/12',
        'KG': '158.181.128.0/17',
        'KH': '36.37.128.0/17',
        'KI': '103.25.140.0/22',
        'KM': '197.255.224.0/20',
        'KN': '198.32.32.0/19',
        'KP': '175.45.176.0/22',
        'KR': '175.192.0.0/10',
        'KW': '37.36.0.0/14',
        'KY': '64.96.0.0/15',
        'KZ': '2.72.0.0/13',
        'LA': '115.84.64.0/18',
        'LB': '178.135.0.0/16',
        'LC': '192.147.231.0/24',
        'LI': '82.117.0.0/19',
        'LK': '112.134.0.0/15',
        'LR': '41.86.0.0/19',
        'LS': '129.232.0.0/17',
        'LT': '78.56.0.0/13',
        'LU': '188.42.0.0/16',
        'LV': '46.109.0.0/16',
        'LY': '41.252.0.0/14',
        'MA': '105.128.0.0/11',
        'MC': '88.209.64.0/18',
        'MD': '37.246.0.0/16',
        'ME': '178.175.0.0/17',
        'MF': '74.112.232.0/21',
        'MG': '154.126.0.0/17',
        'MH': '117.103.88.0/21',
        'MK': '77.28.0.0/15',
        'ML': '154.118.128.0/18',
        'MM': '37.111.0.0/17',
        'MN': '49.0.128.0/17',
        'MO': '60.246.0.0/16',
        'MP': '202.88.64.0/20',
        'MQ': '109.203.224.0/19',
        'MR': '41.188.64.0/18',
        'MS': '208.90.112.0/22',
        'MT': '46.11.0.0/16',
        'MU': '105.16.0.0/12',
        'MV': '27.114.128.0/18',
        'MW': '105.234.0.0/16',
        'MX': '187.192.0.0/11',
        'MY': '175.136.0.0/13',
        'MZ': '197.218.0.0/15',
        'NA': '41.182.0.0/16',
        'NC': '101.101.0.0/18',
        'NE': '197.214.0.0/18',
        'NF': '203.17.240.0/22',
        'NG': '105.112.0.0/12',
        'NI': '186.76.0.0/15',
        'NL': '145.96.0.0/11',
        'NO': '84.208.0.0/13',
        'NP': '36.252.0.0/15',
        'NR': '203.98.224.0/19',
        'NU': '49.156.48.0/22',
        'NZ': '49.224.0.0/14',
        'OM': '5.36.0.0/15',
        'PA': '186.72.0.0/15',
        'PE': '186.160.0.0/14',
        'PF': '123.50.64.0/18',
        'PG': '124.240.192.0/19',
        'PH': '49.144.0.0/13',
        'PK': '39.32.0.0/11',
        'PL': '83.0.0.0/11',
        'PM': '70.36.0.0/20',
        'PR': '66.50.0.0/16',
        'PS': '188.161.0.0/16',
        'PT': '85.240.0.0/13',
        'PW': '202.124.224.0/20',
        'PY': '181.120.0.0/14',
        'QA': '37.210.0.0/15',
        'RE': '139.26.0.0/16',
        'RO': '79.112.0.0/13',
        'RS': '178.220.0.0/14',
        'RU': '5.136.0.0/13',
        'RW': '105.178.0.0/15',
        'SA': '188.48.0.0/13',
        'SB': '202.1.160.0/19',
        'SC': '154.192.0.0/11',
        'SD': '154.96.0.0/13',
        'SE': '78.64.0.0/12',
        'SG': '152.56.0.0/14',
        'SI': '188.196.0.0/14',
        'SK': '78.98.0.0/15',
        'SL': '197.215.0.0/17',
        'SM': '89.186.32.0/19',
        'SN': '41.82.0.0/15',
        'SO': '197.220.64.0/19',
        'SR': '186.179.128.0/17',
        'SS': '105.235.208.0/21',
        'ST': '197.159.160.0/19',
        'SV': '168.243.0.0/16',
        'SX': '190.102.0.0/20',
        'SY': '5.0.0.0/16',
        'SZ': '41.84.224.0/19',
        'TC': '65.255.48.0/20',
        'TD': '154.68.128.0/19',
        'TG': '196.168.0.0/14',
        'TH': '171.96.0.0/13',
        'TJ': '85.9.128.0/18',
        'TK': '27.96.24.0/21',
        'TL': '180.189.160.0/20',
        'TM': '95.85.96.0/19',
        'TN': '197.0.0.0/11',
        'TO': '175.176.144.0/21',
        'TR': '78.160.0.0/11',
        'TT': '186.44.0.0/15',
        'TV': '202.2.96.0/19',
        'TW': '120.96.0.0/11',
        'TZ': '156.156.0.0/14',
        'UA': '93.72.0.0/13',
        'UG': '154.224.0.0/13',
        'US': '3.0.0.0/8',
        'UY': '167.56.0.0/13',
        'UZ': '82.215.64.0/18',
        'VA': '212.77.0.0/19',
        'VC': '24.92.144.0/20',
        'VE': '186.88.0.0/13',
        'VG': '172.103.64.0/18',
        'VI': '146.226.0.0/16',
        'VN': '14.160.0.0/11',
        'VU': '202.80.32.0/20',
        'WF': '117.20.32.0/21',
        'WS': '202.4.32.0/19',
        'YE': '134.35.0.0/16',
        'YT': '41.242.116.0/22',
        'ZA': '41.0.0.0/11',
        'ZM': '165.56.0.0/13',
        'ZW': '41.85.192.0/19',
    }

    @classmethod
    def random_ipv4(cls, code):
        """Return a random IPv4 address (dotted string) from the block
        registered for country ``code``, or None when the code is unknown.

        The code is matched case-insensitively.
        """
        cidr = cls._country_ip_map.get(code.upper())
        if not cidr:
            return None
        network, prefix_len = cidr.split('/')
        # Lowest address of the block as a 32-bit big-endian integer
        lowest = compat_struct_unpack('!L', socket.inet_aton(network))[0]
        # Setting every host bit gives the highest address of the block
        highest = lowest | (0xffffffff >> int(prefix_len))
        chosen = random.randint(lowest, highest)
        return compat_str(socket.inet_ntoa(compat_struct_pack('!L', chosen)))
3475
3476
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """Proxy handler that allows the proxy to be chosen per request.

    A request may carry a 'Ytdl-request-proxy' header naming the proxy to
    use; the special value '__noproxy__' disables proxying.  SOCKS proxies
    are not opened here: they are announced through a 'Ytdl-socks-proxy'
    header and left to the http/https handlers.
    """

    def __init__(self, proxies=None):
        # Set default handlers: without further configuration every
        # http/https request goes through proxy_open() with '__noproxy__'
        bound_proxy_open = self.proxy_open
        for scheme in ('http', 'https'):
            setattr(
                self, '%s_open' % scheme,
                lambda r, proxy='__noproxy__', type=scheme, meth=bound_proxy_open:
                    meth(r, proxy, type))
        return compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # A per-request header takes precedence over the configured proxy
        # and is removed from the request's headers once read
        per_request_proxy = req.headers.get('Ytdl-request-proxy')
        if per_request_proxy is not None:
            del req.headers['Ytdl-request-proxy']
            proxy = per_request_proxy

        if proxy == '__noproxy__':
            return None  # No Proxy

        proxy_scheme = compat_urlparse.urlparse(proxy).scheme.lower()
        if proxy_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
            # youtube-dl's http/https handlers wrap the socket with SOCKS
            # themselves; only pass the proxy URL on to them
            req.add_header('Ytdl-socks-proxy', proxy)
            return None

        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
3500
3501
3502 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
3503 # released into Public Domain
3504 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
3505
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a big-endian byte string.

    Leading zero bytes are dropped; zero (or any non-positive value) is
    encoded as a single zero byte.  If optional blocksize is given and
    greater than zero, the front of the byte string is padded with binary
    zeros so that the length is a multiple of blocksize.
    """
    n = int(n)
    # Peel off 32-bit words, least significant first
    words = []
    while n > 0:
        words.append(compat_struct_pack('>I', n & 0xffffffff))
        n >>= 32
    # Most significant word goes first; drop the zero bytes in front of it.
    # Nothing is left only when no word was produced at all.
    packed = b''.join(reversed(words)).lstrip(b'\000') or b'\000'
    # Pad at the front up to the next multiple of blocksize
    remainder = len(packed) % blocksize if blocksize > 0 else 0
    if remainder:
        packed = b'\000' * (blocksize - remainder) + packed
    return packed
3534
3535
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a big-endian byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    # Left-pad with zero bytes so the input splits into whole 32-bit words
    missing = -len(s) % 4
    if missing:
        s = b'\000' * missing + s
    total = 0
    for offset in range(0, len(s), 4):
        word = compat_struct_unpack('>I', s[offset:offset + 4])[0]
        total = (total << 32) + word
    return total
3551
3552
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    The input bytes are reversed and read as one big hexadecimal number,
    which is then raised to ``exponent`` modulo ``modulus``.

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''

    reversed_hex = binascii.hexlify(data[::-1])
    plain_number = int(reversed_hex, 16)
    cipher_number = pow(plain_number, exponent, modulus)
    return '%x' % cipher_number
3568
3569
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    The result has the layout ``[0, 2] + padding + [0] + data``.  The
    padding bytes must all be non-zero: the first zero byte after the
    ``[0, 2]`` header marks where the padding ends, so a zero inside the
    padding would make a decoder cut the message at the wrong place.

    @param {int[]} data        input data
    @param {int}   length      target length
    @returns {int[]}           padded data
    @raises ValueError         if data does not leave room for the three
                               marker bytes and at least 8 padding bytes
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # Draw from 1..255 only, never 0 (see docstring)
    # NOTE: the random module is not a cryptographically secure source
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
3583
3584
def encode_base_n(num, n, table=None):
    """
    Convert a non-negative integer to its base-n string representation

    @param num      number to convert, must not be negative
    @param n        base, at least 2 and not larger than the digit table
    @param table    optional string of digit characters; defaults to the
                    first n characters of 0-9, a-z, A-Z
    @returns        the encoded string; zero is encoded as table[0]
    @raises ValueError  if the base exceeds the table length, is smaller
                        than 2, or if num is negative
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    # Floor division never reaches zero for these inputs, so the loop below
    # would spin forever; reject them explicitly instead.
    if n < 2:
        raise ValueError('base %d is too small, at least 2 is required' % n)
    if num < 0:
        raise ValueError('cannot encode negative number %d' % num)

    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    # Digits were produced least significant first
    return ''.join(reversed(digits))
3601
3602
def decode_packed_codes(code):
    """
    Expand packed code found in `code`

    The first match of PACKED_CODES_RE supplies the obfuscated payload, the
    numeric base, the symbol count and a '|' separated symbol list. Every
    word of the payload is then replaced through a table that maps the
    base-n spelling of each index to its symbol (or to that spelling itself
    when the symbol is empty).
    """
    match = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = match.groups()
    base, count = int(base), int(count)
    symbols = symbols.split('|')

    symbol_table = {}
    while count:
        count -= 1
        encoded_index = encode_base_n(count, base)
        # An empty symbol means the word stands for itself
        symbol_table[encoded_index] = symbols[count] or encoded_index

    def _expand(word_match):
        return symbol_table[word_match.group(0)]

    return re.sub(r'\b(\w+)\b', _expand, obfuscated_code)
3619
3620
def parse_m3u8_attributes(attrib):
    """
    Parse a comma separated KEY=VALUE attribute list into a dict

    Values may be double-quoted (and may then contain commas); the
    surrounding quotes are removed. All values are returned as strings and
    a repeated key keeps its last value.
    """
    def _unquote(raw):
        return raw[1:-1] if raw.startswith('"') else raw

    pairs = re.findall(
        r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib)
    return dict((name, _unquote(raw)) for name, raw in pairs)
3628
3629
def urshift(val, n):
    """
    Right-shift `val` by `n` bits, treating a negative `val` as unsigned

    Negative values are offset by 2**32 before shifting; non-negative
    values are shifted as they are.
    """
    if val < 0:
        val += 0x100000000
    return val >> n
3632
3633
# Based on png2str() written by @gdkchan and improved by @yokrysty
# Originally posted at https://github.com/rg3/youtube-dl/issues/9706
def decode_png(png_data):
    """
    Decode the image data of a PNG file into rows of sample values

    Only width and height are read from IHDR; bit depth, colour type and
    interlace method are not inspected. Every scanline is read as
    width * 3 one-byte samples (presumably 8-bit RGB without alpha).

    Returns a (width, height, pixels) tuple where pixels is a list with one
    entry per row, each a flat list of width * 3 integers.

    Raises IOError if the PNG signature or the leading IHDR chunk is
    missing, or if the file holds no IDAT data.
    """
    # Reference: https://www.w3.org/TR/PNG/
    signature, body = png_data[:8], png_data[8:]

    if signature != b'\x89PNG\x0d\x0a\x1a\x0a' or body[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    formats = {1: '>B', 2: '>H', 4: '>I'}

    def _read_uint(raw):
        return compat_struct_unpack(formats[len(raw)], raw)[0]

    # Each chunk is laid out as: length (4), type (4), data (length), CRC (4)
    chunks = []
    cursor, total = 0, len(body)
    while cursor < total:
        size = _read_uint(body[cursor:cursor + 4])
        data_start = cursor + 8
        chunks.append({
            'type': body[cursor + 4:data_start],
            'length': size,
            'data': body[data_start:data_start + size],
        })
        cursor = data_start + size + 4  # the CRC is skipped, not verified

    ihdr = chunks[0]['data']
    width = _read_uint(ihdr[:4])
    height = _read_uint(ihdr[4:8])

    # The compressed stream may be split over several IDAT chunks
    idat = b''
    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    stride = width * 3
    pixels = []

    for y in range(height):
        # Every scanline is preceded by one byte naming its filter
        row_start = y * (1 + stride)
        filter_type = decompressed_data[row_start]

        current_row = []
        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[row_start + 1 + x]

            # Neighbouring samples already reconstructed; 0 outside the image
            left = current_row[x - 3] if x > 2 else 0
            up = pixels[y - 1][x] if y > 0 else 0

            if filter_type == 1:  # Sub
                predictor = left
            elif filter_type == 2:  # Up
                predictor = up
            elif filter_type == 3:  # Average
                predictor = (left + up) >> 1
            elif filter_type == 4:  # Paeth
                upper_left = pixels[y - 1][x - 3] if x > 2 and y > 0 else 0

                estimate = left + up - upper_left
                dist_left = abs(estimate - left)
                dist_up = abs(estimate - up)
                dist_upper_left = abs(estimate - upper_left)

                if dist_left <= dist_up and dist_left <= dist_upper_left:
                    predictor = left
                elif dist_up <= dist_upper_left:
                    predictor = up
                else:
                    predictor = upper_left
            else:
                # Filter 0 (and anything unrecognised) keeps the raw sample
                predictor = 0

            current_row.append((color + predictor) & 0xff)

    return width, height, pixels
3739
3740
def write_xattr(path, key, value):
    """
    Set the extended attribute `key` of the file `path` to `value`

    Backends are tried in this order: the python "xattr" module (either the
    pyxattr or the xattr flavour); when that cannot be imported, NTFS
    Alternate Data Streams on Windows, otherwise the "setfattr" or "xattr"
    command line tools.

    Raises XAttrUnavailableError when no usable backend is found (or the
    installed pyxattr is too old) and XAttrMetadataError when the chosen
    backend fails to write the attribute.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if not hasattr(xattr, 'set'):  # xattr
            setter = xattr.setxattr
        else:  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/rg3/youtube-dl/issues/5498
            minimum_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(minimum_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'youtube-dl requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        minimum_version, xattr.__version__))

            setter = xattr.set

        try:
            setter(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            stream_name = path + ':' + key
            try:
                with open(stream_name, 'wb') as stream:
                    stream.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
            return

        has_setfattr = check_executable('setfattr', ['--version'])
        has_xattr = check_executable('xattr', ['-h'])

        if not (has_setfattr or has_xattr):
            # On Unix, and can't find pyxattr, setfattr, or xattr.
            if sys.platform.startswith('linux'):
                hint = (
                    "Couldn't find a tool to set the xattrs. "
                    "Install either the python 'pyxattr' or 'xattr' "
                    "modules, or the GNU 'attr' package "
                    "(which contains the 'setfattr' tool).")
            else:
                hint = (
                    "Couldn't find a tool to set the xattrs. "
                    "Install either the python 'xattr' module, "
                    "or the 'xattr' binary.")
            raise XAttrUnavailableError(hint)

        value = value.decode('utf-8')
        # setfattr is preferred when both tools are present
        if has_setfattr:
            executable, opts = 'setfattr', ['-n', key, '-v', value]
        else:
            executable, opts = 'xattr', ['-w', key, value]

        cmd = [encodeFilename(executable, True)]
        cmd.extend(encodeArgument(o) for o in opts)
        cmd.append(encodeFilename(path, True))

        try:
            p = subprocess.Popen(
                cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)
        _, stderr = p.communicate()
        stderr = stderr.decode('utf-8', 'replace')
        if p.returncode != 0:
            raise XAttrMetadataError(p.returncode, stderr)
3823
3824
def cookie_to_dict(cookie):
    """
    Convert a cookie object into a plain dict

    'name' and 'value' are always present. 'port', 'domain' and 'path' are
    copied only when the matching *_specified flag is set; 'expires',
    'secure' and 'discard' only when they are not None. 'httponly' is set
    to True when the cookie carries a non-standard HttpOnly attribute in
    any of the three spellings checked below.
    """
    cookie_dict = {
        'name': cookie.name,
        'value': cookie.value,
    }

    for field in ('port', 'domain', 'path'):
        if getattr(cookie, field + '_specified'):
            cookie_dict[field] = getattr(cookie, field)

    for field in ('expires', 'secure', 'discard'):
        if getattr(cookie, field) is not None:
            cookie_dict[field] = getattr(cookie, field)

    try:
        spellings = ('httpOnly', 'httponly', 'HttpOnly')
        if any(cookie.has_nonstandard_attr(name) for name in spellings):
            cookie_dict['httponly'] = True
    except TypeError:
        # The lookup itself failed; leave 'httponly' unset
        pass
    return cookie_dict
3850
3851
3852 def cookie_jar_to_list(cookie_jar):
3853     return [cookie_to_dict(cookie) for cookie in cookie_jar]
3854
3855
class PhantomJSwrapper(object):
    """PhantomJS wrapper class

    Runs a page through the external `phantomjs` executable. Data is
    exchanged with the PhantomJS script through three temporary files
    (script, html and cookies) that are created when the wrapper is
    constructed and removed when it is destroyed.
    """

    # Script run by PhantomJS. It is filled in with str.format(), hence the
    # doubled braces around every literal JavaScript block.
    _TEMPLATE = r'''
        phantom.onError = function(msg, trace) {{
          var msgStack = ['PHANTOM ERROR: ' + msg];
          if(trace && trace.length) {{
            msgStack.push('TRACE:');
            trace.forEach(function(t) {{
              msgStack.push(' -> ' + (t.file || t.sourceURL) + ': ' + t.line
                + (t.function ? ' (in function ' + t.function +')' : ''));
            }});
          }}
          console.error(msgStack.join('\n'));
          phantom.exit(1);
        }};
        var page = require('webpage').create();
        var fs = require('fs');
        var read = {{ mode: 'r', charset: 'utf-8' }};
        var write = {{ mode: 'w', charset: 'utf-8' }};
        JSON.parse(fs.read("{cookies}", read)).forEach(function(x) {{
          phantom.addCookie(x);
        }});
        page.settings.resourceTimeout = {timeout};
        page.settings.userAgent = "{ua}";
        page.onLoadStarted = function() {{
          page.evaluate(function() {{
            delete window._phantom;
            delete window.callPhantom;
          }});
        }};
        var saveAndExit = function() {{
          fs.write("{html}", page.content, write);
          fs.write("{cookies}", JSON.stringify(phantom.cookies), write);
          phantom.exit();
        }};
        page.onLoadFinished = function(status) {{
          if(page.url === "") {{
            page.setContent(fs.read("{html}", read), "{url}");
          }}
          else {{
            {jscode}
          }}
        }};
        page.open("");
    '''

    _TMP_FILE_NAMES = ['script', 'html', 'cookies']

    @staticmethod
    def _version():
        """Return the version reported by the phantomjs executable"""
        return get_exe_version('phantomjs', version_re=r'([0-9.]+)')

    def __init__(self, extractor, required_version=None, timeout=10000):
        """
        Params:
            extractor: extractor used for downloading, cookies and output
            required_version: optional, warn if PhantomJS is older than this
            timeout: value of `page.settings.resourceTimeout` in the script

        Raises ExtractorError if the phantomjs executable is not found.
        """
        # Created first so that __del__ finds it even when the checks below
        # raise before any temporary file exists
        self._TMP_FILES = {}

        self.exe = check_executable('phantomjs', ['-v'])
        if not self.exe:
            raise ExtractorError('PhantomJS executable not found in PATH, '
                                 'download it from http://phantomjs.org',
                                 expected=True)

        self.extractor = extractor

        if required_version:
            version = self._version()
            if is_outdated_version(version, required_version):
                self.extractor._downloader.report_warning(
                    'Your copy of PhantomJS is outdated, update it to version '
                    '%s or newer if you encounter any errors.' % required_version)

        self.options = {
            'timeout': timeout,
        }
        for name in self._TMP_FILE_NAMES:
            # Only the path is needed; PhantomJS and we reopen it by name
            tmp = tempfile.NamedTemporaryFile(delete=False)
            tmp.close()
            self._TMP_FILES[name] = tmp

    def __del__(self):
        # Best-effort cleanup: a file may never have been created or may be
        # gone already, and module globals can be torn down at interpreter
        # exit. Anything else is left to propagate.
        for name in self._TMP_FILE_NAMES:
            try:
                os.remove(self._TMP_FILES[name].name)
            except (IOError, OSError, KeyError, AttributeError):
                pass

    def _save_cookies(self, url):
        """Write the downloader's cookies as JSON to the cookies temp file"""
        cookies = cookie_jar_to_list(self.extractor._downloader.cookiejar)
        for cookie in cookies:
            # Fill in defaults for cookies lacking an explicit path/domain
            if 'path' not in cookie:
                cookie['path'] = '/'
            if 'domain' not in cookie:
                cookie['domain'] = compat_urlparse.urlparse(url).netloc
        with open(self._TMP_FILES['cookies'].name, 'wb') as f:
            f.write(json.dumps(cookies).encode('utf-8'))

    def _load_cookies(self):
        """Read the cookies written by the script and hand them to the extractor"""
        with open(self._TMP_FILES['cookies'].name, 'rb') as f:
            cookies = json.loads(f.read().decode('utf-8'))
        for cookie in cookies:
            # A missing 'httponly' entry is treated like a false one
            if cookie.get('httponly') is True:
                cookie['rest'] = {'httpOnly': None}
            if 'expiry' in cookie:
                cookie['expire_time'] = cookie['expiry']
            # JSON keys are text strings; compat_kwargs makes them usable as
            # keyword argument names on every supported Python version
            self.extractor._set_cookie(**compat_kwargs(cookie))

    def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on webpage', headers=None, jscode='saveAndExit();'):
        """
        Downloads webpage (if needed) and executes JS

        Params:
            url: website url
            html: optional, html code of website
            video_id: video id
            note: optional, displayed when downloading webpage
            note2: optional, displayed when executing JS
            headers: custom http headers
            jscode: code to be executed when page is loaded

        Returns tuple with:
            * downloaded website (after JS execution)
            * anything you print with `console.log` (but not inside `page.execute`!)

        Raises ExtractorError if `jscode` lacks `saveAndExit();` or if
        PhantomJS exits with a non-zero status.

        In most cases you don't need to add any `jscode`.
        It is executed in `page.onLoadFinished`.
        `saveAndExit();` is mandatory, use it instead of `phantom.exit()`
        It is possible to wait for some element on the webpage, for example:
            var check = function() {
              var elementFound = page.evaluate(function() {
                return document.querySelector('#b.done') !== null;
              });
              if(elementFound)
                saveAndExit();
              else
                window.setTimeout(check, 500);
            }

            page.evaluate(function(){
              document.querySelector('#a').click();
            });
            check();
        """
        if headers is None:
            headers = {}
        if 'saveAndExit();' not in jscode:
            raise ExtractorError('`saveAndExit();` not found in `jscode`')
        if not html:
            html = self.extractor._download_webpage(url, video_id, note=note, headers=headers)
        with open(self._TMP_FILES['html'].name, 'wb') as f:
            f.write(html.encode('utf-8'))

        self._save_cookies(url)

        # Work on a copy so that per-call values do not leak into self.options
        replaces = dict(self.options)
        replaces['url'] = url
        user_agent = headers.get('User-Agent') or std_headers['User-Agent']
        replaces['ua'] = user_agent.replace('"', '\\"')
        replaces['jscode'] = jscode

        for x in self._TMP_FILE_NAMES:
            # Paths end up inside double-quoted JavaScript string literals
            replaces[x] = self._TMP_FILES[x].name.replace('\\', '\\\\').replace('"', '\\"')

        with open(self._TMP_FILES['script'].name, 'wb') as f:
            f.write(self._TEMPLATE.format(**replaces).encode('utf-8'))

        if video_id is None:
            self.extractor.to_screen('%s' % (note2,))
        else:
            self.extractor.to_screen('%s: %s' % (video_id, note2))

        p = subprocess.Popen(
            [self.exe, '--ssl-protocol=any', self._TMP_FILES['script'].name],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, err = p.communicate()
        if p.returncode != 0:
            raise ExtractorError('Executing JS failed\n:'
                                 + encodeArgument(err))
        # The script has overwritten the html file with the rendered page
        with open(self._TMP_FILES['html'].name, 'rb') as f:
            html = f.read().decode('utf-8')

        self._load_cookies()

        return (html, encodeArgument(out))
4036
4037
def random_birthday(year_field, month_field, day_field):
    """
    Generate a random birthday between 1950-01-01 and 1995-12-31

    The date is picked as a whole so that it always exists in the calendar
    (drawing month and day independently could yield e.g. 31 February).

    @param year_field, month_field, day_field   keys to use in the result
    @returns    dict mapping the three given keys to the year, month and
                day of the date, each as a string without zero padding
    """
    first_day = datetime.date(1950, 1, 1)
    last_day = datetime.date(1995, 12, 31)
    offset = random.randint(0, (last_day - first_day).days)
    birthday = first_day + datetime.timedelta(days=offset)
    return {
        year_field: str(birthday.year),
        month_field: str(birthday.month),
        day_field: str(birthday.day),
    }