_ Git - youtube-dl/blob - youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import datetime
   9 import errno
  10 import fileinput
  11 import io
  12 import itertools
  13 import json
  14 import locale
  15 import operator
  16 import os
  17 import platform
  18 import re
  19 import shutil
  20 import subprocess
  21 import socket
  22 import sys
  23 import time
  24 import traceback
  25
  26 if os.name == 'nt':
  27     import ctypes
  28
  29 from .compat import (
  30     compat_basestring,
  31     compat_cookiejar,
  32     compat_expanduser,
  33     compat_get_terminal_size,
  34     compat_http_client,
  35     compat_kwargs,
  36     compat_str,
  37     compat_urllib_error,
  38     compat_urllib_request,
  39 )
  40 from .utils import (
  41     escape_url,
  42     ContentTooShortError,
  43     date_from_str,
  44     DateRange,
  45     DEFAULT_OUTTMPL,
  46     determine_ext,
  47     DownloadError,
  48     encodeFilename,
  49     ExtractorError,
  50     format_bytes,
  51     formatSeconds,
  52     locked_file,
  53     make_HTTPS_handler,
  54     MaxDownloadsReached,
  55     PagedList,
  56     parse_filesize,
  57     PerRequestProxyHandler,
  58     PostProcessingError,
  59     platform_name,
  60     preferredencoding,
  61     render_table,
  62     SameFileError,
  63     sanitize_filename,
  64     sanitize_path,
  65     std_headers,
  66     subtitles_filename,
  67     UnavailableVideoError,
  68     url_basename,
  69     version_tuple,
  70     write_json_file,
  71     write_string,
  72     YoutubeDLHandler,
  73     prepend_extension,
  74     replace_extension,
  75     args_to_str,
  76     age_restricted,
  77 )
  78 from .cache import Cache
  79 from .extractor import get_info_extractor, gen_extractors
  80 from .downloader import get_suitable_downloader
  81 from .downloader.rtmp import rtmpdump_version
  82 from .postprocessor import (
  83     FFmpegFixupM4aPP,
  84     FFmpegFixupStretchedPP,
  85     FFmpegMergerPP,
  86     FFmpegPostProcessor,
  87     get_postprocessor,
  88 )
  89 from .version import __version__
  90
  91
  92 class YoutubeDL(object):
  93     """YoutubeDL class.
  94
  95     YoutubeDL objects are the ones responsible of downloading the
  96     actual video file and writing it to disk if the user has requested
  97     it, among some other tasks. In most cases there should be one per
  98     program. As, given a video URL, the downloader doesn't know how to
  99     extract all the needed information, task that InfoExtractors do, it
 100     has to pass the URL to one of them.
 101
 102     For this, YoutubeDL objects have a method that allows
 103     InfoExtractors to be registered in a given order. When it is passed
 104     a URL, the YoutubeDL object handles it to the first InfoExtractor it
 105     finds that reports being able to handle it. The InfoExtractor extracts
 106     all the information about the video or videos the URL refers to, and
 107     YoutubeDL process the extracted information, possibly using a File
 108     Downloader to download the video.
 109
 110     YoutubeDL objects accept a lot of parameters. In order not to saturate
 111     the object constructor with arguments, it receives a dictionary of
 112     options instead. These options are available through the params
 113     attribute for the InfoExtractors to use. The YoutubeDL also
 114     registers itself as the downloader in charge for the InfoExtractors
 115     that are added to it, so this is a "mutual registration".
 116
 117     Available options:
 118
 119     username:          Username for authentication purposes.
 120     password:          Password for authentication purposes.
 121     videopassword:     Password for accessing a video.
 122     usenetrc:          Use netrc for authentication instead.
 123     verbose:           Print additional info to stdout.
 124     quiet:             Do not print messages to stdout.
 125     no_warnings:       Do not print out anything for warnings.
 126     forceurl:          Force printing final URL.
 127     forcetitle:        Force printing title.
 128     forceid:           Force printing ID.
 129     forcethumbnail:    Force printing thumbnail URL.
 130     forcedescription:  Force printing description.
 131     forcefilename:     Force printing final filename.
 132     forceduration:     Force printing duration.
 133     forcejson:         Force printing info_dict as JSON.
 134     dump_single_json:  Force printing the info_dict of the whole playlist
 135                        (or video) as a single JSON line.
 136     simulate:          Do not download the video files.
 137     format:            Video format code. See options.py for more information.
 138     outtmpl:           Template for output names.
 139     restrictfilenames: Do not allow "&" and spaces in file names
 140     ignoreerrors:      Do not stop on download errors.
 141     nooverwrites:      Prevent overwriting files.
 142     playliststart:     Playlist item to start at.
 143     playlistend:       Playlist item to end at.
 144     playlist_items:    Specific indices of playlist to download.
 145     playlistreverse:   Download playlist items in reverse order.
 146     matchtitle:        Download only matching titles.
 147     rejecttitle:       Reject downloads for matching titles.
 148     logger:            Log messages to a logging.Logger instance.
 149     logtostderr:       Log messages to stderr instead of stdout.
 150     writedescription:  Write the video description to a .description file
 151     writeinfojson:     Write the video description to a .info.json file
 152     writeannotations:  Write the video annotations to a .annotations.xml file
 153     writethumbnail:    Write the thumbnail image to a file
 154     write_all_thumbnails:  Write all thumbnail formats to files
 155     writesubtitles:    Write the video subtitles to a file
 156     writeautomaticsub: Write the automatic subtitles to a file
 157     allsubtitles:      Downloads all the subtitles of the video
 158                        (requires writesubtitles or writeautomaticsub)
 159     listsubtitles:     Lists all available subtitles for the video
 160     subtitlesformat:   The format code for subtitles
 161     subtitleslangs:    List of languages of the subtitles to download
 162     keepvideo:         Keep the video file after post-processing
 163     daterange:         A DateRange object, download only if the upload_date is in the range.
 164     skip_download:     Skip the actual download of the video file
 165     cachedir:          Location of the cache files in the filesystem.
 166                        False to disable filesystem cache.
 167     noplaylist:        Download single video instead of a playlist if in doubt.
 168     age_limit:         An integer representing the user's age in years.
 169                        Unsuitable videos for the given age are skipped.
 170     min_views:         An integer representing the minimum view count the video
 171                        must have in order to not be skipped.
 172                        Videos without view count information are always
 173                        downloaded. None for no limit.
 174     max_views:         An integer representing the maximum view count.
 175                        Videos that are more popular than that are not
 176                        downloaded.
 177                        Videos without view count information are always
 178                        downloaded. None for no limit.
 179     download_archive:  File name of a file where all downloads are recorded.
 180                        Videos already present in the file are not downloaded
 181                        again.
 182     cookiefile:        File name where cookies should be read from and dumped to.
 183     nocheckcertificate:Do not verify SSL certificates
 184     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 185                        At the moment, this is only supported by YouTube.
 186     proxy:             URL of the proxy server to use
 187     cn_verification_proxy:  URL of the proxy to use for IP address verification
 188                        on Chinese sites. (Experimental)
 189     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 190     bidi_workaround:   Work around buggy terminals without bidirectional text
 191                        support, using fribidi
 192     debug_printtraffic:Print out sent and received HTTP traffic
 193     include_ads:       Download ads as well
 194     default_search:    Prepend this string if an input url is not valid.
 195                        'auto' for elaborate guessing
 196     encoding:          Use this encoding instead of the system-specified.
 197     extract_flat:      Do not resolve URLs, return the immediate result.
 198                        Pass in 'in_playlist' to only show this behavior for
 199                        playlist items.
 200     postprocessors:    A list of dictionaries, each with an entry
 201                        * key:  The name of the postprocessor. See
 202                                youtube_dl/postprocessor/__init__.py for a list.
 203                        as well as any further keyword arguments for the
 204                        postprocessor.
 205     progress_hooks:    A list of functions that get called on download
 206                        progress, with a dictionary with the entries
 207                        * status: One of "downloading", "error", or "finished".
 208                                  Check this first and ignore unknown values.
 209
 210                        If status is one of "downloading", or "finished", the
 211                        following properties may also be present:
 212                        * filename: The final filename (always present)
 213                        * tmpfilename: The filename we're currently writing to
 214                        * downloaded_bytes: Bytes on disk
 215                        * total_bytes: Size of the whole file, None if unknown
 216                        * total_bytes_estimate: Guess of the eventual file size,
 217                                                None if unavailable.
 218                        * elapsed: The number of seconds since download started.
 219                        * eta: The estimated time in seconds, None if unknown
 220                        * speed: The download speed in bytes/second, None if
 221                                 unknown
 222                        * fragment_index: The counter of the currently
 223                                          downloaded video fragment.
 224                        * fragment_count: The number of fragments (= individual
 225                                          files that will be merged)
 226
 227                        Progress hooks are guaranteed to be called at least once
 228                        (with status "finished") if the download is successful.
 229     merge_output_format: Extension to use when merging formats.
 230     fixup:             Automatically correct known faults of the file.
 231                        One of:
 232                        - "never": do nothing
 233                        - "warn": only emit a warning
 234                        - "detect_or_warn": check whether we can do anything
 235                                            about it, warn otherwise (default)
 236     source_address:    (Experimental) Client-side IP address to bind to.
 237     call_home:         Boolean, true iff we are allowed to contact the
 238                        youtube-dl servers for debugging.
 239     sleep_interval:    Number of seconds to sleep before each download.
 240     listformats:       Print an overview of available video formats and exit.
 241     list_thumbnails:   Print a table of all thumbnails and exit.
 242     match_filter:      A function that gets called with the info_dict of
 243                        every video.
 244                        If it returns a message, the video is ignored.
 245                        If it returns None, the video is downloaded.
 246                        match_filter_func in utils.py is one example for this.
 247     no_color:          Do not emit color codes in output.
 248
 249     The following options determine which downloader is picked:
 250     external_downloader: Executable of the external downloader to call.
 251                        None or unset for standard (built-in) downloader.
 252     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
 253
 254     The following parameters are not used by YoutubeDL itself, they are used by
 255     the downloader (see youtube_dl/downloader/common.py):
 256     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 257     noresizebuffer, retries, continuedl, noprogress, consoletitle,
 258     xattr_set_filesize, external_downloader_args.
 259
 260     The following options are used by the post processors:
 261     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
 262                        otherwise prefer avconv.
 263     """
 264
    # Class-level placeholders; __init__ assigns a per-instance value to each
    # of these names.
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None
 271
 272     def __init__(self, params=None, auto_init=True):
 273         """Create a FileDownloader object with the given options."""
 274         if params is None:
 275             params = {}
 276         self._ies = []
 277         self._ies_instances = {}
 278         self._pps = []
 279         self._progress_hooks = []
 280         self._download_retcode = 0
 281         self._num_downloads = 0
 282         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 283         self._err_file = sys.stderr
 284         self.params = params
 285         self.cache = Cache(self)
 286
 287         if params.get('bidi_workaround', False):
 288             try:
 289                 import pty
 290                 master, slave = pty.openpty()
 291                 width = compat_get_terminal_size().columns
 292                 if width is None:
 293                     width_args = []
 294                 else:
 295                     width_args = ['-w', str(width)]
 296                 sp_kwargs = dict(
 297                     stdin=subprocess.PIPE,
 298                     stdout=slave,
 299                     stderr=self._err_file)
 300                 try:
 301                     self._output_process = subprocess.Popen(
 302                         ['bidiv'] + width_args, **sp_kwargs
 303                     )
 304                 except OSError:
 305                     self._output_process = subprocess.Popen(
 306                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 307                 self._output_channel = os.fdopen(master, 'rb')
 308             except OSError as ose:
 309                 if ose.errno == 2:
 310                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 311                 else:
 312                     raise
 313
 314         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 315                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
 316                 not params.get('restrictfilenames', False)):
 317             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 318             self.report_warning(
 319                 'Assuming --restrict-filenames since file system encoding '
 320                 'cannot encode all characters. '
 321                 'Set the LC_ALL environment variable to fix this.')
 322             self.params['restrictfilenames'] = True
 323
 324         if isinstance(params.get('outtmpl'), bytes):
 325             self.report_warning(
 326                 'Parameter outtmpl is bytes, but should be a unicode string. '
 327                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 328
 329         self._setup_opener()
 330
 331         if auto_init:
 332             self.print_debug_header()
 333             self.add_default_info_extractors()
 334
 335         for pp_def_raw in self.params.get('postprocessors', []):
 336             pp_class = get_postprocessor(pp_def_raw['key'])
 337             pp_def = dict(pp_def_raw)
 338             del pp_def['key']
 339             pp = pp_class(self, **compat_kwargs(pp_def))
 340             self.add_post_processor(pp)
 341
 342         for ph in self.params.get('progress_hooks', []):
 343             self.add_progress_hook(ph)
 344
 345     def warn_if_short_id(self, argv):
 346         # short YouTube ID starting with dash?
 347         idxs = [
 348             i for i, a in enumerate(argv)
 349             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 350         if idxs:
 351             correct_argv = (
 352                 ['youtube-dl'] +
 353                 [a for i, a in enumerate(argv) if i not in idxs] +
 354                 ['--'] + [argv[i] for i in idxs]
 355             )
 356             self.report_warning(
 357                 'Long argument string detected. '
 358                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 359                 args_to_str(correct_argv))
 360
 361     def add_info_extractor(self, ie):
 362         """Add an InfoExtractor object to the end of the list."""
 363         self._ies.append(ie)
 364         self._ies_instances[ie.ie_key()] = ie
 365         ie.set_downloader(self)
 366
 367     def get_info_extractor(self, ie_key):
 368         """
 369         Get an instance of an IE with name ie_key, it will try to get one from
 370         the _ies list, if there's no instance it will create a new one and add
 371         it to the extractor list.
 372         """
 373         ie = self._ies_instances.get(ie_key)
 374         if ie is None:
 375             ie = get_info_extractor(ie_key)()
 376             self.add_info_extractor(ie)
 377         return ie
 378
 379     def add_default_info_extractors(self):
 380         """
 381         Add the InfoExtractors returned by gen_extractors to the end of the list
 382         """
 383         for ie in gen_extractors():
 384             self.add_info_extractor(ie)
 385
 386     def add_post_processor(self, pp):
 387         """Add a PostProcessor object to the end of the chain."""
 388         self._pps.append(pp)
 389         pp.set_downloader(self)
 390
 391     def add_progress_hook(self, ph):
 392         """Add the progress hook (currently only for the file downloader)"""
 393         self._progress_hooks.append(ph)
 394
 395     def _bidi_workaround(self, message):
 396         if not hasattr(self, '_output_channel'):
 397             return message
 398
 399         assert hasattr(self, '_output_process')
 400         assert isinstance(message, compat_str)
 401         line_count = message.count('\n') + 1
 402         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 403         self._output_process.stdin.flush()
 404         res = ''.join(self._output_channel.readline().decode('utf-8')
 405                       for _ in range(line_count))
 406         return res[:-len('\n')]
 407
 408     def to_screen(self, message, skip_eol=False):
 409         """Print message to stdout if not in quiet mode."""
 410         return self.to_stdout(message, skip_eol, check_quiet=True)
 411
 412     def _write_string(self, s, out=None):
 413         write_string(s, out=out, encoding=self.params.get('encoding'))
 414
 415     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 416         """Print message to stdout if not in quiet mode."""
 417         if self.params.get('logger'):
 418             self.params['logger'].debug(message)
 419         elif not check_quiet or not self.params.get('quiet', False):
 420             message = self._bidi_workaround(message)
 421             terminator = ['\n', ''][skip_eol]
 422             output = message + terminator
 423
 424             self._write_string(output, self._screen_file)
 425
 426     def to_stderr(self, message):
 427         """Print message to stderr."""
 428         assert isinstance(message, compat_str)
 429         if self.params.get('logger'):
 430             self.params['logger'].error(message)
 431         else:
 432             message = self._bidi_workaround(message)
 433             output = message + '\n'
 434             self._write_string(output, self._err_file)
 435
 436     def to_console_title(self, message):
 437         if not self.params.get('consoletitle', False):
 438             return
 439         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 440             # c_wchar_p() might not be necessary if `message` is
 441             # already of type unicode()
 442             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 443         elif 'TERM' in os.environ:
 444             self._write_string('\033]0;%s\007' % message, self._screen_file)
 445
 446     def save_console_title(self):
 447         if not self.params.get('consoletitle', False):
 448             return
 449         if 'TERM' in os.environ:
 450             # Save the title on stack
 451             self._write_string('\033[22;0t', self._screen_file)
 452
 453     def restore_console_title(self):
 454         if not self.params.get('consoletitle', False):
 455             return
 456         if 'TERM' in os.environ:
 457             # Restore the title from stack
 458             self._write_string('\033[23;0t', self._screen_file)
 459
 460     def __enter__(self):
 461         self.save_console_title()
 462         return self
 463
 464     def __exit__(self, *args):
 465         self.restore_console_title()
 466
 467         if self.params.get('cookiefile') is not None:
 468             self.cookiejar.save()
 469
 470     def trouble(self, message=None, tb=None):
 471         """Determine action to take when a download problem appears.
 472
 473         Depending on if the downloader has been configured to ignore
 474         download errors or not, this method may throw an exception or
 475         not when errors are found, after printing the message.
 476
 477         tb, if given, is additional traceback information.
 478         """
 479         if message is not None:
 480             self.to_stderr(message)
 481         if self.params.get('verbose'):
 482             if tb is None:
 483                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 484                     tb = ''
 485                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 486                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 487                     tb += compat_str(traceback.format_exc())
 488                 else:
 489                     tb_data = traceback.format_list(traceback.extract_stack())
 490                     tb = ''.join(tb_data)
 491             self.to_stderr(tb)
 492         if not self.params.get('ignoreerrors', False):
 493             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 494                 exc_info = sys.exc_info()[1].exc_info
 495             else:
 496                 exc_info = sys.exc_info()
 497             raise DownloadError(message, exc_info)
 498         self._download_retcode = 1
 499
 500     def report_warning(self, message):
 501         '''
 502         Print the message to stderr, it will be prefixed with 'WARNING:'
 503         If stderr is a tty file the 'WARNING:' will be colored
 504         '''
 505         if self.params.get('logger') is not None:
 506             self.params['logger'].warning(message)
 507         else:
 508             if self.params.get('no_warnings'):
 509                 return
 510             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
 511                 _msg_header = '\033[0;33mWARNING:\033[0m'
 512             else:
 513                 _msg_header = 'WARNING:'
 514             warning_message = '%s %s' % (_msg_header, message)
 515             self.to_stderr(warning_message)
 516
 517     def report_error(self, message, tb=None):
 518         '''
 519         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 520         in red if stderr is a tty file.
 521         '''
 522         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
 523             _msg_header = '\033[0;31mERROR:\033[0m'
 524         else:
 525             _msg_header = 'ERROR:'
 526         error_message = '%s %s' % (_msg_header, message)
 527         self.trouble(error_message, tb)
 528
 529     def report_file_already_downloaded(self, file_name):
 530         """Report file has already been fully downloaded."""
 531         try:
 532             self.to_screen('[download] %s has already been downloaded' % file_name)
 533         except UnicodeEncodeError:
 534             self.to_screen('[download] The file has already been downloaded')
 535
 536     def prepare_filename(self, info_dict):
 537         """Generate the output filename."""
 538         try:
 539             template_dict = dict(info_dict)
 540
 541             template_dict['epoch'] = int(time.time())
 542             autonumber_size = self.params.get('autonumber_size')
 543             if autonumber_size is None:
 544                 autonumber_size = 5
 545             autonumber_templ = '%0' + str(autonumber_size) + 'd'
 546             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 547             if template_dict.get('playlist_index') is not None:
 548                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
 549             if template_dict.get('resolution') is None:
 550                 if template_dict.get('width') and template_dict.get('height'):
 551                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 552                 elif template_dict.get('height'):
 553                     template_dict['resolution'] = '%sp' % template_dict['height']
 554                 elif template_dict.get('width'):
 555                     template_dict['resolution'] = '?x%d' % template_dict['width']
 556
 557             sanitize = lambda k, v: sanitize_filename(
 558                 compat_str(v),
 559                 restricted=self.params.get('restrictfilenames'),
 560                 is_id=(k == 'id'))
 561             template_dict = dict((k, sanitize(k, v))
 562                                  for k, v in template_dict.items()
 563                                  if v is not None)
 564             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 565
 566             outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
 567             tmpl = compat_expanduser(outtmpl)
 568             filename = tmpl % template_dict
 569             # Temporary fix for #4787
 570             # 'Treat' all problem characters by passing filename through preferredencoding
 571             # to workaround encoding issues with subprocess on python2 @ Windows
 572             if sys.version_info < (3, 0) and sys.platform == 'win32':
 573                 filename = encodeFilename(filename, True).decode(preferredencoding())
 574             return filename
 575         except ValueError as err:
 576             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 577             return None
 578
 579     def _match_entry(self, info_dict, incomplete):
 580         """ Returns None iff the file should be downloaded """
 581
 582         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 583         if 'title' in info_dict:
 584             # This can happen when we're just evaluating the playlist
 585             title = info_dict['title']
 586             matchtitle = self.params.get('matchtitle', False)
 587             if matchtitle:
 588                 if not re.search(matchtitle, title, re.IGNORECASE):
 589                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 590             rejecttitle = self.params.get('rejecttitle', False)
 591             if rejecttitle:
 592                 if re.search(rejecttitle, title, re.IGNORECASE):
 593                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 594         date = info_dict.get('upload_date', None)
 595         if date is not None:
 596             dateRange = self.params.get('daterange', DateRange())
 597             if date not in dateRange:
 598                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 599         view_count = info_dict.get('view_count', None)
 600         if view_count is not None:
 601             min_views = self.params.get('min_views')
 602             if min_views is not None and view_count < min_views:
 603                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 604             max_views = self.params.get('max_views')
 605             if max_views is not None and view_count > max_views:
 606                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 607         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
 608             return 'Skipping "%s" because it is age restricted' % video_title
 609         if self.in_download_archive(info_dict):
 610             return '%s has already been recorded in archive' % video_title
 611
 612         if not incomplete:
 613             match_filter = self.params.get('match_filter')
 614             if match_filter is not None:
 615                 ret = match_filter(info_dict)
 616                 if ret is not None:
 617                     return ret
 618
 619         return None
 620
 621     @staticmethod
 622     def add_extra_info(info_dict, extra_info):
 623         '''Set the keys from extra_info in info dict if they are missing'''
 624         for key, value in extra_info.items():
 625             info_dict.setdefault(key, value)
 626
 627     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 628                      process=True):
 629         '''
 630         Returns a list with a dictionary for each video we find.
 631         If 'download', also downloads the videos.
 632         extra_info is a dict containing the extra values to add to each result
 633         '''
 634
 635         if ie_key:
 636             ies = [self.get_info_extractor(ie_key)]
 637         else:
 638             ies = self._ies
 639
 640         for ie in ies:
 641             if not ie.suitable(url):
 642                 continue
 643
 644             if not ie.working():
 645                 self.report_warning('The program functionality for this site has been marked as broken, '
 646                                     'and will probably not work.')
 647
 648             try:
 649                 ie_result = ie.extract(url)
 650                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
 651                     break
 652                 if isinstance(ie_result, list):
 653                     # Backwards compatibility: old IE result format
 654                     ie_result = {
 655                         '_type': 'compat_list',
 656                         'entries': ie_result,
 657                     }
 658                 self.add_default_extra_info(ie_result, ie, url)
 659                 if process:
 660                     return self.process_ie_result(ie_result, download, extra_info)
 661                 else:
 662                     return ie_result
 663             except ExtractorError as de:  # An error we somewhat expected
 664                 self.report_error(compat_str(de), de.format_traceback())
 665                 break
 666             except MaxDownloadsReached:
 667                 raise
 668             except Exception as e:
 669                 if self.params.get('ignoreerrors', False):
 670                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 671                     break
 672                 else:
 673                     raise
 674         else:
 675             self.report_error('no suitable InfoExtractor for URL %s' % url)
 676
 677     def add_default_extra_info(self, ie_result, ie, url):
 678         self.add_extra_info(ie_result, {
 679             'extractor': ie.IE_NAME,
 680             'webpage_url': url,
 681             'webpage_url_basename': url_basename(url),
 682             'extractor_key': ie.ie_key(),
 683         })
 684
 685     def process_ie_result(self, ie_result, download=True, extra_info={}):
 686         """
 687         Take the result of the ie(may be modified) and resolve all unresolved
 688         references (URLs, playlist items).
 689
 690         It will also download the videos if 'download'.
 691         Returns the resolved ie_result.
 692         """
 693
 694         result_type = ie_result.get('_type', 'video')
 695
 696         if result_type in ('url', 'url_transparent'):
 697             extract_flat = self.params.get('extract_flat', False)
 698             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
 699                     extract_flat is True):
 700                 if self.params.get('forcejson', False):
 701                     self.to_stdout(json.dumps(ie_result))
 702                 return ie_result
 703
 704         if result_type == 'video':
 705             self.add_extra_info(ie_result, extra_info)
 706             return self.process_video_result(ie_result, download=download)
 707         elif result_type == 'url':
 708             # We have to add extra_info to the results because it may be
 709             # contained in a playlist
 710             return self.extract_info(ie_result['url'],
 711                                      download,
 712                                      ie_key=ie_result.get('ie_key'),
 713                                      extra_info=extra_info)
 714         elif result_type == 'url_transparent':
 715             # Use the information from the embedding page
 716             info = self.extract_info(
 717                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 718                 extra_info=extra_info, download=False, process=False)
 719
 720             force_properties = dict(
 721                 (k, v) for k, v in ie_result.items() if v is not None)
 722             for f in ('_type', 'url'):
 723                 if f in force_properties:
 724                     del force_properties[f]
 725             new_result = info.copy()
 726             new_result.update(force_properties)
 727
 728             assert new_result.get('_type') != 'url_transparent'
 729
 730             return self.process_ie_result(
 731                 new_result, download=download, extra_info=extra_info)
 732         elif result_type == 'playlist' or result_type == 'multi_video':
 733             # We process each entry in the playlist
 734             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 735             self.to_screen('[download] Downloading playlist: %s' % playlist)
 736
 737             playlist_results = []
 738
 739             playliststart = self.params.get('playliststart', 1) - 1
 740             playlistend = self.params.get('playlistend', None)
 741             # For backwards compatibility, interpret -1 as whole list
 742             if playlistend == -1:
 743                 playlistend = None
 744
 745             playlistitems_str = self.params.get('playlist_items', None)
 746             playlistitems = None
 747             if playlistitems_str is not None:
 748                 def iter_playlistitems(format):
 749                     for string_segment in format.split(','):
 750                         if '-' in string_segment:
 751                             start, end = string_segment.split('-')
 752                             for item in range(int(start), int(end) + 1):
 753                                 yield int(item)
 754                         else:
 755                             yield int(string_segment)
 756                 playlistitems = iter_playlistitems(playlistitems_str)
 757
 758             ie_entries = ie_result['entries']
 759             if isinstance(ie_entries, list):
 760                 n_all_entries = len(ie_entries)
 761                 if playlistitems:
 762                     entries = [
 763                         ie_entries[i - 1] for i in playlistitems
 764                         if -n_all_entries <= i - 1 < n_all_entries]
 765                 else:
 766                     entries = ie_entries[playliststart:playlistend]
 767                 n_entries = len(entries)
 768                 self.to_screen(
 769                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
 770                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
 771             elif isinstance(ie_entries, PagedList):
 772                 if playlistitems:
 773                     entries = []
 774                     for item in playlistitems:
 775                         entries.extend(ie_entries.getslice(
 776                             item - 1, item
 777                         ))
 778                 else:
 779                     entries = ie_entries.getslice(
 780                         playliststart, playlistend)
 781                 n_entries = len(entries)
 782                 self.to_screen(
 783                     "[%s] playlist %s: Downloading %d videos" %
 784                     (ie_result['extractor'], playlist, n_entries))
 785             else:  # iterable
 786                 if playlistitems:
 787                     entry_list = list(ie_entries)
 788                     entries = [entry_list[i - 1] for i in playlistitems]
 789                 else:
 790                     entries = list(itertools.islice(
 791                         ie_entries, playliststart, playlistend))
 792                 n_entries = len(entries)
 793                 self.to_screen(
 794                     "[%s] playlist %s: Downloading %d videos" %
 795                     (ie_result['extractor'], playlist, n_entries))
 796
 797             if self.params.get('playlistreverse', False):
 798                 entries = entries[::-1]
 799
 800             for i, entry in enumerate(entries, 1):
 801                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
 802                 extra = {
 803                     'n_entries': n_entries,
 804                     'playlist': playlist,
 805                     'playlist_id': ie_result.get('id'),
 806                     'playlist_title': ie_result.get('title'),
 807                     'playlist_index': i + playliststart,
 808                     'extractor': ie_result['extractor'],
 809                     'webpage_url': ie_result['webpage_url'],
 810                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
 811                     'extractor_key': ie_result['extractor_key'],
 812                 }
 813
 814                 reason = self._match_entry(entry, incomplete=True)
 815                 if reason is not None:
 816                     self.to_screen('[download] ' + reason)
 817                     continue
 818
 819                 entry_result = self.process_ie_result(entry,
 820                                                       download=download,
 821                                                       extra_info=extra)
 822                 playlist_results.append(entry_result)
 823             ie_result['entries'] = playlist_results
 824             return ie_result
 825         elif result_type == 'compat_list':
 826             self.report_warning(
 827                 'Extractor %s returned a compat_list result. '
 828                 'It needs to be updated.' % ie_result.get('extractor'))
 829
 830             def _fixup(r):
 831                 self.add_extra_info(
 832                     r,
 833                     {
 834                         'extractor': ie_result['extractor'],
 835                         'webpage_url': ie_result['webpage_url'],
 836                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
 837                         'extractor_key': ie_result['extractor_key'],
 838                     }
 839                 )
 840                 return r
 841             ie_result['entries'] = [
 842                 self.process_ie_result(_fixup(r), download, extra_info)
 843                 for r in ie_result['entries']
 844             ]
 845             return ie_result
 846         else:
 847             raise Exception('Invalid result type: %s' % result_type)
 848
 849     def _apply_format_filter(self, format_spec, available_formats):
 850         " Returns a tuple of the remaining format_spec and filtered formats "
 851
 852         OPERATORS = {
 853             '<': operator.lt,
 854             '<=': operator.le,
 855             '>': operator.gt,
 856             '>=': operator.ge,
 857             '=': operator.eq,
 858             '!=': operator.ne,
 859         }
 860         operator_rex = re.compile(r'''(?x)\s*\[
 861             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
 862             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
 863             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
 864             \]$
 865             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
 866         m = operator_rex.search(format_spec)
 867         if m:
 868             try:
 869                 comparison_value = int(m.group('value'))
 870             except ValueError:
 871                 comparison_value = parse_filesize(m.group('value'))
 872                 if comparison_value is None:
 873                     comparison_value = parse_filesize(m.group('value') + 'B')
 874                 if comparison_value is None:
 875                     raise ValueError(
 876                         'Invalid value %r in format specification %r' % (
 877                             m.group('value'), format_spec))
 878             op = OPERATORS[m.group('op')]
 879
 880         if not m:
 881             STR_OPERATORS = {
 882                 '=': operator.eq,
 883                 '!=': operator.ne,
 884             }
 885             str_operator_rex = re.compile(r'''(?x)\s*\[
 886                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
 887                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
 888                 \s*(?P<value>[a-zA-Z0-9_-]+)
 889                 \s*\]$
 890                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
 891             m = str_operator_rex.search(format_spec)
 892             if m:
 893                 comparison_value = m.group('value')
 894                 op = STR_OPERATORS[m.group('op')]
 895
 896         if not m:
 897             raise ValueError('Invalid format specification %r' % format_spec)
 898
 899         def _filter(f):
 900             actual_value = f.get(m.group('key'))
 901             if actual_value is None:
 902                 return m.group('none_inclusive')
 903             return op(actual_value, comparison_value)
 904         new_formats = [f for f in available_formats if _filter(f)]
 905
 906         new_format_spec = format_spec[:-len(m.group(0))]
 907         if not new_format_spec:
 908             new_format_spec = 'best'
 909
 910         return (new_format_spec, new_formats)
 911
 912     def select_format(self, format_spec, available_formats):
 913         while format_spec.endswith(']'):
 914             format_spec, available_formats = self._apply_format_filter(
 915                 format_spec, available_formats)
 916         if not available_formats:
 917             return None
 918
 919         if format_spec in ['best', 'worst', None]:
 920             format_idx = 0 if format_spec == 'worst' else -1
 921             audiovideo_formats = [
 922                 f for f in available_formats
 923                 if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
 924             if audiovideo_formats:
 925                 return audiovideo_formats[format_idx]
 926             # for audio only urls, select the best/worst audio format
 927             elif all(f.get('acodec') != 'none' for f in available_formats):
 928                 return available_formats[format_idx]
 929         elif format_spec == 'bestaudio':
 930             audio_formats = [
 931                 f for f in available_formats
 932                 if f.get('vcodec') == 'none']
 933             if audio_formats:
 934                 return audio_formats[-1]
 935         elif format_spec == 'worstaudio':
 936             audio_formats = [
 937                 f for f in available_formats
 938                 if f.get('vcodec') == 'none']
 939             if audio_formats:
 940                 return audio_formats[0]
 941         elif format_spec == 'bestvideo':
 942             video_formats = [
 943                 f for f in available_formats
 944                 if f.get('acodec') == 'none']
 945             if video_formats:
 946                 return video_formats[-1]
 947         elif format_spec == 'worstvideo':
 948             video_formats = [
 949                 f for f in available_formats
 950                 if f.get('acodec') == 'none']
 951             if video_formats:
 952                 return video_formats[0]
 953         else:
 954             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
 955             if format_spec in extensions:
 956                 filter_f = lambda f: f['ext'] == format_spec
 957             else:
 958                 filter_f = lambda f: f['format_id'] == format_spec
 959             matches = list(filter(filter_f, available_formats))
 960             if matches:
 961                 return matches[-1]
 962         return None
 963
 964     def _calc_headers(self, info_dict):
 965         res = std_headers.copy()
 966
 967         add_headers = info_dict.get('http_headers')
 968         if add_headers:
 969             res.update(add_headers)
 970
 971         cookies = self._calc_cookies(info_dict)
 972         if cookies:
 973             res['Cookie'] = cookies
 974
 975         return res
 976
 977     def _calc_cookies(self, info_dict):
 978         pr = compat_urllib_request.Request(info_dict['url'])
 979         self.cookiejar.add_cookie_header(pr)
 980         return pr.get_header('Cookie')
 981
 982     def process_video_result(self, info_dict, download=True):
 983         assert info_dict.get('_type', 'video') == 'video'
 984
 985         if 'id' not in info_dict:
 986             raise ExtractorError('Missing "id" field in extractor result')
 987         if 'title' not in info_dict:
 988             raise ExtractorError('Missing "title" field in extractor result')
 989
 990         if 'playlist' not in info_dict:
 991             # It isn't part of a playlist
 992             info_dict['playlist'] = None
 993             info_dict['playlist_index'] = None
 994
 995         thumbnails = info_dict.get('thumbnails')
 996         if thumbnails is None:
 997             thumbnail = info_dict.get('thumbnail')
 998             if thumbnail:
 999                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1000         if thumbnails:
1001             thumbnails.sort(key=lambda t: (
1002                 t.get('preference'), t.get('width'), t.get('height'),
1003                 t.get('id'), t.get('url')))
1004             for i, t in enumerate(thumbnails):
1005                 if 'width' in t and 'height' in t:
1006                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1007                 if t.get('id') is None:
1008                     t['id'] = '%d' % i
1009
1010         if thumbnails and 'thumbnail' not in info_dict:
1011             info_dict['thumbnail'] = thumbnails[-1]['url']
1012
1013         if 'display_id' not in info_dict and 'id' in info_dict:
1014             info_dict['display_id'] = info_dict['id']
1015
1016         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1017             # Working around negative timestamps in Windows
1018             # (see http://bugs.python.org/issue1646728)
1019             if info_dict['timestamp'] < 0 and os.name == 'nt':
1020                 info_dict['timestamp'] = 0
1021             upload_date = datetime.datetime.utcfromtimestamp(
1022                 info_dict['timestamp'])
1023             info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1024
1025         if self.params.get('listsubtitles', False):
1026             if 'automatic_captions' in info_dict:
1027                 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
1028             self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
1029             return
1030         info_dict['requested_subtitles'] = self.process_subtitles(
1031             info_dict['id'], info_dict.get('subtitles'),
1032             info_dict.get('automatic_captions'))
1033
1034         # This extractors handle format selection themselves
1035         if info_dict['extractor'] in ['Youku']:
1036             if download:
1037                 self.process_info(info_dict)
1038             return info_dict
1039
1040         # We now pick which formats have to be downloaded
1041         if info_dict.get('formats') is None:
1042             # There's only one format available
1043             formats = [info_dict]
1044         else:
1045             formats = info_dict['formats']
1046
1047         if not formats:
1048             raise ExtractorError('No video formats found!')
1049
1050         formats_dict = {}
1051
1052         # We check that all the formats have the format and format_id fields
1053         for i, format in enumerate(formats):
1054             if 'url' not in format:
1055                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1056
1057             if format.get('format_id') is None:
1058                 format['format_id'] = compat_str(i)
1059             format_id = format['format_id']
1060             if format_id not in formats_dict:
1061                 formats_dict[format_id] = []
1062             formats_dict[format_id].append(format)
1063
1064         # Make sure all formats have unique format_id
1065         for format_id, ambiguous_formats in formats_dict.items():
1066             if len(ambiguous_formats) > 1:
1067                 for i, format in enumerate(ambiguous_formats):
1068                     format['format_id'] = '%s-%d' % (format_id, i)
1069
1070         for i, format in enumerate(formats):
1071             if format.get('format') is None:
1072                 format['format'] = '{id} - {res}{note}'.format(
1073                     id=format['format_id'],
1074                     res=self.format_resolution(format),
1075                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1076                 )
1077             # Automatically determine file extension if missing
1078             if 'ext' not in format:
1079                 format['ext'] = determine_ext(format['url']).lower()
1080             # Add HTTP headers, so that external programs can use them from the
1081             # json output
1082             full_format_info = info_dict.copy()
1083             full_format_info.update(format)
1084             format['http_headers'] = self._calc_headers(full_format_info)
1085
1086         # TODO Central sorting goes here
1087
1088         if formats[0] is not info_dict:
1089             # only set the 'formats' fields if the original info_dict list them
1090             # otherwise we end up with a circular reference, the first (and unique)
1091             # element in the 'formats' field in info_dict is info_dict itself,
1092             # wich can't be exported to json
1093             info_dict['formats'] = formats
1094         if self.params.get('listformats'):
1095             self.list_formats(info_dict)
1096             return
1097         if self.params.get('list_thumbnails'):
1098             self.list_thumbnails(info_dict)
1099             return
1100
1101         req_format = self.params.get('format')
1102         if req_format is None:
1103             req_format_list = []
1104             if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
1105                     info_dict['extractor'] in ['youtube', 'ted']):
1106                 merger = FFmpegMergerPP(self)
1107                 if merger.available and merger.can_merge():
1108                     req_format_list.append('bestvideo+bestaudio')
1109             req_format_list.append('best')
1110             req_format = '/'.join(req_format_list)
1111         formats_to_download = []
1112         if req_format == 'all':
1113             formats_to_download = formats
1114         else:
1115             for rfstr in req_format.split(','):
1116                 # We can accept formats requested in the format: 34/5/best, we pick
1117                 # the first that is available, starting from left
1118                 req_formats = rfstr.split('/')
1119                 for rf in req_formats:
1120                     if re.match(r'.+?\+.+?', rf) is not None:
1121                         # Two formats have been requested like '137+139'
1122                         format_1, format_2 = rf.split('+')
1123                         formats_info = (self.select_format(format_1, formats),
1124                                         self.select_format(format_2, formats))
1125                         if all(formats_info):
1126                             # The first format must contain the video and the
1127                             # second the audio
1128                             if formats_info[0].get('vcodec') == 'none':
1129                                 self.report_error('The first format must '
1130                                                   'contain the video, try using '
1131                                                   '"-f %s+%s"' % (format_2, format_1))
1132                                 return
1133                             output_ext = (
1134                                 formats_info[0]['ext']
1135                                 if self.params.get('merge_output_format') is None
1136                                 else self.params['merge_output_format'])
1137                             selected_format = {
1138                                 'requested_formats': formats_info,
1139                                 'format': '%s+%s' % (formats_info[0].get('format'),
1140                                                      formats_info[1].get('format')),
1141                                 'format_id': '%s+%s' % (formats_info[0].get('format_id'),
1142                                                         formats_info[1].get('format_id')),
1143                                 'width': formats_info[0].get('width'),
1144                                 'height': formats_info[0].get('height'),
1145                                 'resolution': formats_info[0].get('resolution'),
1146                                 'fps': formats_info[0].get('fps'),
1147                                 'vcodec': formats_info[0].get('vcodec'),
1148                                 'vbr': formats_info[0].get('vbr'),
1149                                 'stretched_ratio': formats_info[0].get('stretched_ratio'),
1150                                 'acodec': formats_info[1].get('acodec'),
1151                                 'abr': formats_info[1].get('abr'),
1152                                 'ext': output_ext,
1153                             }
1154                         else:
1155                             selected_format = None
1156                     else:
1157                         selected_format = self.select_format(rf, formats)
1158                     if selected_format is not None:
1159                         formats_to_download.append(selected_format)
1160                         break
1161         if not formats_to_download:
1162             raise ExtractorError('requested format not available',
1163                                  expected=True)
1164
1165         if download:
1166             if len(formats_to_download) > 1:
1167                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1168             for format in formats_to_download:
1169                 new_info = dict(info_dict)
1170                 new_info.update(format)
1171                 self.process_info(new_info)
1172         # We update the info dict with the best quality format (backwards compatibility)
1173         info_dict.update(formats_to_download[-1])
1174         return info_dict
1175
1176     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1177         """Select the requested subtitles and their format"""
1178         available_subs = {}
1179         if normal_subtitles and self.params.get('writesubtitles'):
1180             available_subs.update(normal_subtitles)
1181         if automatic_captions and self.params.get('writeautomaticsub'):
1182             for lang, cap_info in automatic_captions.items():
1183                 if lang not in available_subs:
1184                     available_subs[lang] = cap_info
1185
1186         if (not self.params.get('writesubtitles') and not
1187                 self.params.get('writeautomaticsub') or not
1188                 available_subs):
1189             return None
1190
1191         if self.params.get('allsubtitles', False):
1192             requested_langs = available_subs.keys()
1193         else:
1194             if self.params.get('subtitleslangs', False):
1195                 requested_langs = self.params.get('subtitleslangs')
1196             elif 'en' in available_subs:
1197                 requested_langs = ['en']
1198             else:
1199                 requested_langs = [list(available_subs.keys())[0]]
1200
1201         formats_query = self.params.get('subtitlesformat', 'best')
1202         formats_preference = formats_query.split('/') if formats_query else []
1203         subs = {}
1204         for lang in requested_langs:
1205             formats = available_subs.get(lang)
1206             if formats is None:
1207                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1208                 continue
1209             for ext in formats_preference:
1210                 if ext == 'best':
1211                     f = formats[-1]
1212                     break
1213                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1214                 if matches:
1215                     f = matches[-1]
1216                     break
1217             else:
1218                 f = formats[-1]
1219                 self.report_warning(
1220                     'No subtitle format found matching "%s" for language %s, '
1221                     'using %s' % (formats_query, lang, f['ext']))
1222             subs[lang] = f
1223         return subs
1224
1225     def process_info(self, info_dict):
1226         """Process a single resolved IE result."""
1227
1228         assert info_dict.get('_type', 'video') == 'video'
1229
1230         max_downloads = self.params.get('max_downloads')
1231         if max_downloads is not None:
1232             if self._num_downloads >= int(max_downloads):
1233                 raise MaxDownloadsReached()
1234
1235         info_dict['fulltitle'] = info_dict['title']
1236         if len(info_dict['title']) > 200:
1237             info_dict['title'] = info_dict['title'][:197] + '...'
1238
1239         if 'format' not in info_dict:
1240             info_dict['format'] = info_dict['ext']
1241
1242         reason = self._match_entry(info_dict, incomplete=False)
1243         if reason is not None:
1244             self.to_screen('[download] ' + reason)
1245             return
1246
1247         self._num_downloads += 1
1248
1249         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1250
1251         # Forced printings
1252         if self.params.get('forcetitle', False):
1253             self.to_stdout(info_dict['fulltitle'])
1254         if self.params.get('forceid', False):
1255             self.to_stdout(info_dict['id'])
1256         if self.params.get('forceurl', False):
1257             if info_dict.get('requested_formats') is not None:
1258                 for f in info_dict['requested_formats']:
1259                     self.to_stdout(f['url'] + f.get('play_path', ''))
1260             else:
1261                 # For RTMP URLs, also include the playpath
1262                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1263         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1264             self.to_stdout(info_dict['thumbnail'])
1265         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1266             self.to_stdout(info_dict['description'])
1267         if self.params.get('forcefilename', False) and filename is not None:
1268             self.to_stdout(filename)
1269         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1270             self.to_stdout(formatSeconds(info_dict['duration']))
1271         if self.params.get('forceformat', False):
1272             self.to_stdout(info_dict['format'])
1273         if self.params.get('forcejson', False):
1274             self.to_stdout(json.dumps(info_dict))
1275
1276         # Do nothing else if in simulate mode
1277         if self.params.get('simulate', False):
1278             return
1279
1280         if filename is None:
1281             return
1282
1283         try:
1284             dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1285             if dn and not os.path.exists(dn):
1286                 os.makedirs(dn)
1287         except (OSError, IOError) as err:
1288             self.report_error('unable to create directory ' + compat_str(err))
1289             return
1290
1291         if self.params.get('writedescription', False):
1292             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1293             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1294                 self.to_screen('[info] Video description is already present')
1295             elif info_dict.get('description') is None:
1296                 self.report_warning('There\'s no description to write.')
1297             else:
1298                 try:
1299                     self.to_screen('[info] Writing video description to: ' + descfn)
1300                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1301                         descfile.write(info_dict['description'])
1302                 except (OSError, IOError):
1303                     self.report_error('Cannot write description file ' + descfn)
1304                     return
1305
1306         if self.params.get('writeannotations', False):
1307             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1308             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1309                 self.to_screen('[info] Video annotations are already present')
1310             else:
1311                 try:
1312                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1313                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1314                         annofile.write(info_dict['annotations'])
1315                 except (KeyError, TypeError):
1316                     self.report_warning('There are no annotations to write.')
1317                 except (OSError, IOError):
1318                     self.report_error('Cannot write annotations file: ' + annofn)
1319                     return
1320
1321         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1322                                        self.params.get('writeautomaticsub')])
1323
1324         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1325             # subtitles download errors are already managed as troubles in relevant IE
1326             # that way it will silently go on when used with unsupporting IE
1327             subtitles = info_dict['requested_subtitles']
1328             ie = self.get_info_extractor(info_dict['extractor_key'])
1329             for sub_lang, sub_info in subtitles.items():
1330                 sub_format = sub_info['ext']
1331                 if sub_info.get('data') is not None:
1332                     sub_data = sub_info['data']
1333                 else:
1334                     try:
1335                         sub_data = ie._download_webpage(
1336                             sub_info['url'], info_dict['id'], note=False)
1337                     except ExtractorError as err:
1338                         self.report_warning('Unable to download subtitle for "%s": %s' %
1339                                             (sub_lang, compat_str(err.cause)))
1340                         continue
1341                 try:
1342                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1343                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1344                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1345                     else:
1346                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1347                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1348                             subfile.write(sub_data)
1349                 except (OSError, IOError):
1350                     self.report_error('Cannot write subtitles file ' + sub_filename)
1351                     return
1352
1353         if self.params.get('writeinfojson', False):
1354             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1355             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1356                 self.to_screen('[info] Video description metadata is already present')
1357             else:
1358                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1359                 try:
1360                     write_json_file(self.filter_requested_info(info_dict), infofn)
1361                 except (OSError, IOError):
1362                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1363                     return
1364
1365         self._write_thumbnails(info_dict, filename)
1366
1367         if not self.params.get('skip_download', False):
1368             try:
1369                 def dl(name, info):
1370                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1371                     for ph in self._progress_hooks:
1372                         fd.add_progress_hook(ph)
1373                     if self.params.get('verbose'):
1374                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1375                     return fd.download(name, info)
1376
1377                 if info_dict.get('requested_formats') is not None:
1378                     downloaded = []
1379                     success = True
1380                     merger = FFmpegMergerPP(self)
1381                     if not merger.available:
1382                         postprocessors = []
1383                         self.report_warning('You have requested multiple '
1384                                             'formats but ffmpeg or avconv are not installed.'
1385                                             ' The formats won\'t be merged.')
1386                     else:
1387                         postprocessors = [merger]
1388
1389                     def compatible_formats(formats):
1390                         video, audio = formats
1391                         # Check extension
1392                         video_ext, audio_ext = audio.get('ext'), video.get('ext')
1393                         if video_ext and audio_ext:
1394                             COMPATIBLE_EXTS = (
1395                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
1396                                 ('webm')
1397                             )
1398                             for exts in COMPATIBLE_EXTS:
1399                                 if video_ext in exts and audio_ext in exts:
1400                                     return True
1401                         # TODO: Check acodec/vcodec
1402                         return False
1403
1404                     filename_real_ext = os.path.splitext(filename)[1][1:]
1405                     filename_wo_ext = (
1406                         os.path.splitext(filename)[0]
1407                         if filename_real_ext == info_dict['ext']
1408                         else filename)
1409                     requested_formats = info_dict['requested_formats']
1410                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1411                         info_dict['ext'] = 'mkv'
1412                         self.report_warning(
1413                             'Requested formats are incompatible for merge and will be merged into mkv.')
1414                     # Ensure filename always has a correct extension for successful merge
1415                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1416                     if os.path.exists(encodeFilename(filename)):
1417                         self.to_screen(
1418                             '[download] %s has already been downloaded and '
1419                             'merged' % filename)
1420                     else:
1421                         for f in requested_formats:
1422                             new_info = dict(info_dict)
1423                             new_info.update(f)
1424                             fname = self.prepare_filename(new_info)
1425                             fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
1426                             downloaded.append(fname)
1427                             partial_success = dl(fname, new_info)
1428                             success = success and partial_success
1429                         info_dict['__postprocessors'] = postprocessors
1430                         info_dict['__files_to_merge'] = downloaded
1431                 else:
1432                     # Just a single file
1433                     success = dl(filename, info_dict)
1434             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1435                 self.report_error('unable to download video data: %s' % str(err))
1436                 return
1437             except (OSError, IOError) as err:
1438                 raise UnavailableVideoError(err)
1439             except (ContentTooShortError, ) as err:
1440                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1441                 return
1442
1443             if success:
1444                 # Fixup content
1445                 fixup_policy = self.params.get('fixup')
1446                 if fixup_policy is None:
1447                     fixup_policy = 'detect_or_warn'
1448
1449                 stretched_ratio = info_dict.get('stretched_ratio')
1450                 if stretched_ratio is not None and stretched_ratio != 1:
1451                     if fixup_policy == 'warn':
1452                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1453                             info_dict['id'], stretched_ratio))
1454                     elif fixup_policy == 'detect_or_warn':
1455                         stretched_pp = FFmpegFixupStretchedPP(self)
1456                         if stretched_pp.available:
1457                             info_dict.setdefault('__postprocessors', [])
1458                             info_dict['__postprocessors'].append(stretched_pp)
1459                         else:
1460                             self.report_warning(
1461                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1462                                     info_dict['id'], stretched_ratio))
1463                     else:
1464                         assert fixup_policy in ('ignore', 'never')
1465
1466                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1467                     if fixup_policy == 'warn':
1468                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1469                             info_dict['id']))
1470                     elif fixup_policy == 'detect_or_warn':
1471                         fixup_pp = FFmpegFixupM4aPP(self)
1472                         if fixup_pp.available:
1473                             info_dict.setdefault('__postprocessors', [])
1474                             info_dict['__postprocessors'].append(fixup_pp)
1475                         else:
1476                             self.report_warning(
1477                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1478                                     info_dict['id']))
1479                     else:
1480                         assert fixup_policy in ('ignore', 'never')
1481
1482                 try:
1483                     self.post_process(filename, info_dict)
1484                 except (PostProcessingError) as err:
1485                     self.report_error('postprocessing: %s' % str(err))
1486                     return
1487                 self.record_download_archive(info_dict)
1488
1489     def download(self, url_list):
1490         """Download a given list of URLs."""
1491         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1492         if (len(url_list) > 1 and
1493                 '%' not in outtmpl and
1494                 self.params.get('max_downloads') != 1):
1495             raise SameFileError(outtmpl)
1496
1497         for url in url_list:
1498             try:
1499                 # It also downloads the videos
1500                 res = self.extract_info(url)
1501             except UnavailableVideoError:
1502                 self.report_error('unable to download video')
1503             except MaxDownloadsReached:
1504                 self.to_screen('[info] Maximum number of downloaded files reached.')
1505                 raise
1506             else:
1507                 if self.params.get('dump_single_json', False):
1508                     self.to_stdout(json.dumps(res))
1509
1510         return self._download_retcode
1511
1512     def download_with_info_file(self, info_filename):
1513         with contextlib.closing(fileinput.FileInput(
1514                 [info_filename], mode='r',
1515                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1516             # FileInput doesn't have a read method, we can't call json.load
1517             info = self.filter_requested_info(json.loads('\n'.join(f)))
1518         try:
1519             self.process_ie_result(info, download=True)
1520         except DownloadError:
1521             webpage_url = info.get('webpage_url')
1522             if webpage_url is not None:
1523                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1524                 return self.download([webpage_url])
1525             else:
1526                 raise
1527         return self._download_retcode
1528
1529     @staticmethod
1530     def filter_requested_info(info_dict):
1531         return dict(
1532             (k, v) for k, v in info_dict.items()
1533             if k not in ['requested_formats', 'requested_subtitles'])
1534
1535     def post_process(self, filename, ie_info):
1536         """Run all the postprocessors on the given file."""
1537         info = dict(ie_info)
1538         info['filepath'] = filename
1539         pps_chain = []
1540         if ie_info.get('__postprocessors') is not None:
1541             pps_chain.extend(ie_info['__postprocessors'])
1542         pps_chain.extend(self._pps)
1543         for pp in pps_chain:
1544             files_to_delete = []
1545             try:
1546                 files_to_delete, info = pp.run(info)
1547             except PostProcessingError as e:
1548                 self.report_error(e.msg)
1549             if files_to_delete and not self.params.get('keepvideo', False):
1550                 for old_filename in files_to_delete:
1551                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1552                     try:
1553                         os.remove(encodeFilename(old_filename))
1554                     except (IOError, OSError):
1555                         self.report_warning('Unable to remove downloaded original file')
1556
1557     def _make_archive_id(self, info_dict):
1558         # Future-proof against any change in case
1559         # and backwards compatibility with prior versions
1560         extractor = info_dict.get('extractor_key')
1561         if extractor is None:
1562             if 'id' in info_dict:
1563                 extractor = info_dict.get('ie_key')  # key in a playlist
1564         if extractor is None:
1565             return None  # Incomplete video information
1566         return extractor.lower() + ' ' + info_dict['id']
1567
1568     def in_download_archive(self, info_dict):
1569         fn = self.params.get('download_archive')
1570         if fn is None:
1571             return False
1572
1573         vid_id = self._make_archive_id(info_dict)
1574         if vid_id is None:
1575             return False  # Incomplete video information
1576
1577         try:
1578             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1579                 for line in archive_file:
1580                     if line.strip() == vid_id:
1581                         return True
1582         except IOError as ioe:
1583             if ioe.errno != errno.ENOENT:
1584                 raise
1585         return False
1586
1587     def record_download_archive(self, info_dict):
1588         fn = self.params.get('download_archive')
1589         if fn is None:
1590             return
1591         vid_id = self._make_archive_id(info_dict)
1592         assert vid_id
1593         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1594             archive_file.write(vid_id + '\n')
1595
1596     @staticmethod
1597     def format_resolution(format, default='unknown'):
1598         if format.get('vcodec') == 'none':
1599             return 'audio only'
1600         if format.get('resolution') is not None:
1601             return format['resolution']
1602         if format.get('height') is not None:
1603             if format.get('width') is not None:
1604                 res = '%sx%s' % (format['width'], format['height'])
1605             else:
1606                 res = '%sp' % format['height']
1607         elif format.get('width') is not None:
1608             res = '?x%d' % format['width']
1609         else:
1610             res = default
1611         return res
1612
1613     def _format_note(self, fdict):
1614         res = ''
1615         if fdict.get('ext') in ['f4f', 'f4m']:
1616             res += '(unsupported) '
1617         if fdict.get('format_note') is not None:
1618             res += fdict['format_note'] + ' '
1619         if fdict.get('tbr') is not None:
1620             res += '%4dk ' % fdict['tbr']
1621         if fdict.get('container') is not None:
1622             if res:
1623                 res += ', '
1624             res += '%s container' % fdict['container']
1625         if (fdict.get('vcodec') is not None and
1626                 fdict.get('vcodec') != 'none'):
1627             if res:
1628                 res += ', '
1629             res += fdict['vcodec']
1630             if fdict.get('vbr') is not None:
1631                 res += '@'
1632         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1633             res += 'video@'
1634         if fdict.get('vbr') is not None:
1635             res += '%4dk' % fdict['vbr']
1636         if fdict.get('fps') is not None:
1637             res += ', %sfps' % fdict['fps']
1638         if fdict.get('acodec') is not None:
1639             if res:
1640                 res += ', '
1641             if fdict['acodec'] == 'none':
1642                 res += 'video only'
1643             else:
1644                 res += '%-5s' % fdict['acodec']
1645         elif fdict.get('abr') is not None:
1646             if res:
1647                 res += ', '
1648             res += 'audio'
1649         if fdict.get('abr') is not None:
1650             res += '@%3dk' % fdict['abr']
1651         if fdict.get('asr') is not None:
1652             res += ' (%5dHz)' % fdict['asr']
1653         if fdict.get('filesize') is not None:
1654             if res:
1655                 res += ', '
1656             res += format_bytes(fdict['filesize'])
1657         elif fdict.get('filesize_approx') is not None:
1658             if res:
1659                 res += ', '
1660             res += '~' + format_bytes(fdict['filesize_approx'])
1661         return res
1662
1663     def list_formats(self, info_dict):
1664         formats = info_dict.get('formats', [info_dict])
1665         table = [
1666             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1667             for f in formats
1668             if f.get('preference') is None or f['preference'] >= -1000]
1669         if len(formats) > 1:
1670             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1671
1672         header_line = ['format code', 'extension', 'resolution', 'note']
1673         self.to_screen(
1674             '[info] Available formats for %s:\n%s' %
1675             (info_dict['id'], render_table(header_line, table)))
1676
1677     def list_thumbnails(self, info_dict):
1678         thumbnails = info_dict.get('thumbnails')
1679         if not thumbnails:
1680             tn_url = info_dict.get('thumbnail')
1681             if tn_url:
1682                 thumbnails = [{'id': '0', 'url': tn_url}]
1683             else:
1684                 self.to_screen(
1685                     '[info] No thumbnails present for %s' % info_dict['id'])
1686                 return
1687
1688         self.to_screen(
1689             '[info] Thumbnails for %s:' % info_dict['id'])
1690         self.to_screen(render_table(
1691             ['ID', 'width', 'height', 'URL'],
1692             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1693
1694     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1695         if not subtitles:
1696             self.to_screen('%s has no %s' % (video_id, name))
1697             return
1698         self.to_screen(
1699             'Available %s for %s:' % (name, video_id))
1700         self.to_screen(render_table(
1701             ['Language', 'formats'],
1702             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1703                 for lang, formats in subtitles.items()]))
1704
1705     def urlopen(self, req):
1706         """ Start an HTTP download """
1707
1708         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1709         # always respected by websites, some tend to give out URLs with non percent-encoded
1710         # non-ASCII characters (see telemb.py, ard.py [#3412])
1711         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1712         # To work around aforementioned issue we will replace request's original URL with
1713         # percent-encoded one
1714         req_is_string = isinstance(req, compat_basestring)
1715         url = req if req_is_string else req.get_full_url()
1716         url_escaped = escape_url(url)
1717
1718         # Substitute URL if any change after escaping
1719         if url != url_escaped:
1720             if req_is_string:
1721                 req = url_escaped
1722             else:
1723                 req = compat_urllib_request.Request(
1724                     url_escaped, data=req.data, headers=req.headers,
1725                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1726
1727         return self._opener.open(req, timeout=self._socket_timeout)
1728
1729     def print_debug_header(self):
1730         if not self.params.get('verbose'):
1731             return
1732
1733         if type('') is not compat_str:
1734             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1735             self.report_warning(
1736                 'Your Python is broken! Update to a newer and supported version')
1737
1738         stdout_encoding = getattr(
1739             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1740         encoding_str = (
1741             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1742                 locale.getpreferredencoding(),
1743                 sys.getfilesystemencoding(),
1744                 stdout_encoding,
1745                 self.get_encoding()))
1746         write_string(encoding_str, encoding=None)
1747
1748         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1749         try:
1750             sp = subprocess.Popen(
1751                 ['git', 'rev-parse', '--short', 'HEAD'],
1752                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1753                 cwd=os.path.dirname(os.path.abspath(__file__)))
1754             out, err = sp.communicate()
1755             out = out.decode().strip()
1756             if re.match('[0-9a-f]+', out):
1757                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1758         except Exception:
1759             try:
1760                 sys.exc_clear()
1761             except Exception:
1762                 pass
1763         self._write_string('[debug] Python version %s - %s\n' % (
1764             platform.python_version(), platform_name()))
1765
1766         exe_versions = FFmpegPostProcessor.get_versions(self)
1767         exe_versions['rtmpdump'] = rtmpdump_version()
1768         exe_str = ', '.join(
1769             '%s %s' % (exe, v)
1770             for exe, v in sorted(exe_versions.items())
1771             if v
1772         )
1773         if not exe_str:
1774             exe_str = 'none'
1775         self._write_string('[debug] exe versions: %s\n' % exe_str)
1776
1777         proxy_map = {}
1778         for handler in self._opener.handlers:
1779             if hasattr(handler, 'proxies'):
1780                 proxy_map.update(handler.proxies)
1781         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1782
1783         if self.params.get('call_home', False):
1784             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1785             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1786             latest_version = self.urlopen(
1787                 'https://yt-dl.org/latest/version').read().decode('utf-8')
1788             if version_tuple(latest_version) > version_tuple(__version__):
1789                 self.report_warning(
1790                     'You are using an outdated version (newest version: %s)! '
1791                     'See https://yt-dl.org/update if you need help updating.' %
1792                     latest_version)
1793
1794     def _setup_opener(self):
1795         timeout_val = self.params.get('socket_timeout')
1796         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1797
1798         opts_cookiefile = self.params.get('cookiefile')
1799         opts_proxy = self.params.get('proxy')
1800
1801         if opts_cookiefile is None:
1802             self.cookiejar = compat_cookiejar.CookieJar()
1803         else:
1804             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1805                 opts_cookiefile)
1806             if os.access(opts_cookiefile, os.R_OK):
1807                 self.cookiejar.load()
1808
1809         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1810             self.cookiejar)
1811         if opts_proxy is not None:
1812             if opts_proxy == '':
1813                 proxies = {}
1814             else:
1815                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1816         else:
1817             proxies = compat_urllib_request.getproxies()
1818             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1819             if 'http' in proxies and 'https' not in proxies:
1820                 proxies['https'] = proxies['http']
1821         proxy_handler = PerRequestProxyHandler(proxies)
1822
1823         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1824         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1825         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1826         opener = compat_urllib_request.build_opener(
1827             proxy_handler, https_handler, cookie_processor, ydlh)
1828
1829         # Delete the default user-agent header, which would otherwise apply in
1830         # cases where our custom HTTP handler doesn't come into play
1831         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1832         opener.addheaders = []
1833         self._opener = opener
1834
1835     def encode(self, s):
1836         if isinstance(s, bytes):
1837             return s  # Already encoded
1838
1839         try:
1840             return s.encode(self.get_encoding())
1841         except UnicodeEncodeError as err:
1842             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1843             raise
1844
1845     def get_encoding(self):
1846         encoding = self.params.get('encoding')
1847         if encoding is None:
1848             encoding = preferredencoding()
1849         return encoding
1850
1851     def _write_thumbnails(self, info_dict, filename):
1852         if self.params.get('writethumbnail', False):
1853             thumbnails = info_dict.get('thumbnails')
1854             if thumbnails:
1855                 thumbnails = [thumbnails[-1]]
1856         elif self.params.get('write_all_thumbnails', False):
1857             thumbnails = info_dict.get('thumbnails')
1858         else:
1859             return
1860
1861         if not thumbnails:
1862             # No thumbnails present, so return immediately
1863             return
1864
1865         for t in thumbnails:
1866             thumb_ext = determine_ext(t['url'], 'jpg')
1867             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
1868             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
1869             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
1870
1871             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1872                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
1873                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1874             else:
1875                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
1876                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1877                 try:
1878                     uf = self.urlopen(t['url'])
1879                     with open(thumb_filename, 'wb') as thumbf:
1880                         shutil.copyfileobj(uf, thumbf)
1881                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
1882                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
1883                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1884                     self.report_warning('Unable to download thumbnail "%s": %s' %
1885                                         (t['url'], compat_str(err)))