git.bitcoin.ninja Git - youtube-dl/blob - youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import datetime
   9 import errno
  10 import fileinput
  11 import io
  12 import itertools
  13 import json
  14 import locale
  15 import operator
  16 import os
  17 import platform
  18 import re
  19 import shutil
  20 import subprocess
  21 import socket
  22 import sys
  23 import time
  24 import traceback
  25
  26 if os.name == 'nt':
  27     import ctypes
  28
  29 from .compat import (
  30     compat_basestring,
  31     compat_cookiejar,
  32     compat_expanduser,
  33     compat_get_terminal_size,
  34     compat_http_client,
  35     compat_kwargs,
  36     compat_str,
  37     compat_urllib_error,
  38     compat_urllib_request,
  39 )
  40 from .utils import (
  41     escape_url,
  42     ContentTooShortError,
  43     date_from_str,
  44     DateRange,
  45     DEFAULT_OUTTMPL,
  46     determine_ext,
  47     DownloadError,
  48     encodeFilename,
  49     ExtractorError,
  50     format_bytes,
  51     formatSeconds,
  52     locked_file,
  53     make_HTTPS_handler,
  54     MaxDownloadsReached,
  55     PagedList,
  56     parse_filesize,
  57     PerRequestProxyHandler,
  58     PostProcessingError,
  59     platform_name,
  60     preferredencoding,
  61     render_table,
  62     SameFileError,
  63     sanitize_filename,
  64     sanitize_path,
  65     std_headers,
  66     subtitles_filename,
  67     UnavailableVideoError,
  68     url_basename,
  69     version_tuple,
  70     write_json_file,
  71     write_string,
  72     YoutubeDLHandler,
  73     prepend_extension,
  74     replace_extension,
  75     args_to_str,
  76     age_restricted,
  77 )
  78 from .cache import Cache
  79 from .extractor import get_info_extractor, gen_extractors
  80 from .downloader import get_suitable_downloader
  81 from .downloader.rtmp import rtmpdump_version
  82 from .postprocessor import (
  83     FFmpegFixupM4aPP,
  84     FFmpegFixupStretchedPP,
  85     FFmpegMergerPP,
  86     FFmpegPostProcessor,
  87     get_postprocessor,
  88 )
  89 from .version import __version__
  90
  91
  92 class YoutubeDL(object):
  93     """YoutubeDL class.
  94
  95     YoutubeDL objects are the ones responsible of downloading the
  96     actual video file and writing it to disk if the user has requested
  97     it, among some other tasks. In most cases there should be one per
  98     program. As, given a video URL, the downloader doesn't know how to
  99     extract all the needed information, task that InfoExtractors do, it
 100     has to pass the URL to one of them.
 101
 102     For this, YoutubeDL objects have a method that allows
 103     InfoExtractors to be registered in a given order. When it is passed
 104     a URL, the YoutubeDL object handles it to the first InfoExtractor it
 105     finds that reports being able to handle it. The InfoExtractor extracts
 106     all the information about the video or videos the URL refers to, and
 107     YoutubeDL process the extracted information, possibly using a File
 108     Downloader to download the video.
 109
 110     YoutubeDL objects accept a lot of parameters. In order not to saturate
 111     the object constructor with arguments, it receives a dictionary of
 112     options instead. These options are available through the params
 113     attribute for the InfoExtractors to use. The YoutubeDL also
 114     registers itself as the downloader in charge for the InfoExtractors
 115     that are added to it, so this is a "mutual registration".
 116
 117     Available options:
 118
 119     username:          Username for authentication purposes.
 120     password:          Password for authentication purposes.
 121     videopassword:     Password for accessing a video.
 122     usenetrc:          Use netrc for authentication instead.
 123     verbose:           Print additional info to stdout.
 124     quiet:             Do not print messages to stdout.
 125     no_warnings:       Do not print out anything for warnings.
 126     forceurl:          Force printing final URL.
 127     forcetitle:        Force printing title.
 128     forceid:           Force printing ID.
 129     forcethumbnail:    Force printing thumbnail URL.
 130     forcedescription:  Force printing description.
 131     forcefilename:     Force printing final filename.
 132     forceduration:     Force printing duration.
 133     forcejson:         Force printing info_dict as JSON.
 134     dump_single_json:  Force printing the info_dict of the whole playlist
 135                        (or video) as a single JSON line.
 136     simulate:          Do not download the video files.
 137     format:            Video format code. See options.py for more information.
 138     outtmpl:           Template for output names.
 139     restrictfilenames: Do not allow "&" and spaces in file names
 140     ignoreerrors:      Do not stop on download errors.
 141     nooverwrites:      Prevent overwriting files.
 142     playliststart:     Playlist item to start at.
 143     playlistend:       Playlist item to end at.
 144     playlist_items:    Specific indices of playlist to download.
 145     playlistreverse:   Download playlist items in reverse order.
 146     matchtitle:        Download only matching titles.
 147     rejecttitle:       Reject downloads for matching titles.
 148     logger:            Log messages to a logging.Logger instance.
 149     logtostderr:       Log messages to stderr instead of stdout.
 150     writedescription:  Write the video description to a .description file
 151     writeinfojson:     Write the video description to a .info.json file
 152     writeannotations:  Write the video annotations to a .annotations.xml file
 153     writethumbnail:    Write the thumbnail image to a file
 154     write_all_thumbnails:  Write all thumbnail formats to files
 155     writesubtitles:    Write the video subtitles to a file
 156     writeautomaticsub: Write the automatic subtitles to a file
 157     allsubtitles:      Downloads all the subtitles of the video
 158                        (requires writesubtitles or writeautomaticsub)
 159     listsubtitles:     Lists all available subtitles for the video
 160     subtitlesformat:   The format code for subtitles
 161     subtitleslangs:    List of languages of the subtitles to download
 162     keepvideo:         Keep the video file after post-processing
 163     daterange:         A DateRange object, download only if the upload_date is in the range.
 164     skip_download:     Skip the actual download of the video file
 165     cachedir:          Location of the cache files in the filesystem.
 166                        False to disable filesystem cache.
 167     noplaylist:        Download single video instead of a playlist if in doubt.
 168     age_limit:         An integer representing the user's age in years.
 169                        Unsuitable videos for the given age are skipped.
 170     min_views:         An integer representing the minimum view count the video
 171                        must have in order to not be skipped.
 172                        Videos without view count information are always
 173                        downloaded. None for no limit.
 174     max_views:         An integer representing the maximum view count.
 175                        Videos that are more popular than that are not
 176                        downloaded.
 177                        Videos without view count information are always
 178                        downloaded. None for no limit.
 179     download_archive:  File name of a file where all downloads are recorded.
 180                        Videos already present in the file are not downloaded
 181                        again.
 182     cookiefile:        File name where cookies should be read from and dumped to.
 183     nocheckcertificate:Do not verify SSL certificates
 184     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 185                        At the moment, this is only supported by YouTube.
 186     proxy:             URL of the proxy server to use
 187     cn_verification_proxy:  URL of the proxy to use for IP address verification
 188                        on Chinese sites. (Experimental)
 189     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 190     bidi_workaround:   Work around buggy terminals without bidirectional text
 191                        support, using fribidi
 192     debug_printtraffic:Print out sent and received HTTP traffic
 193     include_ads:       Download ads as well
 194     default_search:    Prepend this string if an input url is not valid.
 195                        'auto' for elaborate guessing
 196     encoding:          Use this encoding instead of the system-specified.
 197     extract_flat:      Do not resolve URLs, return the immediate result.
 198                        Pass in 'in_playlist' to only show this behavior for
 199                        playlist items.
 200     postprocessors:    A list of dictionaries, each with an entry
 201                        * key:  The name of the postprocessor. See
 202                                youtube_dl/postprocessor/__init__.py for a list.
 203                        as well as any further keyword arguments for the
 204                        postprocessor.
 205     progress_hooks:    A list of functions that get called on download
 206                        progress, with a dictionary with the entries
 207                        * status: One of "downloading", "error", or "finished".
 208                                  Check this first and ignore unknown values.
 209
 210                        If status is one of "downloading", or "finished", the
 211                        following properties may also be present:
 212                        * filename: The final filename (always present)
 213                        * tmpfilename: The filename we're currently writing to
 214                        * downloaded_bytes: Bytes on disk
 215                        * total_bytes: Size of the whole file, None if unknown
 216                        * total_bytes_estimate: Guess of the eventual file size,
 217                                                None if unavailable.
 218                        * elapsed: The number of seconds since download started.
 219                        * eta: The estimated time in seconds, None if unknown
 220                        * speed: The download speed in bytes/second, None if
 221                                 unknown
 222                        * fragment_index: The counter of the currently
 223                                          downloaded video fragment.
 224                        * fragment_count: The number of fragments (= individual
 225                                          files that will be merged)
 226
 227                        Progress hooks are guaranteed to be called at least once
 228                        (with status "finished") if the download is successful.
 229     merge_output_format: Extension to use when merging formats.
 230     fixup:             Automatically correct known faults of the file.
 231                        One of:
 232                        - "never": do nothing
 233                        - "warn": only emit a warning
 234                        - "detect_or_warn": check whether we can do anything
 235                                            about it, warn otherwise (default)
 236     source_address:    (Experimental) Client-side IP address to bind to.
 237     call_home:         Boolean, true iff we are allowed to contact the
 238                        youtube-dl servers for debugging.
 239     sleep_interval:    Number of seconds to sleep before each download.
 240     listformats:       Print an overview of available video formats and exit.
 241     list_thumbnails:   Print a table of all thumbnails and exit.
 242     match_filter:      A function that gets called with the info_dict of
 243                        every video.
 244                        If it returns a message, the video is ignored.
 245                        If it returns None, the video is downloaded.
 246                        match_filter_func in utils.py is one example for this.
 247     no_color:          Do not emit color codes in output.
 248
 249     The following options determine which downloader is picked:
 250     external_downloader: Executable of the external downloader to call.
 251                        None or unset for standard (built-in) downloader.
 252     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
 253
 254     The following parameters are not used by YoutubeDL itself, they are used by
 255     the downloader (see youtube_dl/downloader/common.py):
 256     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 257     noresizebuffer, retries, continuedl, noprogress, consoletitle,
 258     xattr_set_filesize, external_downloader_args.
 259
 260     The following options are used by the post processors:
 261     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
 262                        otherwise prefer avconv.
 263     exec_cmd:          Arbitrary command to run after downloading
 264     """
 265
 266     params = None
 267     _ies = []
 268     _pps = []
 269     _download_retcode = None
 270     _num_downloads = None
 271     _screen_file = None
 272
 273     def __init__(self, params=None, auto_init=True):
 274         """Create a FileDownloader object with the given options."""
 275         if params is None:
 276             params = {}
 277         self._ies = []
 278         self._ies_instances = {}
 279         self._pps = []
 280         self._progress_hooks = []
 281         self._download_retcode = 0
 282         self._num_downloads = 0
 283         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 284         self._err_file = sys.stderr
 285         self.params = params
 286         self.cache = Cache(self)
 287
 288         if params.get('bidi_workaround', False):
 289             try:
 290                 import pty
 291                 master, slave = pty.openpty()
 292                 width = compat_get_terminal_size().columns
 293                 if width is None:
 294                     width_args = []
 295                 else:
 296                     width_args = ['-w', str(width)]
 297                 sp_kwargs = dict(
 298                     stdin=subprocess.PIPE,
 299                     stdout=slave,
 300                     stderr=self._err_file)
 301                 try:
 302                     self._output_process = subprocess.Popen(
 303                         ['bidiv'] + width_args, **sp_kwargs
 304                     )
 305                 except OSError:
 306                     self._output_process = subprocess.Popen(
 307                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 308                 self._output_channel = os.fdopen(master, 'rb')
 309             except OSError as ose:
 310                 if ose.errno == 2:
 311                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 312                 else:
 313                     raise
 314
 315         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 316                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
 317                 not params.get('restrictfilenames', False)):
 318             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 319             self.report_warning(
 320                 'Assuming --restrict-filenames since file system encoding '
 321                 'cannot encode all characters. '
 322                 'Set the LC_ALL environment variable to fix this.')
 323             self.params['restrictfilenames'] = True
 324
 325         if isinstance(params.get('outtmpl'), bytes):
 326             self.report_warning(
 327                 'Parameter outtmpl is bytes, but should be a unicode string. '
 328                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 329
 330         self._setup_opener()
 331
 332         if auto_init:
 333             self.print_debug_header()
 334             self.add_default_info_extractors()
 335
 336         for pp_def_raw in self.params.get('postprocessors', []):
 337             pp_class = get_postprocessor(pp_def_raw['key'])
 338             pp_def = dict(pp_def_raw)
 339             del pp_def['key']
 340             pp = pp_class(self, **compat_kwargs(pp_def))
 341             self.add_post_processor(pp)
 342
 343         for ph in self.params.get('progress_hooks', []):
 344             self.add_progress_hook(ph)
 345
 346     def warn_if_short_id(self, argv):
 347         # short YouTube ID starting with dash?
 348         idxs = [
 349             i for i, a in enumerate(argv)
 350             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 351         if idxs:
 352             correct_argv = (
 353                 ['youtube-dl'] +
 354                 [a for i, a in enumerate(argv) if i not in idxs] +
 355                 ['--'] + [argv[i] for i in idxs]
 356             )
 357             self.report_warning(
 358                 'Long argument string detected. '
 359                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 360                 args_to_str(correct_argv))
 361
 362     def add_info_extractor(self, ie):
 363         """Add an InfoExtractor object to the end of the list."""
 364         self._ies.append(ie)
 365         self._ies_instances[ie.ie_key()] = ie
 366         ie.set_downloader(self)
 367
 368     def get_info_extractor(self, ie_key):
 369         """
 370         Get an instance of an IE with name ie_key, it will try to get one from
 371         the _ies list, if there's no instance it will create a new one and add
 372         it to the extractor list.
 373         """
 374         ie = self._ies_instances.get(ie_key)
 375         if ie is None:
 376             ie = get_info_extractor(ie_key)()
 377             self.add_info_extractor(ie)
 378         return ie
 379
 380     def add_default_info_extractors(self):
 381         """
 382         Add the InfoExtractors returned by gen_extractors to the end of the list
 383         """
 384         for ie in gen_extractors():
 385             self.add_info_extractor(ie)
 386
 387     def add_post_processor(self, pp):
 388         """Add a PostProcessor object to the end of the chain."""
 389         self._pps.append(pp)
 390         pp.set_downloader(self)
 391
 392     def add_progress_hook(self, ph):
 393         """Add the progress hook (currently only for the file downloader)"""
 394         self._progress_hooks.append(ph)
 395
 396     def _bidi_workaround(self, message):
 397         if not hasattr(self, '_output_channel'):
 398             return message
 399
 400         assert hasattr(self, '_output_process')
 401         assert isinstance(message, compat_str)
 402         line_count = message.count('\n') + 1
 403         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 404         self._output_process.stdin.flush()
 405         res = ''.join(self._output_channel.readline().decode('utf-8')
 406                       for _ in range(line_count))
 407         return res[:-len('\n')]
 408
 409     def to_screen(self, message, skip_eol=False):
 410         """Print message to stdout if not in quiet mode."""
 411         return self.to_stdout(message, skip_eol, check_quiet=True)
 412
 413     def _write_string(self, s, out=None):
 414         write_string(s, out=out, encoding=self.params.get('encoding'))
 415
 416     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 417         """Print message to stdout if not in quiet mode."""
 418         if self.params.get('logger'):
 419             self.params['logger'].debug(message)
 420         elif not check_quiet or not self.params.get('quiet', False):
 421             message = self._bidi_workaround(message)
 422             terminator = ['\n', ''][skip_eol]
 423             output = message + terminator
 424
 425             self._write_string(output, self._screen_file)
 426
 427     def to_stderr(self, message):
 428         """Print message to stderr."""
 429         assert isinstance(message, compat_str)
 430         if self.params.get('logger'):
 431             self.params['logger'].error(message)
 432         else:
 433             message = self._bidi_workaround(message)
 434             output = message + '\n'
 435             self._write_string(output, self._err_file)
 436
 437     def to_console_title(self, message):
 438         if not self.params.get('consoletitle', False):
 439             return
 440         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 441             # c_wchar_p() might not be necessary if `message` is
 442             # already of type unicode()
 443             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 444         elif 'TERM' in os.environ:
 445             self._write_string('\033]0;%s\007' % message, self._screen_file)
 446
 447     def save_console_title(self):
 448         if not self.params.get('consoletitle', False):
 449             return
 450         if 'TERM' in os.environ:
 451             # Save the title on stack
 452             self._write_string('\033[22;0t', self._screen_file)
 453
 454     def restore_console_title(self):
 455         if not self.params.get('consoletitle', False):
 456             return
 457         if 'TERM' in os.environ:
 458             # Restore the title from stack
 459             self._write_string('\033[23;0t', self._screen_file)
 460
 461     def __enter__(self):
 462         self.save_console_title()
 463         return self
 464
 465     def __exit__(self, *args):
 466         self.restore_console_title()
 467
 468         if self.params.get('cookiefile') is not None:
 469             self.cookiejar.save()
 470
 471     def trouble(self, message=None, tb=None):
 472         """Determine action to take when a download problem appears.
 473
 474         Depending on if the downloader has been configured to ignore
 475         download errors or not, this method may throw an exception or
 476         not when errors are found, after printing the message.
 477
 478         tb, if given, is additional traceback information.
 479         """
 480         if message is not None:
 481             self.to_stderr(message)
 482         if self.params.get('verbose'):
 483             if tb is None:
 484                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 485                     tb = ''
 486                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 487                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 488                     tb += compat_str(traceback.format_exc())
 489                 else:
 490                     tb_data = traceback.format_list(traceback.extract_stack())
 491                     tb = ''.join(tb_data)
 492             self.to_stderr(tb)
 493         if not self.params.get('ignoreerrors', False):
 494             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 495                 exc_info = sys.exc_info()[1].exc_info
 496             else:
 497                 exc_info = sys.exc_info()
 498             raise DownloadError(message, exc_info)
 499         self._download_retcode = 1
 500
 501     def report_warning(self, message):
 502         '''
 503         Print the message to stderr, it will be prefixed with 'WARNING:'
 504         If stderr is a tty file the 'WARNING:' will be colored
 505         '''
 506         if self.params.get('logger') is not None:
 507             self.params['logger'].warning(message)
 508         else:
 509             if self.params.get('no_warnings'):
 510                 return
 511             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
 512                 _msg_header = '\033[0;33mWARNING:\033[0m'
 513             else:
 514                 _msg_header = 'WARNING:'
 515             warning_message = '%s %s' % (_msg_header, message)
 516             self.to_stderr(warning_message)
 517
 518     def report_error(self, message, tb=None):
 519         '''
 520         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 521         in red if stderr is a tty file.
 522         '''
 523         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
 524             _msg_header = '\033[0;31mERROR:\033[0m'
 525         else:
 526             _msg_header = 'ERROR:'
 527         error_message = '%s %s' % (_msg_header, message)
 528         self.trouble(error_message, tb)
 529
 530     def report_file_already_downloaded(self, file_name):
 531         """Report file has already been fully downloaded."""
 532         try:
 533             self.to_screen('[download] %s has already been downloaded' % file_name)
 534         except UnicodeEncodeError:
 535             self.to_screen('[download] The file has already been downloaded')
 536
 537     def prepare_filename(self, info_dict):
 538         """Generate the output filename."""
 539         try:
 540             template_dict = dict(info_dict)
 541
 542             template_dict['epoch'] = int(time.time())
 543             autonumber_size = self.params.get('autonumber_size')
 544             if autonumber_size is None:
 545                 autonumber_size = 5
 546             autonumber_templ = '%0' + str(autonumber_size) + 'd'
 547             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 548             if template_dict.get('playlist_index') is not None:
 549                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
 550             if template_dict.get('resolution') is None:
 551                 if template_dict.get('width') and template_dict.get('height'):
 552                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 553                 elif template_dict.get('height'):
 554                     template_dict['resolution'] = '%sp' % template_dict['height']
 555                 elif template_dict.get('width'):
 556                     template_dict['resolution'] = '?x%d' % template_dict['width']
 557
 558             sanitize = lambda k, v: sanitize_filename(
 559                 compat_str(v),
 560                 restricted=self.params.get('restrictfilenames'),
 561                 is_id=(k == 'id'))
 562             template_dict = dict((k, sanitize(k, v))
 563                                  for k, v in template_dict.items()
 564                                  if v is not None)
 565             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 566
 567             outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
 568             tmpl = compat_expanduser(outtmpl)
 569             filename = tmpl % template_dict
 570             # Temporary fix for #4787
 571             # 'Treat' all problem characters by passing filename through preferredencoding
 572             # to workaround encoding issues with subprocess on python2 @ Windows
 573             if sys.version_info < (3, 0) and sys.platform == 'win32':
 574                 filename = encodeFilename(filename, True).decode(preferredencoding())
 575             return filename
 576         except ValueError as err:
 577             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 578             return None
 579
 580     def _match_entry(self, info_dict, incomplete):
 581         """ Returns None iff the file should be downloaded """
 582
 583         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 584         if 'title' in info_dict:
 585             # This can happen when we're just evaluating the playlist
 586             title = info_dict['title']
 587             matchtitle = self.params.get('matchtitle', False)
 588             if matchtitle:
 589                 if not re.search(matchtitle, title, re.IGNORECASE):
 590                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 591             rejecttitle = self.params.get('rejecttitle', False)
 592             if rejecttitle:
 593                 if re.search(rejecttitle, title, re.IGNORECASE):
 594                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 595         date = info_dict.get('upload_date', None)
 596         if date is not None:
 597             dateRange = self.params.get('daterange', DateRange())
 598             if date not in dateRange:
 599                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 600         view_count = info_dict.get('view_count', None)
 601         if view_count is not None:
 602             min_views = self.params.get('min_views')
 603             if min_views is not None and view_count < min_views:
 604                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 605             max_views = self.params.get('max_views')
 606             if max_views is not None and view_count > max_views:
 607                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 608         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
 609             return 'Skipping "%s" because it is age restricted' % video_title
 610         if self.in_download_archive(info_dict):
 611             return '%s has already been recorded in archive' % video_title
 612
 613         if not incomplete:
 614             match_filter = self.params.get('match_filter')
 615             if match_filter is not None:
 616                 ret = match_filter(info_dict)
 617                 if ret is not None:
 618                     return ret
 619
 620         return None
 621
 622     @staticmethod
 623     def add_extra_info(info_dict, extra_info):
 624         '''Set the keys from extra_info in info dict if they are missing'''
 625         for key, value in extra_info.items():
 626             info_dict.setdefault(key, value)
 627
 628     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 629                      process=True):
 630         '''
 631         Returns a list with a dictionary for each video we find.
 632         If 'download', also downloads the videos.
 633         extra_info is a dict containing the extra values to add to each result
 634         '''
 635
 636         if ie_key:
 637             ies = [self.get_info_extractor(ie_key)]
 638         else:
 639             ies = self._ies
 640
 641         for ie in ies:
 642             if not ie.suitable(url):
 643                 continue
 644
 645             if not ie.working():
 646                 self.report_warning('The program functionality for this site has been marked as broken, '
 647                                     'and will probably not work.')
 648
 649             try:
 650                 ie_result = ie.extract(url)
 651                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
 652                     break
 653                 if isinstance(ie_result, list):
 654                     # Backwards compatibility: old IE result format
 655                     ie_result = {
 656                         '_type': 'compat_list',
 657                         'entries': ie_result,
 658                     }
 659                 self.add_default_extra_info(ie_result, ie, url)
 660                 if process:
 661                     return self.process_ie_result(ie_result, download, extra_info)
 662                 else:
 663                     return ie_result
 664             except ExtractorError as de:  # An error we somewhat expected
 665                 self.report_error(compat_str(de), de.format_traceback())
 666                 break
 667             except MaxDownloadsReached:
 668                 raise
 669             except Exception as e:
 670                 if self.params.get('ignoreerrors', False):
 671                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 672                     break
 673                 else:
 674                     raise
 675         else:
 676             self.report_error('no suitable InfoExtractor for URL %s' % url)
 677
 678     def add_default_extra_info(self, ie_result, ie, url):
 679         self.add_extra_info(ie_result, {
 680             'extractor': ie.IE_NAME,
 681             'webpage_url': url,
 682             'webpage_url_basename': url_basename(url),
 683             'extractor_key': ie.ie_key(),
 684         })
 685
 686     def process_ie_result(self, ie_result, download=True, extra_info={}):
 687         """
 688         Take the result of the ie(may be modified) and resolve all unresolved
 689         references (URLs, playlist items).
 690
 691         It will also download the videos if 'download'.
 692         Returns the resolved ie_result.
 693         """
 694
 695         result_type = ie_result.get('_type', 'video')
 696
 697         if result_type in ('url', 'url_transparent'):
 698             extract_flat = self.params.get('extract_flat', False)
 699             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
 700                     extract_flat is True):
 701                 if self.params.get('forcejson', False):
 702                     self.to_stdout(json.dumps(ie_result))
 703                 return ie_result
 704
 705         if result_type == 'video':
 706             self.add_extra_info(ie_result, extra_info)
 707             return self.process_video_result(ie_result, download=download)
 708         elif result_type == 'url':
 709             # We have to add extra_info to the results because it may be
 710             # contained in a playlist
 711             return self.extract_info(ie_result['url'],
 712                                      download,
 713                                      ie_key=ie_result.get('ie_key'),
 714                                      extra_info=extra_info)
 715         elif result_type == 'url_transparent':
 716             # Use the information from the embedding page
 717             info = self.extract_info(
 718                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 719                 extra_info=extra_info, download=False, process=False)
 720
 721             force_properties = dict(
 722                 (k, v) for k, v in ie_result.items() if v is not None)
 723             for f in ('_type', 'url'):
 724                 if f in force_properties:
 725                     del force_properties[f]
 726             new_result = info.copy()
 727             new_result.update(force_properties)
 728
 729             assert new_result.get('_type') != 'url_transparent'
 730
 731             return self.process_ie_result(
 732                 new_result, download=download, extra_info=extra_info)
 733         elif result_type == 'playlist' or result_type == 'multi_video':
 734             # We process each entry in the playlist
 735             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 736             self.to_screen('[download] Downloading playlist: %s' % playlist)
 737
 738             playlist_results = []
 739
 740             playliststart = self.params.get('playliststart', 1) - 1
 741             playlistend = self.params.get('playlistend', None)
 742             # For backwards compatibility, interpret -1 as whole list
 743             if playlistend == -1:
 744                 playlistend = None
 745
 746             playlistitems_str = self.params.get('playlist_items', None)
 747             playlistitems = None
 748             if playlistitems_str is not None:
 749                 def iter_playlistitems(format):
 750                     for string_segment in format.split(','):
 751                         if '-' in string_segment:
 752                             start, end = string_segment.split('-')
 753                             for item in range(int(start), int(end) + 1):
 754                                 yield int(item)
 755                         else:
 756                             yield int(string_segment)
 757                 playlistitems = iter_playlistitems(playlistitems_str)
 758
 759             ie_entries = ie_result['entries']
 760             if isinstance(ie_entries, list):
 761                 n_all_entries = len(ie_entries)
 762                 if playlistitems:
 763                     entries = [ie_entries[i - 1] for i in playlistitems]
 764                 else:
 765                     entries = ie_entries[playliststart:playlistend]
 766                 n_entries = len(entries)
 767                 self.to_screen(
 768                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
 769                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
 770             elif isinstance(ie_entries, PagedList):
 771                 if playlistitems:
 772                     entries = []
 773                     for item in playlistitems:
 774                         entries.extend(ie_entries.getslice(
 775                             item - 1, item
 776                         ))
 777                 else:
 778                     entries = ie_entries.getslice(
 779                         playliststart, playlistend)
 780                 n_entries = len(entries)
 781                 self.to_screen(
 782                     "[%s] playlist %s: Downloading %d videos" %
 783                     (ie_result['extractor'], playlist, n_entries))
 784             else:  # iterable
 785                 if playlistitems:
 786                     entry_list = list(ie_entries)
 787                     entries = [entry_list[i - 1] for i in playlistitems]
 788                 else:
 789                     entries = list(itertools.islice(
 790                         ie_entries, playliststart, playlistend))
 791                 n_entries = len(entries)
 792                 self.to_screen(
 793                     "[%s] playlist %s: Downloading %d videos" %
 794                     (ie_result['extractor'], playlist, n_entries))
 795
 796             if self.params.get('playlistreverse', False):
 797                 entries = entries[::-1]
 798
 799             for i, entry in enumerate(entries, 1):
 800                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
 801                 extra = {
 802                     'n_entries': n_entries,
 803                     'playlist': playlist,
 804                     'playlist_id': ie_result.get('id'),
 805                     'playlist_title': ie_result.get('title'),
 806                     'playlist_index': i + playliststart,
 807                     'extractor': ie_result['extractor'],
 808                     'webpage_url': ie_result['webpage_url'],
 809                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
 810                     'extractor_key': ie_result['extractor_key'],
 811                 }
 812
 813                 reason = self._match_entry(entry, incomplete=True)
 814                 if reason is not None:
 815                     self.to_screen('[download] ' + reason)
 816                     continue
 817
 818                 entry_result = self.process_ie_result(entry,
 819                                                       download=download,
 820                                                       extra_info=extra)
 821                 playlist_results.append(entry_result)
 822             ie_result['entries'] = playlist_results
 823             return ie_result
 824         elif result_type == 'compat_list':
 825             self.report_warning(
 826                 'Extractor %s returned a compat_list result. '
 827                 'It needs to be updated.' % ie_result.get('extractor'))
 828
 829             def _fixup(r):
 830                 self.add_extra_info(
 831                     r,
 832                     {
 833                         'extractor': ie_result['extractor'],
 834                         'webpage_url': ie_result['webpage_url'],
 835                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
 836                         'extractor_key': ie_result['extractor_key'],
 837                     }
 838                 )
 839                 return r
 840             ie_result['entries'] = [
 841                 self.process_ie_result(_fixup(r), download, extra_info)
 842                 for r in ie_result['entries']
 843             ]
 844             return ie_result
 845         else:
 846             raise Exception('Invalid result type: %s' % result_type)
 847
 848     def _apply_format_filter(self, format_spec, available_formats):
 849         " Returns a tuple of the remaining format_spec and filtered formats "
 850
 851         OPERATORS = {
 852             '<': operator.lt,
 853             '<=': operator.le,
 854             '>': operator.gt,
 855             '>=': operator.ge,
 856             '=': operator.eq,
 857             '!=': operator.ne,
 858         }
 859         operator_rex = re.compile(r'''(?x)\s*\[
 860             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
 861             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
 862             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
 863             \]$
 864             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
 865         m = operator_rex.search(format_spec)
 866         if m:
 867             try:
 868                 comparison_value = int(m.group('value'))
 869             except ValueError:
 870                 comparison_value = parse_filesize(m.group('value'))
 871                 if comparison_value is None:
 872                     comparison_value = parse_filesize(m.group('value') + 'B')
 873                 if comparison_value is None:
 874                     raise ValueError(
 875                         'Invalid value %r in format specification %r' % (
 876                             m.group('value'), format_spec))
 877             op = OPERATORS[m.group('op')]
 878
 879         if not m:
 880             STR_OPERATORS = {
 881                 '=': operator.eq,
 882                 '!=': operator.ne,
 883             }
 884             str_operator_rex = re.compile(r'''(?x)\s*\[
 885                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
 886                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
 887                 \s*(?P<value>[a-zA-Z0-9_-]+)
 888                 \s*\]$
 889                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
 890             m = str_operator_rex.search(format_spec)
 891             if m:
 892                 comparison_value = m.group('value')
 893                 op = STR_OPERATORS[m.group('op')]
 894
 895         if not m:
 896             raise ValueError('Invalid format specification %r' % format_spec)
 897
 898         def _filter(f):
 899             actual_value = f.get(m.group('key'))
 900             if actual_value is None:
 901                 return m.group('none_inclusive')
 902             return op(actual_value, comparison_value)
 903         new_formats = [f for f in available_formats if _filter(f)]
 904
 905         new_format_spec = format_spec[:-len(m.group(0))]
 906         if not new_format_spec:
 907             new_format_spec = 'best'
 908
 909         return (new_format_spec, new_formats)
 910
 911     def select_format(self, format_spec, available_formats):
 912         while format_spec.endswith(']'):
 913             format_spec, available_formats = self._apply_format_filter(
 914                 format_spec, available_formats)
 915         if not available_formats:
 916             return None
 917
 918         if format_spec in ['best', 'worst', None]:
 919             format_idx = 0 if format_spec == 'worst' else -1
 920             audiovideo_formats = [
 921                 f for f in available_formats
 922                 if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
 923             if audiovideo_formats:
 924                 return audiovideo_formats[format_idx]
 925             # for audio only urls, select the best/worst audio format
 926             elif all(f.get('acodec') != 'none' for f in available_formats):
 927                 return available_formats[format_idx]
 928         elif format_spec == 'bestaudio':
 929             audio_formats = [
 930                 f for f in available_formats
 931                 if f.get('vcodec') == 'none']
 932             if audio_formats:
 933                 return audio_formats[-1]
 934         elif format_spec == 'worstaudio':
 935             audio_formats = [
 936                 f for f in available_formats
 937                 if f.get('vcodec') == 'none']
 938             if audio_formats:
 939                 return audio_formats[0]
 940         elif format_spec == 'bestvideo':
 941             video_formats = [
 942                 f for f in available_formats
 943                 if f.get('acodec') == 'none']
 944             if video_formats:
 945                 return video_formats[-1]
 946         elif format_spec == 'worstvideo':
 947             video_formats = [
 948                 f for f in available_formats
 949                 if f.get('acodec') == 'none']
 950             if video_formats:
 951                 return video_formats[0]
 952         else:
 953             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
 954             if format_spec in extensions:
 955                 filter_f = lambda f: f['ext'] == format_spec
 956             else:
 957                 filter_f = lambda f: f['format_id'] == format_spec
 958             matches = list(filter(filter_f, available_formats))
 959             if matches:
 960                 return matches[-1]
 961         return None
 962
 963     def _calc_headers(self, info_dict):
 964         res = std_headers.copy()
 965
 966         add_headers = info_dict.get('http_headers')
 967         if add_headers:
 968             res.update(add_headers)
 969
 970         cookies = self._calc_cookies(info_dict)
 971         if cookies:
 972             res['Cookie'] = cookies
 973
 974         return res
 975
 976     def _calc_cookies(self, info_dict):
 977         pr = compat_urllib_request.Request(info_dict['url'])
 978         self.cookiejar.add_cookie_header(pr)
 979         return pr.get_header('Cookie')
 980
 981     def process_video_result(self, info_dict, download=True):
 982         assert info_dict.get('_type', 'video') == 'video'
 983
 984         if 'id' not in info_dict:
 985             raise ExtractorError('Missing "id" field in extractor result')
 986         if 'title' not in info_dict:
 987             raise ExtractorError('Missing "title" field in extractor result')
 988
 989         if 'playlist' not in info_dict:
 990             # It isn't part of a playlist
 991             info_dict['playlist'] = None
 992             info_dict['playlist_index'] = None
 993
 994         thumbnails = info_dict.get('thumbnails')
 995         if thumbnails is None:
 996             thumbnail = info_dict.get('thumbnail')
 997             if thumbnail:
 998                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
 999         if thumbnails:
1000             thumbnails.sort(key=lambda t: (
1001                 t.get('preference'), t.get('width'), t.get('height'),
1002                 t.get('id'), t.get('url')))
1003             for i, t in enumerate(thumbnails):
1004                 if 'width' in t and 'height' in t:
1005                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1006                 if t.get('id') is None:
1007                     t['id'] = '%d' % i
1008
1009         if thumbnails and 'thumbnail' not in info_dict:
1010             info_dict['thumbnail'] = thumbnails[-1]['url']
1011
1012         if 'display_id' not in info_dict and 'id' in info_dict:
1013             info_dict['display_id'] = info_dict['id']
1014
1015         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1016             # Working around negative timestamps in Windows
1017             # (see http://bugs.python.org/issue1646728)
1018             if info_dict['timestamp'] < 0 and os.name == 'nt':
1019                 info_dict['timestamp'] = 0
1020             upload_date = datetime.datetime.utcfromtimestamp(
1021                 info_dict['timestamp'])
1022             info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1023
1024         if self.params.get('listsubtitles', False):
1025             if 'automatic_captions' in info_dict:
1026                 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
1027             self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
1028             return
1029         info_dict['requested_subtitles'] = self.process_subtitles(
1030             info_dict['id'], info_dict.get('subtitles'),
1031             info_dict.get('automatic_captions'))
1032
1033         # This extractors handle format selection themselves
1034         if info_dict['extractor'] in ['Youku']:
1035             if download:
1036                 self.process_info(info_dict)
1037             return info_dict
1038
1039         # We now pick which formats have to be downloaded
1040         if info_dict.get('formats') is None:
1041             # There's only one format available
1042             formats = [info_dict]
1043         else:
1044             formats = info_dict['formats']
1045
1046         if not formats:
1047             raise ExtractorError('No video formats found!')
1048
1049         # We check that all the formats have the format and format_id fields
1050         for i, format in enumerate(formats):
1051             if 'url' not in format:
1052                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1053
1054             if format.get('format_id') is None:
1055                 format['format_id'] = compat_str(i)
1056             if format.get('format') is None:
1057                 format['format'] = '{id} - {res}{note}'.format(
1058                     id=format['format_id'],
1059                     res=self.format_resolution(format),
1060                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1061                 )
1062             # Automatically determine file extension if missing
1063             if 'ext' not in format:
1064                 format['ext'] = determine_ext(format['url']).lower()
1065             # Add HTTP headers, so that external programs can use them from the
1066             # json output
1067             full_format_info = info_dict.copy()
1068             full_format_info.update(format)
1069             format['http_headers'] = self._calc_headers(full_format_info)
1070
1071         # TODO Central sorting goes here
1072
1073         if formats[0] is not info_dict:
1074             # only set the 'formats' fields if the original info_dict list them
1075             # otherwise we end up with a circular reference, the first (and unique)
1076             # element in the 'formats' field in info_dict is info_dict itself,
1077             # wich can't be exported to json
1078             info_dict['formats'] = formats
1079         if self.params.get('listformats'):
1080             self.list_formats(info_dict)
1081             return
1082         if self.params.get('list_thumbnails'):
1083             self.list_thumbnails(info_dict)
1084             return
1085
1086         req_format = self.params.get('format')
1087         if req_format is None:
1088             req_format_list = []
1089             if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-'
1090                     and info_dict['extractor'] in ['youtube', 'ted']
1091                     and FFmpegMergerPP(self).available):
1092                 req_format_list.append('bestvideo+bestaudio')
1093             req_format_list.append('best')
1094             req_format = '/'.join(req_format_list)
1095         formats_to_download = []
1096         if req_format == 'all':
1097             formats_to_download = formats
1098         else:
1099             for rfstr in req_format.split(','):
1100                 # We can accept formats requested in the format: 34/5/best, we pick
1101                 # the first that is available, starting from left
1102                 req_formats = rfstr.split('/')
1103                 for rf in req_formats:
1104                     if re.match(r'.+?\+.+?', rf) is not None:
1105                         # Two formats have been requested like '137+139'
1106                         format_1, format_2 = rf.split('+')
1107                         formats_info = (self.select_format(format_1, formats),
1108                                         self.select_format(format_2, formats))
1109                         if all(formats_info):
1110                             # The first format must contain the video and the
1111                             # second the audio
1112                             if formats_info[0].get('vcodec') == 'none':
1113                                 self.report_error('The first format must '
1114                                                   'contain the video, try using '
1115                                                   '"-f %s+%s"' % (format_2, format_1))
1116                                 return
1117                             output_ext = (
1118                                 formats_info[0]['ext']
1119                                 if self.params.get('merge_output_format') is None
1120                                 else self.params['merge_output_format'])
1121                             selected_format = {
1122                                 'requested_formats': formats_info,
1123                                 'format': '%s+%s' % (formats_info[0].get('format'),
1124                                                      formats_info[1].get('format')),
1125                                 'format_id': '%s+%s' % (formats_info[0].get('format_id'),
1126                                                         formats_info[1].get('format_id')),
1127                                 'width': formats_info[0].get('width'),
1128                                 'height': formats_info[0].get('height'),
1129                                 'resolution': formats_info[0].get('resolution'),
1130                                 'fps': formats_info[0].get('fps'),
1131                                 'vcodec': formats_info[0].get('vcodec'),
1132                                 'vbr': formats_info[0].get('vbr'),
1133                                 'stretched_ratio': formats_info[0].get('stretched_ratio'),
1134                                 'acodec': formats_info[1].get('acodec'),
1135                                 'abr': formats_info[1].get('abr'),
1136                                 'ext': output_ext,
1137                             }
1138                         else:
1139                             selected_format = None
1140                     else:
1141                         selected_format = self.select_format(rf, formats)
1142                     if selected_format is not None:
1143                         formats_to_download.append(selected_format)
1144                         break
1145         if not formats_to_download:
1146             raise ExtractorError('requested format not available',
1147                                  expected=True)
1148
1149         if download:
1150             if len(formats_to_download) > 1:
1151                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1152             for format in formats_to_download:
1153                 new_info = dict(info_dict)
1154                 new_info.update(format)
1155                 self.process_info(new_info)
1156         # We update the info dict with the best quality format (backwards compatibility)
1157         info_dict.update(formats_to_download[-1])
1158         return info_dict
1159
1160     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1161         """Select the requested subtitles and their format"""
1162         available_subs = {}
1163         if normal_subtitles and self.params.get('writesubtitles'):
1164             available_subs.update(normal_subtitles)
1165         if automatic_captions and self.params.get('writeautomaticsub'):
1166             for lang, cap_info in automatic_captions.items():
1167                 if lang not in available_subs:
1168                     available_subs[lang] = cap_info
1169
1170         if (not self.params.get('writesubtitles') and not
1171                 self.params.get('writeautomaticsub') or not
1172                 available_subs):
1173             return None
1174
1175         if self.params.get('allsubtitles', False):
1176             requested_langs = available_subs.keys()
1177         else:
1178             if self.params.get('subtitleslangs', False):
1179                 requested_langs = self.params.get('subtitleslangs')
1180             elif 'en' in available_subs:
1181                 requested_langs = ['en']
1182             else:
1183                 requested_langs = [list(available_subs.keys())[0]]
1184
1185         formats_query = self.params.get('subtitlesformat', 'best')
1186         formats_preference = formats_query.split('/') if formats_query else []
1187         subs = {}
1188         for lang in requested_langs:
1189             formats = available_subs.get(lang)
1190             if formats is None:
1191                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1192                 continue
1193             for ext in formats_preference:
1194                 if ext == 'best':
1195                     f = formats[-1]
1196                     break
1197                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1198                 if matches:
1199                     f = matches[-1]
1200                     break
1201             else:
1202                 f = formats[-1]
1203                 self.report_warning(
1204                     'No subtitle format found matching "%s" for language %s, '
1205                     'using %s' % (formats_query, lang, f['ext']))
1206             subs[lang] = f
1207         return subs
1208
1209     def process_info(self, info_dict):
1210         """Process a single resolved IE result."""
1211
1212         assert info_dict.get('_type', 'video') == 'video'
1213
1214         max_downloads = self.params.get('max_downloads')
1215         if max_downloads is not None:
1216             if self._num_downloads >= int(max_downloads):
1217                 raise MaxDownloadsReached()
1218
1219         info_dict['fulltitle'] = info_dict['title']
1220         if len(info_dict['title']) > 200:
1221             info_dict['title'] = info_dict['title'][:197] + '...'
1222
1223         if 'format' not in info_dict:
1224             info_dict['format'] = info_dict['ext']
1225
1226         reason = self._match_entry(info_dict, incomplete=False)
1227         if reason is not None:
1228             self.to_screen('[download] ' + reason)
1229             return
1230
1231         self._num_downloads += 1
1232
1233         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1234
1235         # Forced printings
1236         if self.params.get('forcetitle', False):
1237             self.to_stdout(info_dict['fulltitle'])
1238         if self.params.get('forceid', False):
1239             self.to_stdout(info_dict['id'])
1240         if self.params.get('forceurl', False):
1241             if info_dict.get('requested_formats') is not None:
1242                 for f in info_dict['requested_formats']:
1243                     self.to_stdout(f['url'] + f.get('play_path', ''))
1244             else:
1245                 # For RTMP URLs, also include the playpath
1246                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1247         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1248             self.to_stdout(info_dict['thumbnail'])
1249         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1250             self.to_stdout(info_dict['description'])
1251         if self.params.get('forcefilename', False) and filename is not None:
1252             self.to_stdout(filename)
1253         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1254             self.to_stdout(formatSeconds(info_dict['duration']))
1255         if self.params.get('forceformat', False):
1256             self.to_stdout(info_dict['format'])
1257         if self.params.get('forcejson', False):
1258             self.to_stdout(json.dumps(info_dict))
1259
1260         # Do nothing else if in simulate mode
1261         if self.params.get('simulate', False):
1262             return
1263
1264         if filename is None:
1265             return
1266
1267         try:
1268             dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1269             if dn and not os.path.exists(dn):
1270                 os.makedirs(dn)
1271         except (OSError, IOError) as err:
1272             self.report_error('unable to create directory ' + compat_str(err))
1273             return
1274
1275         if self.params.get('writedescription', False):
1276             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1277             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1278                 self.to_screen('[info] Video description is already present')
1279             elif info_dict.get('description') is None:
1280                 self.report_warning('There\'s no description to write.')
1281             else:
1282                 try:
1283                     self.to_screen('[info] Writing video description to: ' + descfn)
1284                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1285                         descfile.write(info_dict['description'])
1286                 except (OSError, IOError):
1287                     self.report_error('Cannot write description file ' + descfn)
1288                     return
1289
1290         if self.params.get('writeannotations', False):
1291             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1292             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1293                 self.to_screen('[info] Video annotations are already present')
1294             else:
1295                 try:
1296                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1297                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1298                         annofile.write(info_dict['annotations'])
1299                 except (KeyError, TypeError):
1300                     self.report_warning('There are no annotations to write.')
1301                 except (OSError, IOError):
1302                     self.report_error('Cannot write annotations file: ' + annofn)
1303                     return
1304
1305         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1306                                        self.params.get('writeautomaticsub')])
1307
1308         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1309             # subtitles download errors are already managed as troubles in relevant IE
1310             # that way it will silently go on when used with unsupporting IE
1311             subtitles = info_dict['requested_subtitles']
1312             ie = self.get_info_extractor(info_dict['extractor_key'])
1313             for sub_lang, sub_info in subtitles.items():
1314                 sub_format = sub_info['ext']
1315                 if sub_info.get('data') is not None:
1316                     sub_data = sub_info['data']
1317                 else:
1318                     try:
1319                         sub_data = ie._download_webpage(
1320                             sub_info['url'], info_dict['id'], note=False)
1321                     except ExtractorError as err:
1322                         self.report_warning('Unable to download subtitle for "%s": %s' %
1323                                             (sub_lang, compat_str(err.cause)))
1324                         continue
1325                 try:
1326                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1327                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1328                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1329                     else:
1330                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1331                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1332                             subfile.write(sub_data)
1333                 except (OSError, IOError):
1334                     self.report_error('Cannot write subtitles file ' + sub_filename)
1335                     return
1336
1337         if self.params.get('writeinfojson', False):
1338             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1339             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1340                 self.to_screen('[info] Video description metadata is already present')
1341             else:
1342                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1343                 try:
1344                     write_json_file(self.filter_requested_info(info_dict), infofn)
1345                 except (OSError, IOError):
1346                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1347                     return
1348
1349         self._write_thumbnails(info_dict, filename)
1350
1351         if not self.params.get('skip_download', False):
1352             try:
1353                 def dl(name, info):
1354                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1355                     for ph in self._progress_hooks:
1356                         fd.add_progress_hook(ph)
1357                     if self.params.get('verbose'):
1358                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1359                     return fd.download(name, info)
1360
1361                 if info_dict.get('requested_formats') is not None:
1362                     downloaded = []
1363                     success = True
1364                     merger = FFmpegMergerPP(self)
1365                     if not merger.available:
1366                         postprocessors = []
1367                         self.report_warning('You have requested multiple '
1368                                             'formats but ffmpeg or avconv are not installed.'
1369                                             ' The formats won\'t be merged')
1370                     else:
1371                         postprocessors = [merger]
1372
1373                     def compatible_formats(formats):
1374                         video, audio = formats
1375                         # Check extension
1376                         video_ext, audio_ext = audio.get('ext'), video.get('ext')
1377                         if video_ext and audio_ext:
1378                             COMPATIBLE_EXTS = (
1379                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
1380                                 ('webm')
1381                             )
1382                             for exts in COMPATIBLE_EXTS:
1383                                 if video_ext in exts and audio_ext in exts:
1384                                     return True
1385                         # TODO: Check acodec/vcodec
1386                         return False
1387
1388                     filename_real_ext = os.path.splitext(filename)[1][1:]
1389                     filename_wo_ext = (
1390                         os.path.splitext(filename)[0]
1391                         if filename_real_ext == info_dict['ext']
1392                         else filename)
1393                     requested_formats = info_dict['requested_formats']
1394                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1395                         info_dict['ext'] = 'mkv'
1396                         self.report_warning('You have requested formats incompatible for merge. '
1397                                             'The formats will be merged into mkv')
1398                     # Ensure filename always has a correct extension for successful merge
1399                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1400                     if os.path.exists(encodeFilename(filename)):
1401                         self.to_screen(
1402                             '[download] %s has already been downloaded and '
1403                             'merged' % filename)
1404                     else:
1405                         for f in requested_formats:
1406                             new_info = dict(info_dict)
1407                             new_info.update(f)
1408                             fname = self.prepare_filename(new_info)
1409                             fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
1410                             downloaded.append(fname)
1411                             partial_success = dl(fname, new_info)
1412                             success = success and partial_success
1413                         info_dict['__postprocessors'] = postprocessors
1414                         info_dict['__files_to_merge'] = downloaded
1415                 else:
1416                     # Just a single file
1417                     success = dl(filename, info_dict)
1418             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1419                 self.report_error('unable to download video data: %s' % str(err))
1420                 return
1421             except (OSError, IOError) as err:
1422                 raise UnavailableVideoError(err)
1423             except (ContentTooShortError, ) as err:
1424                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1425                 return
1426
1427             if success:
1428                 # Fixup content
1429                 fixup_policy = self.params.get('fixup')
1430                 if fixup_policy is None:
1431                     fixup_policy = 'detect_or_warn'
1432
1433                 stretched_ratio = info_dict.get('stretched_ratio')
1434                 if stretched_ratio is not None and stretched_ratio != 1:
1435                     if fixup_policy == 'warn':
1436                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1437                             info_dict['id'], stretched_ratio))
1438                     elif fixup_policy == 'detect_or_warn':
1439                         stretched_pp = FFmpegFixupStretchedPP(self)
1440                         if stretched_pp.available:
1441                             info_dict.setdefault('__postprocessors', [])
1442                             info_dict['__postprocessors'].append(stretched_pp)
1443                         else:
1444                             self.report_warning(
1445                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1446                                     info_dict['id'], stretched_ratio))
1447                     else:
1448                         assert fixup_policy in ('ignore', 'never')
1449
1450                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1451                     if fixup_policy == 'warn':
1452                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1453                             info_dict['id']))
1454                     elif fixup_policy == 'detect_or_warn':
1455                         fixup_pp = FFmpegFixupM4aPP(self)
1456                         if fixup_pp.available:
1457                             info_dict.setdefault('__postprocessors', [])
1458                             info_dict['__postprocessors'].append(fixup_pp)
1459                         else:
1460                             self.report_warning(
1461                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1462                                     info_dict['id']))
1463                     else:
1464                         assert fixup_policy in ('ignore', 'never')
1465
1466                 try:
1467                     self.post_process(filename, info_dict)
1468                 except (PostProcessingError) as err:
1469                     self.report_error('postprocessing: %s' % str(err))
1470                     return
1471                 self.record_download_archive(info_dict)
1472
1473     def download(self, url_list):
1474         """Download a given list of URLs."""
1475         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1476         if (len(url_list) > 1 and
1477                 '%' not in outtmpl and
1478                 self.params.get('max_downloads') != 1):
1479             raise SameFileError(outtmpl)
1480
1481         for url in url_list:
1482             try:
1483                 # It also downloads the videos
1484                 res = self.extract_info(url)
1485             except UnavailableVideoError:
1486                 self.report_error('unable to download video')
1487             except MaxDownloadsReached:
1488                 self.to_screen('[info] Maximum number of downloaded files reached.')
1489                 raise
1490             else:
1491                 if self.params.get('dump_single_json', False):
1492                     self.to_stdout(json.dumps(res))
1493
1494         return self._download_retcode
1495
1496     def download_with_info_file(self, info_filename):
1497         with contextlib.closing(fileinput.FileInput(
1498                 [info_filename], mode='r',
1499                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1500             # FileInput doesn't have a read method, we can't call json.load
1501             info = self.filter_requested_info(json.loads('\n'.join(f)))
1502         try:
1503             self.process_ie_result(info, download=True)
1504         except DownloadError:
1505             webpage_url = info.get('webpage_url')
1506             if webpage_url is not None:
1507                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1508                 return self.download([webpage_url])
1509             else:
1510                 raise
1511         return self._download_retcode
1512
1513     @staticmethod
1514     def filter_requested_info(info_dict):
1515         return dict(
1516             (k, v) for k, v in info_dict.items()
1517             if k not in ['requested_formats', 'requested_subtitles'])
1518
1519     def post_process(self, filename, ie_info):
1520         """Run all the postprocessors on the given file."""
1521         info = dict(ie_info)
1522         info['filepath'] = filename
1523         pps_chain = []
1524         if ie_info.get('__postprocessors') is not None:
1525             pps_chain.extend(ie_info['__postprocessors'])
1526         pps_chain.extend(self._pps)
1527         for pp in pps_chain:
1528             try:
1529                 files_to_delete, info = pp.run(info)
1530             except PostProcessingError as e:
1531                 self.report_error(e.msg)
1532             if files_to_delete and not self.params.get('keepvideo', False):
1533                 for old_filename in files_to_delete:
1534                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1535                     try:
1536                         os.remove(encodeFilename(old_filename))
1537                     except (IOError, OSError):
1538                         self.report_warning('Unable to remove downloaded original file')
1539
1540     def _make_archive_id(self, info_dict):
1541         # Future-proof against any change in case
1542         # and backwards compatibility with prior versions
1543         extractor = info_dict.get('extractor_key')
1544         if extractor is None:
1545             if 'id' in info_dict:
1546                 extractor = info_dict.get('ie_key')  # key in a playlist
1547         if extractor is None:
1548             return None  # Incomplete video information
1549         return extractor.lower() + ' ' + info_dict['id']
1550
1551     def in_download_archive(self, info_dict):
1552         fn = self.params.get('download_archive')
1553         if fn is None:
1554             return False
1555
1556         vid_id = self._make_archive_id(info_dict)
1557         if vid_id is None:
1558             return False  # Incomplete video information
1559
1560         try:
1561             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1562                 for line in archive_file:
1563                     if line.strip() == vid_id:
1564                         return True
1565         except IOError as ioe:
1566             if ioe.errno != errno.ENOENT:
1567                 raise
1568         return False
1569
1570     def record_download_archive(self, info_dict):
1571         fn = self.params.get('download_archive')
1572         if fn is None:
1573             return
1574         vid_id = self._make_archive_id(info_dict)
1575         assert vid_id
1576         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1577             archive_file.write(vid_id + '\n')
1578
1579     @staticmethod
1580     def format_resolution(format, default='unknown'):
1581         if format.get('vcodec') == 'none':
1582             return 'audio only'
1583         if format.get('resolution') is not None:
1584             return format['resolution']
1585         if format.get('height') is not None:
1586             if format.get('width') is not None:
1587                 res = '%sx%s' % (format['width'], format['height'])
1588             else:
1589                 res = '%sp' % format['height']
1590         elif format.get('width') is not None:
1591             res = '?x%d' % format['width']
1592         else:
1593             res = default
1594         return res
1595
1596     def _format_note(self, fdict):
1597         res = ''
1598         if fdict.get('ext') in ['f4f', 'f4m']:
1599             res += '(unsupported) '
1600         if fdict.get('format_note') is not None:
1601             res += fdict['format_note'] + ' '
1602         if fdict.get('tbr') is not None:
1603             res += '%4dk ' % fdict['tbr']
1604         if fdict.get('container') is not None:
1605             if res:
1606                 res += ', '
1607             res += '%s container' % fdict['container']
1608         if (fdict.get('vcodec') is not None and
1609                 fdict.get('vcodec') != 'none'):
1610             if res:
1611                 res += ', '
1612             res += fdict['vcodec']
1613             if fdict.get('vbr') is not None:
1614                 res += '@'
1615         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1616             res += 'video@'
1617         if fdict.get('vbr') is not None:
1618             res += '%4dk' % fdict['vbr']
1619         if fdict.get('fps') is not None:
1620             res += ', %sfps' % fdict['fps']
1621         if fdict.get('acodec') is not None:
1622             if res:
1623                 res += ', '
1624             if fdict['acodec'] == 'none':
1625                 res += 'video only'
1626             else:
1627                 res += '%-5s' % fdict['acodec']
1628         elif fdict.get('abr') is not None:
1629             if res:
1630                 res += ', '
1631             res += 'audio'
1632         if fdict.get('abr') is not None:
1633             res += '@%3dk' % fdict['abr']
1634         if fdict.get('asr') is not None:
1635             res += ' (%5dHz)' % fdict['asr']
1636         if fdict.get('filesize') is not None:
1637             if res:
1638                 res += ', '
1639             res += format_bytes(fdict['filesize'])
1640         elif fdict.get('filesize_approx') is not None:
1641             if res:
1642                 res += ', '
1643             res += '~' + format_bytes(fdict['filesize_approx'])
1644         return res
1645
1646     def list_formats(self, info_dict):
1647         formats = info_dict.get('formats', [info_dict])
1648         table = [
1649             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1650             for f in formats
1651             if f.get('preference') is None or f['preference'] >= -1000]
1652         if len(formats) > 1:
1653             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1654
1655         header_line = ['format code', 'extension', 'resolution', 'note']
1656         self.to_screen(
1657             '[info] Available formats for %s:\n%s' %
1658             (info_dict['id'], render_table(header_line, table)))
1659
1660     def list_thumbnails(self, info_dict):
1661         thumbnails = info_dict.get('thumbnails')
1662         if not thumbnails:
1663             tn_url = info_dict.get('thumbnail')
1664             if tn_url:
1665                 thumbnails = [{'id': '0', 'url': tn_url}]
1666             else:
1667                 self.to_screen(
1668                     '[info] No thumbnails present for %s' % info_dict['id'])
1669                 return
1670
1671         self.to_screen(
1672             '[info] Thumbnails for %s:' % info_dict['id'])
1673         self.to_screen(render_table(
1674             ['ID', 'width', 'height', 'URL'],
1675             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1676
1677     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1678         if not subtitles:
1679             self.to_screen('%s has no %s' % (video_id, name))
1680             return
1681         self.to_screen(
1682             'Available %s for %s:' % (name, video_id))
1683         self.to_screen(render_table(
1684             ['Language', 'formats'],
1685             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1686                 for lang, formats in subtitles.items()]))
1687
1688     def urlopen(self, req):
1689         """ Start an HTTP download """
1690
1691         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1692         # always respected by websites, some tend to give out URLs with non percent-encoded
1693         # non-ASCII characters (see telemb.py, ard.py [#3412])
1694         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1695         # To work around aforementioned issue we will replace request's original URL with
1696         # percent-encoded one
1697         req_is_string = isinstance(req, compat_basestring)
1698         url = req if req_is_string else req.get_full_url()
1699         url_escaped = escape_url(url)
1700
1701         # Substitute URL if any change after escaping
1702         if url != url_escaped:
1703             if req_is_string:
1704                 req = url_escaped
1705             else:
1706                 req = compat_urllib_request.Request(
1707                     url_escaped, data=req.data, headers=req.headers,
1708                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1709
1710         return self._opener.open(req, timeout=self._socket_timeout)
1711
    def print_debug_header(self):
        """Write the '[debug]' diagnostic header; does nothing unless 'verbose' is set.

        Reports, in order: encodings, youtube-dl version, git HEAD (when it
        can be determined), Python version and platform, versions of the
        external executables, and the proxy map. When the 'call_home' param
        is set it also queries yt-dl.org for the public IP address and the
        latest version, and warns if the running version is older.
        """
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        # sys.stdout may have been replaced by an object without 'encoding';
        # report its type name in that case.
        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
        # Best effort: ask git for the short HEAD hash of the directory this
        # file lives in; any failure is silently ignored.
        try:
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        except Exception:
            # sys.exc_clear only exists on Python 2; ignore its absence
            try:
                sys.exc_clear()
            except Exception:
                pass
        self._write_string('[debug] Python version %s - %s\n' % (
            platform.python_version(), platform_name()))

        # Only executables with a truthy version value are listed
        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Merge the proxies of every opener handler that exposes them
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
1776
1777     def _setup_opener(self):
1778         timeout_val = self.params.get('socket_timeout')
1779         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1780
1781         opts_cookiefile = self.params.get('cookiefile')
1782         opts_proxy = self.params.get('proxy')
1783
1784         if opts_cookiefile is None:
1785             self.cookiejar = compat_cookiejar.CookieJar()
1786         else:
1787             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1788                 opts_cookiefile)
1789             if os.access(opts_cookiefile, os.R_OK):
1790                 self.cookiejar.load()
1791
1792         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1793             self.cookiejar)
1794         if opts_proxy is not None:
1795             if opts_proxy == '':
1796                 proxies = {}
1797             else:
1798                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1799         else:
1800             proxies = compat_urllib_request.getproxies()
1801             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1802             if 'http' in proxies and 'https' not in proxies:
1803                 proxies['https'] = proxies['http']
1804         proxy_handler = PerRequestProxyHandler(proxies)
1805
1806         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1807         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1808         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1809         opener = compat_urllib_request.build_opener(
1810             proxy_handler, https_handler, cookie_processor, ydlh)
1811
1812         # Delete the default user-agent header, which would otherwise apply in
1813         # cases where our custom HTTP handler doesn't come into play
1814         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1815         opener.addheaders = []
1816         self._opener = opener
1817
1818     def encode(self, s):
1819         if isinstance(s, bytes):
1820             return s  # Already encoded
1821
1822         try:
1823             return s.encode(self.get_encoding())
1824         except UnicodeEncodeError as err:
1825             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1826             raise
1827
1828     def get_encoding(self):
1829         encoding = self.params.get('encoding')
1830         if encoding is None:
1831             encoding = preferredencoding()
1832         return encoding
1833
1834     def _write_thumbnails(self, info_dict, filename):
1835         if self.params.get('writethumbnail', False):
1836             thumbnails = info_dict.get('thumbnails')
1837             if thumbnails:
1838                 thumbnails = [thumbnails[-1]]
1839         elif self.params.get('write_all_thumbnails', False):
1840             thumbnails = info_dict.get('thumbnails')
1841         else:
1842             return
1843
1844         if not thumbnails:
1845             # No thumbnails present, so return immediately
1846             return
1847
1848         for t in thumbnails:
1849             thumb_ext = determine_ext(t['url'], 'jpg')
1850             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
1851             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
1852             thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
1853
1854             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1855                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
1856                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1857             else:
1858                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
1859                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1860                 try:
1861                     uf = self.urlopen(t['url'])
1862                     with open(thumb_filename, 'wb') as thumbf:
1863                         shutil.copyfileobj(uf, thumbf)
1864                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
1865                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
1866                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1867                     self.report_warning('Unable to download thumbnail "%s": %s' %
1868                                         (t['url'], compat_str(err)))