_ Git - youtube-dl/blob - youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import datetime
   9 import errno
  10 import fileinput
  11 import io
  12 import itertools
  13 import json
  14 import locale
  15 import operator
  16 import os
  17 import platform
  18 import re
  19 import shutil
  20 import subprocess
  21 import socket
  22 import sys
  23 import time
  24 import traceback
  25
  26 if os.name == 'nt':
  27     import ctypes
  28
  29 from .compat import (
  30     compat_basestring,
  31     compat_cookiejar,
  32     compat_expanduser,
  33     compat_get_terminal_size,
  34     compat_http_client,
  35     compat_kwargs,
  36     compat_str,
  37     compat_urllib_error,
  38     compat_urllib_request,
  39 )
  40 from .utils import (
  41     escape_url,
  42     ContentTooShortError,
  43     date_from_str,
  44     DateRange,
  45     DEFAULT_OUTTMPL,
  46     determine_ext,
  47     DownloadError,
  48     encodeFilename,
  49     ExtractorError,
  50     format_bytes,
  51     formatSeconds,
  52     locked_file,
  53     make_HTTPS_handler,
  54     MaxDownloadsReached,
  55     PagedList,
  56     parse_filesize,
  57     PerRequestProxyHandler,
  58     PostProcessingError,
  59     platform_name,
  60     preferredencoding,
  61     render_table,
  62     SameFileError,
  63     sanitize_filename,
  64     sanitize_path,
  65     std_headers,
  66     subtitles_filename,
  67     UnavailableVideoError,
  68     url_basename,
  69     version_tuple,
  70     write_json_file,
  71     write_string,
  72     YoutubeDLHandler,
  73     prepend_extension,
  74     args_to_str,
  75     age_restricted,
  76 )
  77 from .cache import Cache
  78 from .extractor import get_info_extractor, gen_extractors
  79 from .downloader import get_suitable_downloader
  80 from .downloader.rtmp import rtmpdump_version
  81 from .postprocessor import (
  82     FFmpegFixupM4aPP,
  83     FFmpegFixupStretchedPP,
  84     FFmpegMergerPP,
  85     FFmpegPostProcessor,
  86     get_postprocessor,
  87 )
  88 from .version import __version__
  89
  90
  91 class YoutubeDL(object):
  92     """YoutubeDL class.
  93
  94     YoutubeDL objects are the ones responsible for downloading the
  95     actual video file and writing it to disk if the user has requested
  96     it, among some other tasks. In most cases there should be one per
  97     program. As, given a video URL, the downloader doesn't know how to
  98     extract all the needed information, task that InfoExtractors do, it
  99     has to pass the URL to one of them.
 100
 101     For this, YoutubeDL objects have a method that allows
 102     InfoExtractors to be registered in a given order. When it is passed
 103     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 104     finds that reports being able to handle it. The InfoExtractor extracts
 105     all the information about the video or videos the URL refers to, and
 106     YoutubeDL processes the extracted information, possibly using a File
 107     Downloader to download the video.
 108
 109     YoutubeDL objects accept a lot of parameters. In order not to saturate
 110     the object constructor with arguments, it receives a dictionary of
 111     options instead. These options are available through the params
 112     attribute for the InfoExtractors to use. The YoutubeDL also
 113     registers itself as the downloader in charge for the InfoExtractors
 114     that are added to it, so this is a "mutual registration".
 115
 116     Available options:
 117
 118     username:          Username for authentication purposes.
 119     password:          Password for authentication purposes.
 120     videopassword:     Password for accessing a video.
 121     usenetrc:          Use netrc for authentication instead.
 122     verbose:           Print additional info to stdout.
 123     quiet:             Do not print messages to stdout.
 124     no_warnings:       Do not print out anything for warnings.
 125     forceurl:          Force printing final URL.
 126     forcetitle:        Force printing title.
 127     forceid:           Force printing ID.
 128     forcethumbnail:    Force printing thumbnail URL.
 129     forcedescription:  Force printing description.
 130     forcefilename:     Force printing final filename.
 131     forceduration:     Force printing duration.
 132     forcejson:         Force printing info_dict as JSON.
 133     dump_single_json:  Force printing the info_dict of the whole playlist
 134                        (or video) as a single JSON line.
 135     simulate:          Do not download the video files.
 136     format:            Video format code. See options.py for more information.
 137     outtmpl:           Template for output names.
 138     restrictfilenames: Do not allow "&" and spaces in file names
 139     ignoreerrors:      Do not stop on download errors.
 140     nooverwrites:      Prevent overwriting files.
 141     playliststart:     Playlist item to start at.
 142     playlistend:       Playlist item to end at.
 143     playlist_items:    Specific indices of playlist to download.
 144     playlistreverse:   Download playlist items in reverse order.
 145     matchtitle:        Download only matching titles.
 146     rejecttitle:       Reject downloads for matching titles.
 147     logger:            Log messages to a logging.Logger instance.
 148     logtostderr:       Log messages to stderr instead of stdout.
 149     writedescription:  Write the video description to a .description file
 150     writeinfojson:     Write the video metadata to a .info.json file
 151     writeannotations:  Write the video annotations to a .annotations.xml file
 152     writethumbnail:    Write the thumbnail image to a file
 153     write_all_thumbnails:  Write all thumbnail formats to files
 154     writesubtitles:    Write the video subtitles to a file
 155     writeautomaticsub: Write the automatic subtitles to a file
 156     allsubtitles:      Downloads all the subtitles of the video
 157                        (requires writesubtitles or writeautomaticsub)
 158     listsubtitles:     Lists all available subtitles for the video
 159     subtitlesformat:   The format code for subtitles
 160     subtitleslangs:    List of languages of the subtitles to download
 161     keepvideo:         Keep the video file after post-processing
 162     daterange:         A DateRange object, download only if the upload_date is in the range.
 163     skip_download:     Skip the actual download of the video file
 164     cachedir:          Location of the cache files in the filesystem.
 165                        False to disable filesystem cache.
 166     noplaylist:        Download single video instead of a playlist if in doubt.
 167     age_limit:         An integer representing the user's age in years.
 168                        Unsuitable videos for the given age are skipped.
 169     min_views:         An integer representing the minimum view count the video
 170                        must have in order to not be skipped.
 171                        Videos without view count information are always
 172                        downloaded. None for no limit.
 173     max_views:         An integer representing the maximum view count.
 174                        Videos that are more popular than that are not
 175                        downloaded.
 176                        Videos without view count information are always
 177                        downloaded. None for no limit.
 178     download_archive:  File name of a file where all downloads are recorded.
 179                        Videos already present in the file are not downloaded
 180                        again.
 181     cookiefile:        File name where cookies should be read from and dumped to.
 182     nocheckcertificate:Do not verify SSL certificates
 183     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 184                        At the moment, this is only supported by YouTube.
 185     proxy:             URL of the proxy server to use
 186     cn_verification_proxy:  URL of the proxy to use for IP address verification
 187                        on Chinese sites. (Experimental)
 188     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 189     bidi_workaround:   Work around buggy terminals without bidirectional text
 190                        support, using fribidi
 191     debug_printtraffic:Print out sent and received HTTP traffic
 192     include_ads:       Download ads as well
 193     default_search:    Prepend this string if an input url is not valid.
 194                        'auto' for elaborate guessing
 195     encoding:          Use this encoding instead of the system-specified.
 196     extract_flat:      Do not resolve URLs, return the immediate result.
 197                        Pass in 'in_playlist' to only show this behavior for
 198                        playlist items.
 199     postprocessors:    A list of dictionaries, each with an entry
 200                        * key:  The name of the postprocessor. See
 201                                youtube_dl/postprocessor/__init__.py for a list.
 202                        as well as any further keyword arguments for the
 203                        postprocessor.
 204     progress_hooks:    A list of functions that get called on download
 205                        progress, with a dictionary with the entries
 206                        * status: One of "downloading", "error", or "finished".
 207                                  Check this first and ignore unknown values.
 208
 209                        If status is one of "downloading", or "finished", the
 210                        following properties may also be present:
 211                        * filename: The final filename (always present)
 212                        * tmpfilename: The filename we're currently writing to
 213                        * downloaded_bytes: Bytes on disk
 214                        * total_bytes: Size of the whole file, None if unknown
 215                        * total_bytes_estimate: Guess of the eventual file size,
 216                                                None if unavailable.
 217                        * elapsed: The number of seconds since download started.
 218                        * eta: The estimated time in seconds, None if unknown
 219                        * speed: The download speed in bytes/second, None if
 220                                 unknown
 221                        * fragment_index: The counter of the currently
 222                                          downloaded video fragment.
 223                        * fragment_count: The number of fragments (= individual
 224                                          files that will be merged)
 225
 226                        Progress hooks are guaranteed to be called at least once
 227                        (with status "finished") if the download is successful.
 228     merge_output_format: Extension to use when merging formats.
 229     fixup:             Automatically correct known faults of the file.
 230                        One of:
 231                        - "never": do nothing
 232                        - "warn": only emit a warning
 233                        - "detect_or_warn": check whether we can do anything
 234                                            about it, warn otherwise (default)
 235     source_address:    (Experimental) Client-side IP address to bind to.
 236     call_home:         Boolean, true iff we are allowed to contact the
 237                        youtube-dl servers for debugging.
 238     sleep_interval:    Number of seconds to sleep before each download.
 239     listformats:       Print an overview of available video formats and exit.
 240     list_thumbnails:   Print a table of all thumbnails and exit.
 241     match_filter:      A function that gets called with the info_dict of
 242                        every video.
 243                        If it returns a message, the video is ignored.
 244                        If it returns None, the video is downloaded.
 245                        match_filter_func in utils.py is one example for this.
 246     no_color:          Do not emit color codes in output.
 247
 248     The following options determine which downloader is picked:
 249     external_downloader: Executable of the external downloader to call.
 250                        None or unset for standard (built-in) downloader.
 251     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
 252
 253     The following parameters are not used by YoutubeDL itself, they are used by
 254     the downloader (see youtube_dl/downloader/common.py):
 255     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 256     noresizebuffer, retries, continuedl, noprogress, consoletitle,
 257     xattr_set_filesize, external_downloader_args.
 258
 259     The following options are used by the post processors:
 260     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
 261                        otherwise prefer avconv.
 262     exec_cmd:          Arbitrary command to run after downloading
 263     """
 264
 265     params = None
 266     _ies = []
 267     _pps = []
 268     _download_retcode = None
 269     _num_downloads = None
 270     _screen_file = None
 271
 272     def __init__(self, params=None, auto_init=True):
 273         """Create a FileDownloader object with the given options."""
 274         if params is None:
 275             params = {}
 276         self._ies = []
 277         self._ies_instances = {}
 278         self._pps = []
 279         self._progress_hooks = []
 280         self._download_retcode = 0
 281         self._num_downloads = 0
 282         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 283         self._err_file = sys.stderr
 284         self.params = params
 285         self.cache = Cache(self)
 286
 287         if params.get('bidi_workaround', False):
 288             try:
 289                 import pty
 290                 master, slave = pty.openpty()
 291                 width = compat_get_terminal_size().columns
 292                 if width is None:
 293                     width_args = []
 294                 else:
 295                     width_args = ['-w', str(width)]
 296                 sp_kwargs = dict(
 297                     stdin=subprocess.PIPE,
 298                     stdout=slave,
 299                     stderr=self._err_file)
 300                 try:
 301                     self._output_process = subprocess.Popen(
 302                         ['bidiv'] + width_args, **sp_kwargs
 303                     )
 304                 except OSError:
 305                     self._output_process = subprocess.Popen(
 306                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 307                 self._output_channel = os.fdopen(master, 'rb')
 308             except OSError as ose:
 309                 if ose.errno == 2:
 310                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 311                 else:
 312                     raise
 313
 314         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 315                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
 316                 not params.get('restrictfilenames', False)):
 317             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 318             self.report_warning(
 319                 'Assuming --restrict-filenames since file system encoding '
 320                 'cannot encode all characters. '
 321                 'Set the LC_ALL environment variable to fix this.')
 322             self.params['restrictfilenames'] = True
 323
 324         if isinstance(params.get('outtmpl'), bytes):
 325             self.report_warning(
 326                 'Parameter outtmpl is bytes, but should be a unicode string. '
 327                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 328
 329         self._setup_opener()
 330
 331         if auto_init:
 332             self.print_debug_header()
 333             self.add_default_info_extractors()
 334
 335         for pp_def_raw in self.params.get('postprocessors', []):
 336             pp_class = get_postprocessor(pp_def_raw['key'])
 337             pp_def = dict(pp_def_raw)
 338             del pp_def['key']
 339             pp = pp_class(self, **compat_kwargs(pp_def))
 340             self.add_post_processor(pp)
 341
 342         for ph in self.params.get('progress_hooks', []):
 343             self.add_progress_hook(ph)
 344
 345     def warn_if_short_id(self, argv):
 346         # short YouTube ID starting with dash?
 347         idxs = [
 348             i for i, a in enumerate(argv)
 349             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 350         if idxs:
 351             correct_argv = (
 352                 ['youtube-dl'] +
 353                 [a for i, a in enumerate(argv) if i not in idxs] +
 354                 ['--'] + [argv[i] for i in idxs]
 355             )
 356             self.report_warning(
 357                 'Long argument string detected. '
 358                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 359                 args_to_str(correct_argv))
 360
 361     def add_info_extractor(self, ie):
 362         """Add an InfoExtractor object to the end of the list."""
 363         self._ies.append(ie)
 364         self._ies_instances[ie.ie_key()] = ie
 365         ie.set_downloader(self)
 366
 367     def get_info_extractor(self, ie_key):
 368         """
 369         Get an instance of an IE with name ie_key, it will try to get one from
 370         the _ies list, if there's no instance it will create a new one and add
 371         it to the extractor list.
 372         """
 373         ie = self._ies_instances.get(ie_key)
 374         if ie is None:
 375             ie = get_info_extractor(ie_key)()
 376             self.add_info_extractor(ie)
 377         return ie
 378
 379     def add_default_info_extractors(self):
 380         """
 381         Add the InfoExtractors returned by gen_extractors to the end of the list
 382         """
 383         for ie in gen_extractors():
 384             self.add_info_extractor(ie)
 385
 386     def add_post_processor(self, pp):
 387         """Add a PostProcessor object to the end of the chain."""
 388         self._pps.append(pp)
 389         pp.set_downloader(self)
 390
 391     def add_progress_hook(self, ph):
 392         """Add the progress hook (currently only for the file downloader)"""
 393         self._progress_hooks.append(ph)
 394
 395     def _bidi_workaround(self, message):
 396         if not hasattr(self, '_output_channel'):
 397             return message
 398
 399         assert hasattr(self, '_output_process')
 400         assert isinstance(message, compat_str)
 401         line_count = message.count('\n') + 1
 402         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 403         self._output_process.stdin.flush()
 404         res = ''.join(self._output_channel.readline().decode('utf-8')
 405                       for _ in range(line_count))
 406         return res[:-len('\n')]
 407
 408     def to_screen(self, message, skip_eol=False):
 409         """Print message to stdout if not in quiet mode."""
 410         return self.to_stdout(message, skip_eol, check_quiet=True)
 411
 412     def _write_string(self, s, out=None):
 413         write_string(s, out=out, encoding=self.params.get('encoding'))
 414
 415     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 416         """Print message to stdout if not in quiet mode."""
 417         if self.params.get('logger'):
 418             self.params['logger'].debug(message)
 419         elif not check_quiet or not self.params.get('quiet', False):
 420             message = self._bidi_workaround(message)
 421             terminator = ['\n', ''][skip_eol]
 422             output = message + terminator
 423
 424             self._write_string(output, self._screen_file)
 425
 426     def to_stderr(self, message):
 427         """Print message to stderr."""
 428         assert isinstance(message, compat_str)
 429         if self.params.get('logger'):
 430             self.params['logger'].error(message)
 431         else:
 432             message = self._bidi_workaround(message)
 433             output = message + '\n'
 434             self._write_string(output, self._err_file)
 435
 436     def to_console_title(self, message):
 437         if not self.params.get('consoletitle', False):
 438             return
 439         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 440             # c_wchar_p() might not be necessary if `message` is
 441             # already of type unicode()
 442             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 443         elif 'TERM' in os.environ:
 444             self._write_string('\033]0;%s\007' % message, self._screen_file)
 445
 446     def save_console_title(self):
 447         if not self.params.get('consoletitle', False):
 448             return
 449         if 'TERM' in os.environ:
 450             # Save the title on stack
 451             self._write_string('\033[22;0t', self._screen_file)
 452
 453     def restore_console_title(self):
 454         if not self.params.get('consoletitle', False):
 455             return
 456         if 'TERM' in os.environ:
 457             # Restore the title from stack
 458             self._write_string('\033[23;0t', self._screen_file)
 459
 460     def __enter__(self):
 461         self.save_console_title()
 462         return self
 463
 464     def __exit__(self, *args):
 465         self.restore_console_title()
 466
 467         if self.params.get('cookiefile') is not None:
 468             self.cookiejar.save()
 469
 470     def trouble(self, message=None, tb=None):
 471         """Determine action to take when a download problem appears.
 472
 473         Depending on if the downloader has been configured to ignore
 474         download errors or not, this method may throw an exception or
 475         not when errors are found, after printing the message.
 476
 477         tb, if given, is additional traceback information.
 478         """
 479         if message is not None:
 480             self.to_stderr(message)
 481         if self.params.get('verbose'):
 482             if tb is None:
 483                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 484                     tb = ''
 485                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 486                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 487                     tb += compat_str(traceback.format_exc())
 488                 else:
 489                     tb_data = traceback.format_list(traceback.extract_stack())
 490                     tb = ''.join(tb_data)
 491             self.to_stderr(tb)
 492         if not self.params.get('ignoreerrors', False):
 493             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 494                 exc_info = sys.exc_info()[1].exc_info
 495             else:
 496                 exc_info = sys.exc_info()
 497             raise DownloadError(message, exc_info)
 498         self._download_retcode = 1
 499
 500     def report_warning(self, message):
 501         '''
 502         Print the message to stderr, it will be prefixed with 'WARNING:'
 503         If stderr is a tty file the 'WARNING:' will be colored
 504         '''
 505         if self.params.get('logger') is not None:
 506             self.params['logger'].warning(message)
 507         else:
 508             if self.params.get('no_warnings'):
 509                 return
 510             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
 511                 _msg_header = '\033[0;33mWARNING:\033[0m'
 512             else:
 513                 _msg_header = 'WARNING:'
 514             warning_message = '%s %s' % (_msg_header, message)
 515             self.to_stderr(warning_message)
 516
 517     def report_error(self, message, tb=None):
 518         '''
 519         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 520         in red if stderr is a tty file.
 521         '''
 522         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
 523             _msg_header = '\033[0;31mERROR:\033[0m'
 524         else:
 525             _msg_header = 'ERROR:'
 526         error_message = '%s %s' % (_msg_header, message)
 527         self.trouble(error_message, tb)
 528
 529     def report_file_already_downloaded(self, file_name):
 530         """Report file has already been fully downloaded."""
 531         try:
 532             self.to_screen('[download] %s has already been downloaded' % file_name)
 533         except UnicodeEncodeError:
 534             self.to_screen('[download] The file has already been downloaded')
 535
 536     def prepare_filename(self, info_dict):
 537         """Generate the output filename."""
 538         try:
 539             template_dict = dict(info_dict)
 540
 541             template_dict['epoch'] = int(time.time())
 542             autonumber_size = self.params.get('autonumber_size')
 543             if autonumber_size is None:
 544                 autonumber_size = 5
 545             autonumber_templ = '%0' + str(autonumber_size) + 'd'
 546             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 547             if template_dict.get('playlist_index') is not None:
 548                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
 549             if template_dict.get('resolution') is None:
 550                 if template_dict.get('width') and template_dict.get('height'):
 551                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 552                 elif template_dict.get('height'):
 553                     template_dict['resolution'] = '%sp' % template_dict['height']
 554                 elif template_dict.get('width'):
 555                     template_dict['resolution'] = '?x%d' % template_dict['width']
 556
 557             sanitize = lambda k, v: sanitize_filename(
 558                 compat_str(v),
 559                 restricted=self.params.get('restrictfilenames'),
 560                 is_id=(k == 'id'))
 561             template_dict = dict((k, sanitize(k, v))
 562                                  for k, v in template_dict.items()
 563                                  if v is not None)
 564             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 565
 566             outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
 567             tmpl = compat_expanduser(outtmpl)
 568             filename = tmpl % template_dict
 569             # Temporary fix for #4787
 570             # 'Treat' all problem characters by passing filename through preferredencoding
 571             # to workaround encoding issues with subprocess on python2 @ Windows
 572             if sys.version_info < (3, 0) and sys.platform == 'win32':
 573                 filename = encodeFilename(filename, True).decode(preferredencoding())
 574             return filename
 575         except ValueError as err:
 576             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 577             return None
 578
 579     def _match_entry(self, info_dict, incomplete):
 580         """ Returns None iff the file should be downloaded """
 581
 582         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 583         if 'title' in info_dict:
 584             # This can happen when we're just evaluating the playlist
 585             title = info_dict['title']
 586             matchtitle = self.params.get('matchtitle', False)
 587             if matchtitle:
 588                 if not re.search(matchtitle, title, re.IGNORECASE):
 589                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 590             rejecttitle = self.params.get('rejecttitle', False)
 591             if rejecttitle:
 592                 if re.search(rejecttitle, title, re.IGNORECASE):
 593                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 594         date = info_dict.get('upload_date', None)
 595         if date is not None:
 596             dateRange = self.params.get('daterange', DateRange())
 597             if date not in dateRange:
 598                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 599         view_count = info_dict.get('view_count', None)
 600         if view_count is not None:
 601             min_views = self.params.get('min_views')
 602             if min_views is not None and view_count < min_views:
 603                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 604             max_views = self.params.get('max_views')
 605             if max_views is not None and view_count > max_views:
 606                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 607         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
 608             return 'Skipping "%s" because it is age restricted' % video_title
 609         if self.in_download_archive(info_dict):
 610             return '%s has already been recorded in archive' % video_title
 611
 612         if not incomplete:
 613             match_filter = self.params.get('match_filter')
 614             if match_filter is not None:
 615                 ret = match_filter(info_dict)
 616                 if ret is not None:
 617                     return ret
 618
 619         return None
 620
 621     @staticmethod
 622     def add_extra_info(info_dict, extra_info):
 623         '''Set the keys from extra_info in info dict if they are missing'''
 624         for key, value in extra_info.items():
 625             info_dict.setdefault(key, value)
 626
 627     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 628                      process=True):
 629         '''
 630         Returns a list with a dictionary for each video we find.
 631         If 'download', also downloads the videos.
 632         extra_info is a dict containing the extra values to add to each result
 633         '''
 634
 635         if ie_key:
 636             ies = [self.get_info_extractor(ie_key)]
 637         else:
 638             ies = self._ies
 639
 640         for ie in ies:
 641             if not ie.suitable(url):
 642                 continue
 643
 644             if not ie.working():
 645                 self.report_warning('The program functionality for this site has been marked as broken, '
 646                                     'and will probably not work.')
 647
 648             try:
 649                 ie_result = ie.extract(url)
 650                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
 651                     break
 652                 if isinstance(ie_result, list):
 653                     # Backwards compatibility: old IE result format
 654                     ie_result = {
 655                         '_type': 'compat_list',
 656                         'entries': ie_result,
 657                     }
 658                 self.add_default_extra_info(ie_result, ie, url)
 659                 if process:
 660                     return self.process_ie_result(ie_result, download, extra_info)
 661                 else:
 662                     return ie_result
 663             except ExtractorError as de:  # An error we somewhat expected
 664                 self.report_error(compat_str(de), de.format_traceback())
 665                 break
 666             except MaxDownloadsReached:
 667                 raise
 668             except Exception as e:
 669                 if self.params.get('ignoreerrors', False):
 670                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 671                     break
 672                 else:
 673                     raise
 674         else:
 675             self.report_error('no suitable InfoExtractor for URL %s' % url)
 676
 677     def add_default_extra_info(self, ie_result, ie, url):
 678         self.add_extra_info(ie_result, {
 679             'extractor': ie.IE_NAME,
 680             'webpage_url': url,
 681             'webpage_url_basename': url_basename(url),
 682             'extractor_key': ie.ie_key(),
 683         })
 684
 685     def process_ie_result(self, ie_result, download=True, extra_info={}):
 686         """
 687         Take the result of the ie(may be modified) and resolve all unresolved
 688         references (URLs, playlist items).
 689
 690         It will also download the videos if 'download'.
 691         Returns the resolved ie_result.
 692         """
 693
 694         result_type = ie_result.get('_type', 'video')
 695
 696         if result_type in ('url', 'url_transparent'):
 697             extract_flat = self.params.get('extract_flat', False)
 698             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
 699                     extract_flat is True):
 700                 if self.params.get('forcejson', False):
 701                     self.to_stdout(json.dumps(ie_result))
 702                 return ie_result
 703
 704         if result_type == 'video':
 705             self.add_extra_info(ie_result, extra_info)
 706             return self.process_video_result(ie_result, download=download)
 707         elif result_type == 'url':
 708             # We have to add extra_info to the results because it may be
 709             # contained in a playlist
 710             return self.extract_info(ie_result['url'],
 711                                      download,
 712                                      ie_key=ie_result.get('ie_key'),
 713                                      extra_info=extra_info)
 714         elif result_type == 'url_transparent':
 715             # Use the information from the embedding page
 716             info = self.extract_info(
 717                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 718                 extra_info=extra_info, download=False, process=False)
 719
 720             force_properties = dict(
 721                 (k, v) for k, v in ie_result.items() if v is not None)
 722             for f in ('_type', 'url'):
 723                 if f in force_properties:
 724                     del force_properties[f]
 725             new_result = info.copy()
 726             new_result.update(force_properties)
 727
 728             assert new_result.get('_type') != 'url_transparent'
 729
 730             return self.process_ie_result(
 731                 new_result, download=download, extra_info=extra_info)
 732         elif result_type == 'playlist' or result_type == 'multi_video':
 733             # We process each entry in the playlist
 734             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 735             self.to_screen('[download] Downloading playlist: %s' % playlist)
 736
 737             playlist_results = []
 738
 739             playliststart = self.params.get('playliststart', 1) - 1
 740             playlistend = self.params.get('playlistend', None)
 741             # For backwards compatibility, interpret -1 as whole list
 742             if playlistend == -1:
 743                 playlistend = None
 744
 745             playlistitems_str = self.params.get('playlist_items', None)
 746             playlistitems = None
 747             if playlistitems_str is not None:
 748                 def iter_playlistitems(format):
 749                     for string_segment in format.split(','):
 750                         if '-' in string_segment:
 751                             start, end = string_segment.split('-')
 752                             for item in range(int(start), int(end) + 1):
 753                                 yield int(item)
 754                         else:
 755                             yield int(string_segment)
 756                 playlistitems = iter_playlistitems(playlistitems_str)
 757
 758             ie_entries = ie_result['entries']
 759             if isinstance(ie_entries, list):
 760                 n_all_entries = len(ie_entries)
 761                 if playlistitems:
 762                     entries = [ie_entries[i - 1] for i in playlistitems]
 763                 else:
 764                     entries = ie_entries[playliststart:playlistend]
 765                 n_entries = len(entries)
 766                 self.to_screen(
 767                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
 768                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
 769             elif isinstance(ie_entries, PagedList):
 770                 if playlistitems:
 771                     entries = []
 772                     for item in playlistitems:
 773                         entries.extend(ie_entries.getslice(
 774                             item - 1, item
 775                         ))
 776                 else:
 777                     entries = ie_entries.getslice(
 778                         playliststart, playlistend)
 779                 n_entries = len(entries)
 780                 self.to_screen(
 781                     "[%s] playlist %s: Downloading %d videos" %
 782                     (ie_result['extractor'], playlist, n_entries))
 783             else:  # iterable
 784                 if playlistitems:
 785                     entry_list = list(ie_entries)
 786                     entries = [entry_list[i - 1] for i in playlistitems]
 787                 else:
 788                     entries = list(itertools.islice(
 789                         ie_entries, playliststart, playlistend))
 790                 n_entries = len(entries)
 791                 self.to_screen(
 792                     "[%s] playlist %s: Downloading %d videos" %
 793                     (ie_result['extractor'], playlist, n_entries))
 794
 795             if self.params.get('playlistreverse', False):
 796                 entries = entries[::-1]
 797
 798             for i, entry in enumerate(entries, 1):
 799                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
 800                 extra = {
 801                     'n_entries': n_entries,
 802                     'playlist': playlist,
 803                     'playlist_id': ie_result.get('id'),
 804                     'playlist_title': ie_result.get('title'),
 805                     'playlist_index': i + playliststart,
 806                     'extractor': ie_result['extractor'],
 807                     'webpage_url': ie_result['webpage_url'],
 808                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
 809                     'extractor_key': ie_result['extractor_key'],
 810                 }
 811
 812                 reason = self._match_entry(entry, incomplete=True)
 813                 if reason is not None:
 814                     self.to_screen('[download] ' + reason)
 815                     continue
 816
 817                 entry_result = self.process_ie_result(entry,
 818                                                       download=download,
 819                                                       extra_info=extra)
 820                 playlist_results.append(entry_result)
 821             ie_result['entries'] = playlist_results
 822             return ie_result
 823         elif result_type == 'compat_list':
 824             self.report_warning(
 825                 'Extractor %s returned a compat_list result. '
 826                 'It needs to be updated.' % ie_result.get('extractor'))
 827
 828             def _fixup(r):
 829                 self.add_extra_info(
 830                     r,
 831                     {
 832                         'extractor': ie_result['extractor'],
 833                         'webpage_url': ie_result['webpage_url'],
 834                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
 835                         'extractor_key': ie_result['extractor_key'],
 836                     }
 837                 )
 838                 return r
 839             ie_result['entries'] = [
 840                 self.process_ie_result(_fixup(r), download, extra_info)
 841                 for r in ie_result['entries']
 842             ]
 843             return ie_result
 844         else:
 845             raise Exception('Invalid result type: %s' % result_type)
 846
 847     def _apply_format_filter(self, format_spec, available_formats):
 848         " Returns a tuple of the remaining format_spec and filtered formats "
 849
 850         OPERATORS = {
 851             '<': operator.lt,
 852             '<=': operator.le,
 853             '>': operator.gt,
 854             '>=': operator.ge,
 855             '=': operator.eq,
 856             '!=': operator.ne,
 857         }
 858         operator_rex = re.compile(r'''(?x)\s*\[
 859             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
 860             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
 861             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
 862             \]$
 863             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
 864         m = operator_rex.search(format_spec)
 865         if m:
 866             try:
 867                 comparison_value = int(m.group('value'))
 868             except ValueError:
 869                 comparison_value = parse_filesize(m.group('value'))
 870                 if comparison_value is None:
 871                     comparison_value = parse_filesize(m.group('value') + 'B')
 872                 if comparison_value is None:
 873                     raise ValueError(
 874                         'Invalid value %r in format specification %r' % (
 875                             m.group('value'), format_spec))
 876             op = OPERATORS[m.group('op')]
 877
 878         if not m:
 879             STR_OPERATORS = {
 880                 '=': operator.eq,
 881                 '!=': operator.ne,
 882             }
 883             str_operator_rex = re.compile(r'''(?x)\s*\[
 884                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
 885                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
 886                 \s*(?P<value>[a-zA-Z0-9_-]+)
 887                 \s*\]$
 888                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
 889             m = str_operator_rex.search(format_spec)
 890             if m:
 891                 comparison_value = m.group('value')
 892                 op = STR_OPERATORS[m.group('op')]
 893
 894         if not m:
 895             raise ValueError('Invalid format specification %r' % format_spec)
 896
 897         def _filter(f):
 898             actual_value = f.get(m.group('key'))
 899             if actual_value is None:
 900                 return m.group('none_inclusive')
 901             return op(actual_value, comparison_value)
 902         new_formats = [f for f in available_formats if _filter(f)]
 903
 904         new_format_spec = format_spec[:-len(m.group(0))]
 905         if not new_format_spec:
 906             new_format_spec = 'best'
 907
 908         return (new_format_spec, new_formats)
 909
 910     def select_format(self, format_spec, available_formats):
 911         while format_spec.endswith(']'):
 912             format_spec, available_formats = self._apply_format_filter(
 913                 format_spec, available_formats)
 914         if not available_formats:
 915             return None
 916
 917         if format_spec == 'best' or format_spec is None:
 918             audiovideo_formats = [
 919                 f for f in available_formats
 920                 if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
 921             if audiovideo_formats:
 922                 return audiovideo_formats[-1]
 923             # for audio only urls, 'best' selects the best audio format
 924             elif all(f.get('acodec') != 'none' for f in available_formats):
 925                 return available_formats[-1]
 926         elif format_spec == 'worst':
 927             audiovideo_formats = [
 928                 f for f in available_formats
 929                 if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
 930             if audiovideo_formats:
 931                 return audiovideo_formats[0]
 932             return available_formats[0]
 933         elif format_spec == 'bestaudio':
 934             audio_formats = [
 935                 f for f in available_formats
 936                 if f.get('vcodec') == 'none']
 937             if audio_formats:
 938                 return audio_formats[-1]
 939         elif format_spec == 'worstaudio':
 940             audio_formats = [
 941                 f for f in available_formats
 942                 if f.get('vcodec') == 'none']
 943             if audio_formats:
 944                 return audio_formats[0]
 945         elif format_spec == 'bestvideo':
 946             video_formats = [
 947                 f for f in available_formats
 948                 if f.get('acodec') == 'none']
 949             if video_formats:
 950                 return video_formats[-1]
 951         elif format_spec == 'worstvideo':
 952             video_formats = [
 953                 f for f in available_formats
 954                 if f.get('acodec') == 'none']
 955             if video_formats:
 956                 return video_formats[0]
 957         else:
 958             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
 959             if format_spec in extensions:
 960                 filter_f = lambda f: f['ext'] == format_spec
 961             else:
 962                 filter_f = lambda f: f['format_id'] == format_spec
 963             matches = list(filter(filter_f, available_formats))
 964             if matches:
 965                 return matches[-1]
 966         return None
 967
 968     def _calc_headers(self, info_dict):
 969         res = std_headers.copy()
 970
 971         add_headers = info_dict.get('http_headers')
 972         if add_headers:
 973             res.update(add_headers)
 974
 975         cookies = self._calc_cookies(info_dict)
 976         if cookies:
 977             res['Cookie'] = cookies
 978
 979         return res
 980
 981     def _calc_cookies(self, info_dict):
 982         pr = compat_urllib_request.Request(info_dict['url'])
 983         self.cookiejar.add_cookie_header(pr)
 984         return pr.get_header('Cookie')
 985
 986     def process_video_result(self, info_dict, download=True):
 987         assert info_dict.get('_type', 'video') == 'video'
 988
 989         if 'id' not in info_dict:
 990             raise ExtractorError('Missing "id" field in extractor result')
 991         if 'title' not in info_dict:
 992             raise ExtractorError('Missing "title" field in extractor result')
 993
 994         if 'playlist' not in info_dict:
 995             # It isn't part of a playlist
 996             info_dict['playlist'] = None
 997             info_dict['playlist_index'] = None
 998
 999         thumbnails = info_dict.get('thumbnails')
1000         if thumbnails is None:
1001             thumbnail = info_dict.get('thumbnail')
1002             if thumbnail:
1003                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1004         if thumbnails:
1005             thumbnails.sort(key=lambda t: (
1006                 t.get('preference'), t.get('width'), t.get('height'),
1007                 t.get('id'), t.get('url')))
1008             for i, t in enumerate(thumbnails):
1009                 if 'width' in t and 'height' in t:
1010                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1011                 if t.get('id') is None:
1012                     t['id'] = '%d' % i
1013
1014         if thumbnails and 'thumbnail' not in info_dict:
1015             info_dict['thumbnail'] = thumbnails[-1]['url']
1016
1017         if 'display_id' not in info_dict and 'id' in info_dict:
1018             info_dict['display_id'] = info_dict['id']
1019
1020         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1021             # Working around negative timestamps in Windows
1022             # (see http://bugs.python.org/issue1646728)
1023             if info_dict['timestamp'] < 0 and os.name == 'nt':
1024                 info_dict['timestamp'] = 0
1025             upload_date = datetime.datetime.utcfromtimestamp(
1026                 info_dict['timestamp'])
1027             info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1028
1029         if self.params.get('listsubtitles', False):
1030             if 'automatic_captions' in info_dict:
1031                 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
1032             self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
1033             return
1034         info_dict['requested_subtitles'] = self.process_subtitles(
1035             info_dict['id'], info_dict.get('subtitles'),
1036             info_dict.get('automatic_captions'))
1037
1038         # This extractors handle format selection themselves
1039         if info_dict['extractor'] in ['Youku']:
1040             if download:
1041                 self.process_info(info_dict)
1042             return info_dict
1043
1044         # We now pick which formats have to be downloaded
1045         if info_dict.get('formats') is None:
1046             # There's only one format available
1047             formats = [info_dict]
1048         else:
1049             formats = info_dict['formats']
1050
1051         if not formats:
1052             raise ExtractorError('No video formats found!')
1053
1054         # We check that all the formats have the format and format_id fields
1055         for i, format in enumerate(formats):
1056             if 'url' not in format:
1057                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1058
1059             if format.get('format_id') is None:
1060                 format['format_id'] = compat_str(i)
1061             if format.get('format') is None:
1062                 format['format'] = '{id} - {res}{note}'.format(
1063                     id=format['format_id'],
1064                     res=self.format_resolution(format),
1065                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1066                 )
1067             # Automatically determine file extension if missing
1068             if 'ext' not in format:
1069                 format['ext'] = determine_ext(format['url']).lower()
1070             # Add HTTP headers, so that external programs can use them from the
1071             # json output
1072             full_format_info = info_dict.copy()
1073             full_format_info.update(format)
1074             format['http_headers'] = self._calc_headers(full_format_info)
1075
1076         # TODO Central sorting goes here
1077
1078         if formats[0] is not info_dict:
1079             # only set the 'formats' fields if the original info_dict list them
1080             # otherwise we end up with a circular reference, the first (and unique)
1081             # element in the 'formats' field in info_dict is info_dict itself,
1082             # wich can't be exported to json
1083             info_dict['formats'] = formats
1084         if self.params.get('listformats'):
1085             self.list_formats(info_dict)
1086             return
1087         if self.params.get('list_thumbnails'):
1088             self.list_thumbnails(info_dict)
1089             return
1090
1091         req_format = self.params.get('format')
1092         if req_format is None:
1093             req_format_list = []
1094             if info_dict['extractor'] in ['youtube', 'ted'] and FFmpegMergerPP(self).available:
1095                 req_format_list.append('bestvideo+bestaudio')
1096             req_format_list.append('best')
1097             req_format = '/'.join(req_format_list)
1098         formats_to_download = []
1099         if req_format == 'all':
1100             formats_to_download = formats
1101         else:
1102             for rfstr in req_format.split(','):
1103                 # We can accept formats requested in the format: 34/5/best, we pick
1104                 # the first that is available, starting from left
1105                 req_formats = rfstr.split('/')
1106                 for rf in req_formats:
1107                     if re.match(r'.+?\+.+?', rf) is not None:
1108                         # Two formats have been requested like '137+139'
1109                         format_1, format_2 = rf.split('+')
1110                         formats_info = (self.select_format(format_1, formats),
1111                                         self.select_format(format_2, formats))
1112                         if all(formats_info):
1113                             # The first format must contain the video and the
1114                             # second the audio
1115                             if formats_info[0].get('vcodec') == 'none':
1116                                 self.report_error('The first format must '
1117                                                   'contain the video, try using '
1118                                                   '"-f %s+%s"' % (format_2, format_1))
1119                                 return
1120                             output_ext = (
1121                                 formats_info[0]['ext']
1122                                 if self.params.get('merge_output_format') is None
1123                                 else self.params['merge_output_format'])
1124                             selected_format = {
1125                                 'requested_formats': formats_info,
1126                                 'format': '%s+%s' % (formats_info[0].get('format'),
1127                                                      formats_info[1].get('format')),
1128                                 'format_id': '%s+%s' % (formats_info[0].get('format_id'),
1129                                                         formats_info[1].get('format_id')),
1130                                 'width': formats_info[0].get('width'),
1131                                 'height': formats_info[0].get('height'),
1132                                 'resolution': formats_info[0].get('resolution'),
1133                                 'fps': formats_info[0].get('fps'),
1134                                 'vcodec': formats_info[0].get('vcodec'),
1135                                 'vbr': formats_info[0].get('vbr'),
1136                                 'stretched_ratio': formats_info[0].get('stretched_ratio'),
1137                                 'acodec': formats_info[1].get('acodec'),
1138                                 'abr': formats_info[1].get('abr'),
1139                                 'ext': output_ext,
1140                             }
1141                         else:
1142                             selected_format = None
1143                     else:
1144                         selected_format = self.select_format(rf, formats)
1145                     if selected_format is not None:
1146                         formats_to_download.append(selected_format)
1147                         break
1148         if not formats_to_download:
1149             raise ExtractorError('requested format not available',
1150                                  expected=True)
1151
1152         if download:
1153             if len(formats_to_download) > 1:
1154                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1155             for format in formats_to_download:
1156                 new_info = dict(info_dict)
1157                 new_info.update(format)
1158                 self.process_info(new_info)
1159         # We update the info dict with the best quality format (backwards compatibility)
1160         info_dict.update(formats_to_download[-1])
1161         return info_dict
1162
1163     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1164         """Select the requested subtitles and their format"""
1165         available_subs = {}
1166         if normal_subtitles and self.params.get('writesubtitles'):
1167             available_subs.update(normal_subtitles)
1168         if automatic_captions and self.params.get('writeautomaticsub'):
1169             for lang, cap_info in automatic_captions.items():
1170                 if lang not in available_subs:
1171                     available_subs[lang] = cap_info
1172
1173         if (not self.params.get('writesubtitles') and not
1174                 self.params.get('writeautomaticsub') or not
1175                 available_subs):
1176             return None
1177
1178         if self.params.get('allsubtitles', False):
1179             requested_langs = available_subs.keys()
1180         else:
1181             if self.params.get('subtitleslangs', False):
1182                 requested_langs = self.params.get('subtitleslangs')
1183             elif 'en' in available_subs:
1184                 requested_langs = ['en']
1185             else:
1186                 requested_langs = [list(available_subs.keys())[0]]
1187
1188         formats_query = self.params.get('subtitlesformat', 'best')
1189         formats_preference = formats_query.split('/') if formats_query else []
1190         subs = {}
1191         for lang in requested_langs:
1192             formats = available_subs.get(lang)
1193             if formats is None:
1194                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1195                 continue
1196             for ext in formats_preference:
1197                 if ext == 'best':
1198                     f = formats[-1]
1199                     break
1200                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1201                 if matches:
1202                     f = matches[-1]
1203                     break
1204             else:
1205                 f = formats[-1]
1206                 self.report_warning(
1207                     'No subtitle format found matching "%s" for language %s, '
1208                     'using %s' % (formats_query, lang, f['ext']))
1209             subs[lang] = f
1210         return subs
1211
1212     def process_info(self, info_dict):
1213         """Process a single resolved IE result."""
1214
1215         assert info_dict.get('_type', 'video') == 'video'
1216
1217         max_downloads = self.params.get('max_downloads')
1218         if max_downloads is not None:
1219             if self._num_downloads >= int(max_downloads):
1220                 raise MaxDownloadsReached()
1221
1222         info_dict['fulltitle'] = info_dict['title']
1223         if len(info_dict['title']) > 200:
1224             info_dict['title'] = info_dict['title'][:197] + '...'
1225
1226         if 'format' not in info_dict:
1227             info_dict['format'] = info_dict['ext']
1228
1229         reason = self._match_entry(info_dict, incomplete=False)
1230         if reason is not None:
1231             self.to_screen('[download] ' + reason)
1232             return
1233
1234         self._num_downloads += 1
1235
1236         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1237
1238         # Forced printings
1239         if self.params.get('forcetitle', False):
1240             self.to_stdout(info_dict['fulltitle'])
1241         if self.params.get('forceid', False):
1242             self.to_stdout(info_dict['id'])
1243         if self.params.get('forceurl', False):
1244             if info_dict.get('requested_formats') is not None:
1245                 for f in info_dict['requested_formats']:
1246                     self.to_stdout(f['url'] + f.get('play_path', ''))
1247             else:
1248                 # For RTMP URLs, also include the playpath
1249                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1250         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1251             self.to_stdout(info_dict['thumbnail'])
1252         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1253             self.to_stdout(info_dict['description'])
1254         if self.params.get('forcefilename', False) and filename is not None:
1255             self.to_stdout(filename)
1256         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1257             self.to_stdout(formatSeconds(info_dict['duration']))
1258         if self.params.get('forceformat', False):
1259             self.to_stdout(info_dict['format'])
1260         if self.params.get('forcejson', False):
1261             self.to_stdout(json.dumps(info_dict))
1262
1263         # Do nothing else if in simulate mode
1264         if self.params.get('simulate', False):
1265             return
1266
1267         if filename is None:
1268             return
1269
1270         try:
1271             dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1272             if dn and not os.path.exists(dn):
1273                 os.makedirs(dn)
1274         except (OSError, IOError) as err:
1275             self.report_error('unable to create directory ' + compat_str(err))
1276             return
1277
1278         if self.params.get('writedescription', False):
1279             descfn = filename + '.description'
1280             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1281                 self.to_screen('[info] Video description is already present')
1282             elif info_dict.get('description') is None:
1283                 self.report_warning('There\'s no description to write.')
1284             else:
1285                 try:
1286                     self.to_screen('[info] Writing video description to: ' + descfn)
1287                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1288                         descfile.write(info_dict['description'])
1289                 except (OSError, IOError):
1290                     self.report_error('Cannot write description file ' + descfn)
1291                     return
1292
1293         if self.params.get('writeannotations', False):
1294             annofn = filename + '.annotations.xml'
1295             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1296                 self.to_screen('[info] Video annotations are already present')
1297             else:
1298                 try:
1299                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1300                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1301                         annofile.write(info_dict['annotations'])
1302                 except (KeyError, TypeError):
1303                     self.report_warning('There are no annotations to write.')
1304                 except (OSError, IOError):
1305                     self.report_error('Cannot write annotations file: ' + annofn)
1306                     return
1307
1308         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1309                                        self.params.get('writeautomaticsub')])
1310
1311         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1312             # subtitles download errors are already managed as troubles in relevant IE
1313             # that way it will silently go on when used with unsupporting IE
1314             subtitles = info_dict['requested_subtitles']
1315             ie = self.get_info_extractor(info_dict['extractor_key'])
1316             for sub_lang, sub_info in subtitles.items():
1317                 sub_format = sub_info['ext']
1318                 if sub_info.get('data') is not None:
1319                     sub_data = sub_info['data']
1320                 else:
1321                     try:
1322                         sub_data = ie._download_webpage(
1323                             sub_info['url'], info_dict['id'], note=False)
1324                     except ExtractorError as err:
1325                         self.report_warning('Unable to download subtitle for "%s": %s' %
1326                                             (sub_lang, compat_str(err.cause)))
1327                         continue
1328                 try:
1329                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1330                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1331                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1332                     else:
1333                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1334                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1335                             subfile.write(sub_data)
1336                 except (OSError, IOError):
1337                     self.report_error('Cannot write subtitles file ' + sub_filename)
1338                     return
1339
1340         if self.params.get('writeinfojson', False):
1341             infofn = os.path.splitext(filename)[0] + '.info.json'
1342             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1343                 self.to_screen('[info] Video description metadata is already present')
1344             else:
1345                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1346                 try:
1347                     write_json_file(info_dict, infofn)
1348                 except (OSError, IOError):
1349                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1350                     return
1351
1352         self._write_thumbnails(info_dict, filename)
1353
1354         if not self.params.get('skip_download', False):
1355             try:
1356                 def dl(name, info):
1357                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1358                     for ph in self._progress_hooks:
1359                         fd.add_progress_hook(ph)
1360                     if self.params.get('verbose'):
1361                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1362                     return fd.download(name, info)
1363
1364                 if info_dict.get('requested_formats') is not None:
1365                     downloaded = []
1366                     success = True
1367                     merger = FFmpegMergerPP(self)
1368                     if not merger.available:
1369                         postprocessors = []
1370                         self.report_warning('You have requested multiple '
1371                                             'formats but ffmpeg or avconv are not installed.'
1372                                             ' The formats won\'t be merged')
1373                     else:
1374                         postprocessors = [merger]
1375
1376                     def compatible_formats(formats):
1377                         video, audio = formats
1378                         # Check extension
1379                         video_ext, audio_ext = audio.get('ext'), video.get('ext')
1380                         if video_ext and audio_ext:
1381                             COMPATIBLE_EXTS = (
1382                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
1383                                 ('webm')
1384                             )
1385                             for exts in COMPATIBLE_EXTS:
1386                                 if video_ext in exts and audio_ext in exts:
1387                                     return True
1388                         # TODO: Check acodec/vcodec
1389                         return False
1390
1391                     requested_formats = info_dict['requested_formats']
1392                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1393                         filename = os.path.splitext(filename)[0] + '.mkv'
1394                         self.report_warning('You have requested formats incompatible for merge. '
1395                                             'The formats will be merged into mkv')
1396                     if os.path.exists(encodeFilename(filename)):
1397                         self.to_screen(
1398                             '[download] %s has already been downloaded and '
1399                             'merged' % filename)
1400                     else:
1401                         for f in requested_formats:
1402                             new_info = dict(info_dict)
1403                             new_info.update(f)
1404                             fname = self.prepare_filename(new_info)
1405                             fname = prepend_extension(fname, 'f%s' % f['format_id'])
1406                             downloaded.append(fname)
1407                             partial_success = dl(fname, new_info)
1408                             success = success and partial_success
1409                         info_dict['__postprocessors'] = postprocessors
1410                         info_dict['__files_to_merge'] = downloaded
1411                 else:
1412                     # Just a single file
1413                     success = dl(filename, info_dict)
1414             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1415                 self.report_error('unable to download video data: %s' % str(err))
1416                 return
1417             except (OSError, IOError) as err:
1418                 raise UnavailableVideoError(err)
1419             except (ContentTooShortError, ) as err:
1420                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1421                 return
1422
1423             if success:
1424                 # Fixup content
1425                 fixup_policy = self.params.get('fixup')
1426                 if fixup_policy is None:
1427                     fixup_policy = 'detect_or_warn'
1428
1429                 stretched_ratio = info_dict.get('stretched_ratio')
1430                 if stretched_ratio is not None and stretched_ratio != 1:
1431                     if fixup_policy == 'warn':
1432                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1433                             info_dict['id'], stretched_ratio))
1434                     elif fixup_policy == 'detect_or_warn':
1435                         stretched_pp = FFmpegFixupStretchedPP(self)
1436                         if stretched_pp.available:
1437                             info_dict.setdefault('__postprocessors', [])
1438                             info_dict['__postprocessors'].append(stretched_pp)
1439                         else:
1440                             self.report_warning(
1441                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1442                                     info_dict['id'], stretched_ratio))
1443                     else:
1444                         assert fixup_policy in ('ignore', 'never')
1445
1446                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1447                     if fixup_policy == 'warn':
1448                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1449                             info_dict['id']))
1450                     elif fixup_policy == 'detect_or_warn':
1451                         fixup_pp = FFmpegFixupM4aPP(self)
1452                         if fixup_pp.available:
1453                             info_dict.setdefault('__postprocessors', [])
1454                             info_dict['__postprocessors'].append(fixup_pp)
1455                         else:
1456                             self.report_warning(
1457                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1458                                     info_dict['id']))
1459                     else:
1460                         assert fixup_policy in ('ignore', 'never')
1461
1462                 try:
1463                     self.post_process(filename, info_dict)
1464                 except (PostProcessingError) as err:
1465                     self.report_error('postprocessing: %s' % str(err))
1466                     return
1467                 self.record_download_archive(info_dict)
1468
1469     def download(self, url_list):
1470         """Download a given list of URLs."""
1471         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1472         if (len(url_list) > 1 and
1473                 '%' not in outtmpl and
1474                 self.params.get('max_downloads') != 1):
1475             raise SameFileError(outtmpl)
1476
1477         for url in url_list:
1478             try:
1479                 # It also downloads the videos
1480                 res = self.extract_info(url)
1481             except UnavailableVideoError:
1482                 self.report_error('unable to download video')
1483             except MaxDownloadsReached:
1484                 self.to_screen('[info] Maximum number of downloaded files reached.')
1485                 raise
1486             else:
1487                 if self.params.get('dump_single_json', False):
1488                     self.to_stdout(json.dumps(res))
1489
1490         return self._download_retcode
1491
1492     def download_with_info_file(self, info_filename):
1493         with contextlib.closing(fileinput.FileInput(
1494                 [info_filename], mode='r',
1495                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1496             # FileInput doesn't have a read method, we can't call json.load
1497             info = json.loads('\n'.join(f))
1498         try:
1499             self.process_ie_result(info, download=True)
1500         except DownloadError:
1501             webpage_url = info.get('webpage_url')
1502             if webpage_url is not None:
1503                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1504                 return self.download([webpage_url])
1505             else:
1506                 raise
1507         return self._download_retcode
1508
1509     def post_process(self, filename, ie_info):
1510         """Run all the postprocessors on the given file."""
1511         info = dict(ie_info)
1512         info['filepath'] = filename
1513         pps_chain = []
1514         if ie_info.get('__postprocessors') is not None:
1515             pps_chain.extend(ie_info['__postprocessors'])
1516         pps_chain.extend(self._pps)
1517         for pp in pps_chain:
1518             try:
1519                 files_to_delete, info = pp.run(info)
1520             except PostProcessingError as e:
1521                 self.report_error(e.msg)
1522             if files_to_delete and not self.params.get('keepvideo', False):
1523                 for old_filename in files_to_delete:
1524                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1525                     try:
1526                         os.remove(encodeFilename(old_filename))
1527                     except (IOError, OSError):
1528                         self.report_warning('Unable to remove downloaded original file')
1529
1530     def _make_archive_id(self, info_dict):
1531         # Future-proof against any change in case
1532         # and backwards compatibility with prior versions
1533         extractor = info_dict.get('extractor_key')
1534         if extractor is None:
1535             if 'id' in info_dict:
1536                 extractor = info_dict.get('ie_key')  # key in a playlist
1537         if extractor is None:
1538             return None  # Incomplete video information
1539         return extractor.lower() + ' ' + info_dict['id']
1540
1541     def in_download_archive(self, info_dict):
1542         fn = self.params.get('download_archive')
1543         if fn is None:
1544             return False
1545
1546         vid_id = self._make_archive_id(info_dict)
1547         if vid_id is None:
1548             return False  # Incomplete video information
1549
1550         try:
1551             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1552                 for line in archive_file:
1553                     if line.strip() == vid_id:
1554                         return True
1555         except IOError as ioe:
1556             if ioe.errno != errno.ENOENT:
1557                 raise
1558         return False
1559
1560     def record_download_archive(self, info_dict):
1561         fn = self.params.get('download_archive')
1562         if fn is None:
1563             return
1564         vid_id = self._make_archive_id(info_dict)
1565         assert vid_id
1566         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1567             archive_file.write(vid_id + '\n')
1568
1569     @staticmethod
1570     def format_resolution(format, default='unknown'):
1571         if format.get('vcodec') == 'none':
1572             return 'audio only'
1573         if format.get('resolution') is not None:
1574             return format['resolution']
1575         if format.get('height') is not None:
1576             if format.get('width') is not None:
1577                 res = '%sx%s' % (format['width'], format['height'])
1578             else:
1579                 res = '%sp' % format['height']
1580         elif format.get('width') is not None:
1581             res = '?x%d' % format['width']
1582         else:
1583             res = default
1584         return res
1585
1586     def _format_note(self, fdict):
1587         res = ''
1588         if fdict.get('ext') in ['f4f', 'f4m']:
1589             res += '(unsupported) '
1590         if fdict.get('format_note') is not None:
1591             res += fdict['format_note'] + ' '
1592         if fdict.get('tbr') is not None:
1593             res += '%4dk ' % fdict['tbr']
1594         if fdict.get('container') is not None:
1595             if res:
1596                 res += ', '
1597             res += '%s container' % fdict['container']
1598         if (fdict.get('vcodec') is not None and
1599                 fdict.get('vcodec') != 'none'):
1600             if res:
1601                 res += ', '
1602             res += fdict['vcodec']
1603             if fdict.get('vbr') is not None:
1604                 res += '@'
1605         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1606             res += 'video@'
1607         if fdict.get('vbr') is not None:
1608             res += '%4dk' % fdict['vbr']
1609         if fdict.get('fps') is not None:
1610             res += ', %sfps' % fdict['fps']
1611         if fdict.get('acodec') is not None:
1612             if res:
1613                 res += ', '
1614             if fdict['acodec'] == 'none':
1615                 res += 'video only'
1616             else:
1617                 res += '%-5s' % fdict['acodec']
1618         elif fdict.get('abr') is not None:
1619             if res:
1620                 res += ', '
1621             res += 'audio'
1622         if fdict.get('abr') is not None:
1623             res += '@%3dk' % fdict['abr']
1624         if fdict.get('asr') is not None:
1625             res += ' (%5dHz)' % fdict['asr']
1626         if fdict.get('filesize') is not None:
1627             if res:
1628                 res += ', '
1629             res += format_bytes(fdict['filesize'])
1630         elif fdict.get('filesize_approx') is not None:
1631             if res:
1632                 res += ', '
1633             res += '~' + format_bytes(fdict['filesize_approx'])
1634         return res
1635
1636     def list_formats(self, info_dict):
1637         formats = info_dict.get('formats', [info_dict])
1638         table = [
1639             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1640             for f in formats
1641             if f.get('preference') is None or f['preference'] >= -1000]
1642         if len(formats) > 1:
1643             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1644
1645         header_line = ['format code', 'extension', 'resolution', 'note']
1646         self.to_screen(
1647             '[info] Available formats for %s:\n%s' %
1648             (info_dict['id'], render_table(header_line, table)))
1649
1650     def list_thumbnails(self, info_dict):
1651         thumbnails = info_dict.get('thumbnails')
1652         if not thumbnails:
1653             tn_url = info_dict.get('thumbnail')
1654             if tn_url:
1655                 thumbnails = [{'id': '0', 'url': tn_url}]
1656             else:
1657                 self.to_screen(
1658                     '[info] No thumbnails present for %s' % info_dict['id'])
1659                 return
1660
1661         self.to_screen(
1662             '[info] Thumbnails for %s:' % info_dict['id'])
1663         self.to_screen(render_table(
1664             ['ID', 'width', 'height', 'URL'],
1665             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1666
1667     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1668         if not subtitles:
1669             self.to_screen('%s has no %s' % (video_id, name))
1670             return
1671         self.to_screen(
1672             'Available %s for %s:' % (name, video_id))
1673         self.to_screen(render_table(
1674             ['Language', 'formats'],
1675             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1676                 for lang, formats in subtitles.items()]))
1677
1678     def urlopen(self, req):
1679         """ Start an HTTP download """
1680
1681         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1682         # always respected by websites, some tend to give out URLs with non percent-encoded
1683         # non-ASCII characters (see telemb.py, ard.py [#3412])
1684         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1685         # To work around aforementioned issue we will replace request's original URL with
1686         # percent-encoded one
1687         req_is_string = isinstance(req, compat_basestring)
1688         url = req if req_is_string else req.get_full_url()
1689         url_escaped = escape_url(url)
1690
1691         # Substitute URL if any change after escaping
1692         if url != url_escaped:
1693             if req_is_string:
1694                 req = url_escaped
1695             else:
1696                 req = compat_urllib_request.Request(
1697                     url_escaped, data=req.data, headers=req.headers,
1698                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1699
1700         return self._opener.open(req, timeout=self._socket_timeout)
1701
1702     def print_debug_header(self):
1703         if not self.params.get('verbose'):
1704             return
1705
1706         if type('') is not compat_str:
1707             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1708             self.report_warning(
1709                 'Your Python is broken! Update to a newer and supported version')
1710
1711         stdout_encoding = getattr(
1712             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1713         encoding_str = (
1714             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1715                 locale.getpreferredencoding(),
1716                 sys.getfilesystemencoding(),
1717                 stdout_encoding,
1718                 self.get_encoding()))
1719         write_string(encoding_str, encoding=None)
1720
1721         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1722         try:
1723             sp = subprocess.Popen(
1724                 ['git', 'rev-parse', '--short', 'HEAD'],
1725                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1726                 cwd=os.path.dirname(os.path.abspath(__file__)))
1727             out, err = sp.communicate()
1728             out = out.decode().strip()
1729             if re.match('[0-9a-f]+', out):
1730                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1731         except Exception:
1732             try:
1733                 sys.exc_clear()
1734             except Exception:
1735                 pass
1736         self._write_string('[debug] Python version %s - %s\n' % (
1737             platform.python_version(), platform_name()))
1738
1739         exe_versions = FFmpegPostProcessor.get_versions(self)
1740         exe_versions['rtmpdump'] = rtmpdump_version()
1741         exe_str = ', '.join(
1742             '%s %s' % (exe, v)
1743             for exe, v in sorted(exe_versions.items())
1744             if v
1745         )
1746         if not exe_str:
1747             exe_str = 'none'
1748         self._write_string('[debug] exe versions: %s\n' % exe_str)
1749
1750         proxy_map = {}
1751         for handler in self._opener.handlers:
1752             if hasattr(handler, 'proxies'):
1753                 proxy_map.update(handler.proxies)
1754         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1755
1756         if self.params.get('call_home', False):
1757             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1758             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1759             latest_version = self.urlopen(
1760                 'https://yt-dl.org/latest/version').read().decode('utf-8')
1761             if version_tuple(latest_version) > version_tuple(__version__):
1762                 self.report_warning(
1763                     'You are using an outdated version (newest version: %s)! '
1764                     'See https://yt-dl.org/update if you need help updating.' %
1765                     latest_version)
1766
1767     def _setup_opener(self):
1768         timeout_val = self.params.get('socket_timeout')
1769         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1770
1771         opts_cookiefile = self.params.get('cookiefile')
1772         opts_proxy = self.params.get('proxy')
1773
1774         if opts_cookiefile is None:
1775             self.cookiejar = compat_cookiejar.CookieJar()
1776         else:
1777             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1778                 opts_cookiefile)
1779             if os.access(opts_cookiefile, os.R_OK):
1780                 self.cookiejar.load()
1781
1782         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1783             self.cookiejar)
1784         if opts_proxy is not None:
1785             if opts_proxy == '':
1786                 proxies = {}
1787             else:
1788                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1789         else:
1790             proxies = compat_urllib_request.getproxies()
1791             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1792             if 'http' in proxies and 'https' not in proxies:
1793                 proxies['https'] = proxies['http']
1794         proxy_handler = PerRequestProxyHandler(proxies)
1795
1796         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1797         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1798         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1799         opener = compat_urllib_request.build_opener(
1800             proxy_handler, https_handler, cookie_processor, ydlh)
1801
1802         # Delete the default user-agent header, which would otherwise apply in
1803         # cases where our custom HTTP handler doesn't come into play
1804         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1805         opener.addheaders = []
1806         self._opener = opener
1807
1808     def encode(self, s):
1809         if isinstance(s, bytes):
1810             return s  # Already encoded
1811
1812         try:
1813             return s.encode(self.get_encoding())
1814         except UnicodeEncodeError as err:
1815             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1816             raise
1817
1818     def get_encoding(self):
1819         encoding = self.params.get('encoding')
1820         if encoding is None:
1821             encoding = preferredencoding()
1822         return encoding
1823
1824     def _write_thumbnails(self, info_dict, filename):
1825         if self.params.get('writethumbnail', False):
1826             thumbnails = info_dict.get('thumbnails')
1827             if thumbnails:
1828                 thumbnails = [thumbnails[-1]]
1829         elif self.params.get('write_all_thumbnails', False):
1830             thumbnails = info_dict.get('thumbnails')
1831         else:
1832             return
1833
1834         if not thumbnails:
1835             # No thumbnails present, so return immediately
1836             return
1837
1838         for t in thumbnails:
1839             thumb_ext = determine_ext(t['url'], 'jpg')
1840             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
1841             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
1842             thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
1843
1844             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1845                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
1846                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1847             else:
1848                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
1849                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1850                 try:
1851                     uf = self.urlopen(t['url'])
1852                     with open(thumb_filename, 'wb') as thumbf:
1853                         shutil.copyfileobj(uf, thumbf)
1854                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
1855                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
1856                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1857                     self.report_warning('Unable to download thumbnail "%s": %s' %
1858                                         (t['url'], compat_str(err)))