[YoutubeDL] Handle out-of-range timestamps (#5826)
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import datetime
9 import errno
10 import fileinput
11 import io
12 import itertools
13 import json
14 import locale
15 import operator
16 import os
17 import platform
18 import re
19 import shutil
20 import subprocess
21 import socket
22 import sys
23 import time
24 import traceback
25
26 if os.name == 'nt':
27     import ctypes
28
29 from .compat import (
30     compat_basestring,
31     compat_cookiejar,
32     compat_expanduser,
33     compat_get_terminal_size,
34     compat_http_client,
35     compat_kwargs,
36     compat_str,
37     compat_urllib_error,
38     compat_urllib_request,
39 )
40 from .utils import (
41     escape_url,
42     ContentTooShortError,
43     date_from_str,
44     DateRange,
45     DEFAULT_OUTTMPL,
46     determine_ext,
47     DownloadError,
48     encodeFilename,
49     ExtractorError,
50     format_bytes,
51     formatSeconds,
52     HEADRequest,
53     locked_file,
54     make_HTTPS_handler,
55     MaxDownloadsReached,
56     PagedList,
57     parse_filesize,
58     PerRequestProxyHandler,
59     PostProcessingError,
60     platform_name,
61     preferredencoding,
62     render_table,
63     SameFileError,
64     sanitize_filename,
65     sanitize_path,
66     std_headers,
67     subtitles_filename,
68     UnavailableVideoError,
69     url_basename,
70     version_tuple,
71     write_json_file,
72     write_string,
73     YoutubeDLHandler,
74     prepend_extension,
75     replace_extension,
76     args_to_str,
77     age_restricted,
78 )
79 from .cache import Cache
80 from .extractor import get_info_extractor, gen_extractors
81 from .downloader import get_suitable_downloader
82 from .downloader.rtmp import rtmpdump_version
83 from .postprocessor import (
84     FFmpegFixupM4aPP,
85     FFmpegFixupStretchedPP,
86     FFmpegMergerPP,
87     FFmpegPostProcessor,
88     get_postprocessor,
89 )
90 from .version import __version__
91
92
93 class YoutubeDL(object):
94     """YoutubeDL class.
95
96     YoutubeDL objects are the ones responsible of downloading the
97     actual video file and writing it to disk if the user has requested
98     it, among some other tasks. In most cases there should be one per
99     program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information (a task that InfoExtractors do), it
101     has to pass the URL to one of them.
102
103     For this, YoutubeDL objects have a method that allows
104     InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
106     finds that reports being able to handle it. The InfoExtractor extracts
107     all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
109     Downloader to download the video.
110
111     YoutubeDL objects accept a lot of parameters. In order not to saturate
112     the object constructor with arguments, it receives a dictionary of
113     options instead. These options are available through the params
114     attribute for the InfoExtractors to use. The YoutubeDL also
115     registers itself as the downloader in charge for the InfoExtractors
116     that are added to it, so this is a "mutual registration".
117
118     Available options:
119
120     username:          Username for authentication purposes.
121     password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
123     usenetrc:          Use netrc for authentication instead.
124     verbose:           Print additional info to stdout.
125     quiet:             Do not print messages to stdout.
126     no_warnings:       Do not print out anything for warnings.
127     forceurl:          Force printing final URL.
128     forcetitle:        Force printing title.
129     forceid:           Force printing ID.
130     forcethumbnail:    Force printing thumbnail URL.
131     forcedescription:  Force printing description.
132     forcefilename:     Force printing final filename.
133     forceduration:     Force printing duration.
134     forcejson:         Force printing info_dict as JSON.
135     dump_single_json:  Force printing the info_dict of the whole playlist
136                        (or video) as a single JSON line.
137     simulate:          Do not download the video files.
138     format:            Video format code. See options.py for more information.
139     outtmpl:           Template for output names.
140     restrictfilenames: Do not allow "&" and spaces in file names
141     ignoreerrors:      Do not stop on download errors.
142     nooverwrites:      Prevent overwriting files.
143     playliststart:     Playlist item to start at.
144     playlistend:       Playlist item to end at.
145     playlist_items:    Specific indices of playlist to download.
146     playlistreverse:   Download playlist items in reverse order.
147     matchtitle:        Download only matching titles.
148     rejecttitle:       Reject downloads for matching titles.
149     logger:            Log messages to a logging.Logger instance.
150     logtostderr:       Log messages to stderr instead of stdout.
151     writedescription:  Write the video description to a .description file
152     writeinfojson:     Write the video description to a .info.json file
153     writeannotations:  Write the video annotations to a .annotations.xml file
154     writethumbnail:    Write the thumbnail image to a file
155     write_all_thumbnails:  Write all thumbnail formats to files
156     writesubtitles:    Write the video subtitles to a file
157     writeautomaticsub: Write the automatic subtitles to a file
158     allsubtitles:      Downloads all the subtitles of the video
159                        (requires writesubtitles or writeautomaticsub)
160     listsubtitles:     Lists all available subtitles for the video
161     subtitlesformat:   The format code for subtitles
162     subtitleslangs:    List of languages of the subtitles to download
163     keepvideo:         Keep the video file after post-processing
164     daterange:         A DateRange object, download only if the upload_date is in the range.
165     skip_download:     Skip the actual download of the video file
166     cachedir:          Location of the cache files in the filesystem.
167                        False to disable filesystem cache.
168     noplaylist:        Download single video instead of a playlist if in doubt.
169     age_limit:         An integer representing the user's age in years.
170                        Unsuitable videos for the given age are skipped.
171     min_views:         An integer representing the minimum view count the video
172                        must have in order to not be skipped.
173                        Videos without view count information are always
174                        downloaded. None for no limit.
175     max_views:         An integer representing the maximum view count.
176                        Videos that are more popular than that are not
177                        downloaded.
178                        Videos without view count information are always
179                        downloaded. None for no limit.
180     download_archive:  File name of a file where all downloads are recorded.
181                        Videos already present in the file are not downloaded
182                        again.
183     cookiefile:        File name where cookies should be read from and dumped to.
184     nocheckcertificate:Do not verify SSL certificates
185     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
186                        At the moment, this is only supported by YouTube.
187     proxy:             URL of the proxy server to use
188     cn_verification_proxy:  URL of the proxy to use for IP address verification
189                        on Chinese sites. (Experimental)
190     socket_timeout:    Time to wait for unresponsive hosts, in seconds
191     bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
193     debug_printtraffic:Print out sent and received HTTP traffic
194     include_ads:       Download ads as well
195     default_search:    Prepend this string if an input url is not valid.
196                        'auto' for elaborate guessing
197     encoding:          Use this encoding instead of the system-specified.
198     extract_flat:      Do not resolve URLs, return the immediate result.
199                        Pass in 'in_playlist' to only show this behavior for
200                        playlist items.
201     postprocessors:    A list of dictionaries, each with an entry
202                        * key:  The name of the postprocessor. See
203                                youtube_dl/postprocessor/__init__.py for a list.
204                        as well as any further keyword arguments for the
205                        postprocessor.
206     progress_hooks:    A list of functions that get called on download
207                        progress, with a dictionary with the entries
208                        * status: One of "downloading", "error", or "finished".
209                                  Check this first and ignore unknown values.
210
211                        If status is one of "downloading", or "finished", the
212                        following properties may also be present:
213                        * filename: The final filename (always present)
214                        * tmpfilename: The filename we're currently writing to
215                        * downloaded_bytes: Bytes on disk
216                        * total_bytes: Size of the whole file, None if unknown
217                        * total_bytes_estimate: Guess of the eventual file size,
218                                                None if unavailable.
219                        * elapsed: The number of seconds since download started.
220                        * eta: The estimated time in seconds, None if unknown
221                        * speed: The download speed in bytes/second, None if
222                                 unknown
223                        * fragment_index: The counter of the currently
224                                          downloaded video fragment.
225                        * fragment_count: The number of fragments (= individual
226                                          files that will be merged)
227
228                        Progress hooks are guaranteed to be called at least once
229                        (with status "finished") if the download is successful.
230     merge_output_format: Extension to use when merging formats.
231     fixup:             Automatically correct known faults of the file.
232                        One of:
233                        - "never": do nothing
234                        - "warn": only emit a warning
235                        - "detect_or_warn": check whether we can do anything
236                                            about it, warn otherwise (default)
237     source_address:    (Experimental) Client-side IP address to bind to.
238     call_home:         Boolean, true iff we are allowed to contact the
239                        youtube-dl servers for debugging.
240     sleep_interval:    Number of seconds to sleep before each download.
241     listformats:       Print an overview of available video formats and exit.
242     list_thumbnails:   Print a table of all thumbnails and exit.
243     match_filter:      A function that gets called with the info_dict of
244                        every video.
245                        If it returns a message, the video is ignored.
246                        If it returns None, the video is downloaded.
247                        match_filter_func in utils.py is one example for this.
248     no_color:          Do not emit color codes in output.
249
250     The following options determine which downloader is picked:
251     external_downloader: Executable of the external downloader to call.
252                        None or unset for standard (built-in) downloader.
253     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
254
255     The following parameters are not used by YoutubeDL itself, they are used by
256     the downloader (see youtube_dl/downloader/common.py):
257     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
258     noresizebuffer, retries, continuedl, noprogress, consoletitle,
259     xattr_set_filesize, external_downloader_args.
260
261     The following options are used by the post processors:
262     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
263                        otherwise prefer avconv.
264     """
265
    # Class-level defaults only; every one of these is replaced with a
    # per-instance value in __init__.
    params = None              # dict of user options (see class docstring)
    _ies = []                  # registered InfoExtractor instances, in order
    _pps = []                  # post-processor chain, run after each download
    _download_retcode = None   # exit code accumulated across downloads
    _num_downloads = None      # count of downloads so far (feeds %(autonumber)s)
    _screen_file = None        # stream for screen output (stdout or stderr)
272
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.

        params    -- dict of options (see the class docstring); may be None.
        auto_init -- when True, print the debug header and register all
                     default info extractors immediately.
        """
        if params is None:
            params = {}
        self._ies = []
        self._ies_instances = {}
        self._pps = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Screen output goes to stderr instead of stdout when logtostderr is set.
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = params
        self.cache = Cache(self)

        if params.get('bidi_workaround', False):
            try:
                import pty
                # Route output through an external bidi filter (bidiv, or
                # fribidi as fallback) attached via a pseudo-terminal.
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    # bidiv is not available; try fribidi instead.
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == 2:  # ENOENT: neither filter binary was found
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.version_info >= (3,) and sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
                not params.get('restrictfilenames', False)):
            # On Python 3, the Unicode filesystem API will throw errors (#1474)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if isinstance(params.get('outtmpl'), bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')

        self._setup_opener()

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        # Instantiate configured post-processors; each entry is a dict whose
        # 'key' names the PP class and whose other items are its kwargs.
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            del pp_def['key']
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)
345
346     def warn_if_short_id(self, argv):
347         # short YouTube ID starting with dash?
348         idxs = [
349             i for i, a in enumerate(argv)
350             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
351         if idxs:
352             correct_argv = (
353                 ['youtube-dl'] +
354                 [a for i, a in enumerate(argv) if i not in idxs] +
355                 ['--'] + [argv[i] for i in idxs]
356             )
357             self.report_warning(
358                 'Long argument string detected. '
359                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
360                 args_to_str(correct_argv))
361
362     def add_info_extractor(self, ie):
363         """Add an InfoExtractor object to the end of the list."""
364         self._ies.append(ie)
365         self._ies_instances[ie.ie_key()] = ie
366         ie.set_downloader(self)
367
368     def get_info_extractor(self, ie_key):
369         """
370         Get an instance of an IE with name ie_key, it will try to get one from
371         the _ies list, if there's no instance it will create a new one and add
372         it to the extractor list.
373         """
374         ie = self._ies_instances.get(ie_key)
375         if ie is None:
376             ie = get_info_extractor(ie_key)()
377             self.add_info_extractor(ie)
378         return ie
379
380     def add_default_info_extractors(self):
381         """
382         Add the InfoExtractors returned by gen_extractors to the end of the list
383         """
384         for ie in gen_extractors():
385             self.add_info_extractor(ie)
386
387     def add_post_processor(self, pp):
388         """Add a PostProcessor object to the end of the chain."""
389         self._pps.append(pp)
390         pp.set_downloader(self)
391
392     def add_progress_hook(self, ph):
393         """Add the progress hook (currently only for the file downloader)"""
394         self._progress_hooks.append(ph)
395
396     def _bidi_workaround(self, message):
397         if not hasattr(self, '_output_channel'):
398             return message
399
400         assert hasattr(self, '_output_process')
401         assert isinstance(message, compat_str)
402         line_count = message.count('\n') + 1
403         self._output_process.stdin.write((message + '\n').encode('utf-8'))
404         self._output_process.stdin.flush()
405         res = ''.join(self._output_channel.readline().decode('utf-8')
406                       for _ in range(line_count))
407         return res[:-len('\n')]
408
409     def to_screen(self, message, skip_eol=False):
410         """Print message to stdout if not in quiet mode."""
411         return self.to_stdout(message, skip_eol, check_quiet=True)
412
413     def _write_string(self, s, out=None):
414         write_string(s, out=out, encoding=self.params.get('encoding'))
415
416     def to_stdout(self, message, skip_eol=False, check_quiet=False):
417         """Print message to stdout if not in quiet mode."""
418         if self.params.get('logger'):
419             self.params['logger'].debug(message)
420         elif not check_quiet or not self.params.get('quiet', False):
421             message = self._bidi_workaround(message)
422             terminator = ['\n', ''][skip_eol]
423             output = message + terminator
424
425             self._write_string(output, self._screen_file)
426
427     def to_stderr(self, message):
428         """Print message to stderr."""
429         assert isinstance(message, compat_str)
430         if self.params.get('logger'):
431             self.params['logger'].error(message)
432         else:
433             message = self._bidi_workaround(message)
434             output = message + '\n'
435             self._write_string(output, self._err_file)
436
437     def to_console_title(self, message):
438         if not self.params.get('consoletitle', False):
439             return
440         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
441             # c_wchar_p() might not be necessary if `message` is
442             # already of type unicode()
443             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
444         elif 'TERM' in os.environ:
445             self._write_string('\033]0;%s\007' % message, self._screen_file)
446
447     def save_console_title(self):
448         if not self.params.get('consoletitle', False):
449             return
450         if 'TERM' in os.environ:
451             # Save the title on stack
452             self._write_string('\033[22;0t', self._screen_file)
453
454     def restore_console_title(self):
455         if not self.params.get('consoletitle', False):
456             return
457         if 'TERM' in os.environ:
458             # Restore the title from stack
459             self._write_string('\033[23;0t', self._screen_file)
460
461     def __enter__(self):
462         self.save_console_title()
463         return self
464
465     def __exit__(self, *args):
466         self.restore_console_title()
467
468         if self.params.get('cookiefile') is not None:
469             self.cookiejar.save()
470
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        # The active exception wraps an original exc_info
                        # (e.g. ExtractorError); include the wrapped traceback.
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: show where we were called from.
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Prefer the wrapped exception's exc_info, so the raised
            # DownloadError points at the original cause, not the wrapper.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # With ignoreerrors set we swallow the error but remember the failure
        # for the process exit code.
        self._download_retcode = 1
500
501     def report_warning(self, message):
502         '''
503         Print the message to stderr, it will be prefixed with 'WARNING:'
504         If stderr is a tty file the 'WARNING:' will be colored
505         '''
506         if self.params.get('logger') is not None:
507             self.params['logger'].warning(message)
508         else:
509             if self.params.get('no_warnings'):
510                 return
511             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
512                 _msg_header = '\033[0;33mWARNING:\033[0m'
513             else:
514                 _msg_header = 'WARNING:'
515             warning_message = '%s %s' % (_msg_header, message)
516             self.to_stderr(warning_message)
517
518     def report_error(self, message, tb=None):
519         '''
520         Do the same as trouble, but prefixes the message with 'ERROR:', colored
521         in red if stderr is a tty file.
522         '''
523         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
524             _msg_header = '\033[0;31mERROR:\033[0m'
525         else:
526             _msg_header = 'ERROR:'
527         error_message = '%s %s' % (_msg_header, message)
528         self.trouble(error_message, tb)
529
530     def report_file_already_downloaded(self, file_name):
531         """Report file has already been fully downloaded."""
532         try:
533             self.to_screen('[download] %s has already been downloaded' % file_name)
534         except UnicodeEncodeError:
535             self.to_screen('[download] The file has already been downloaded')
536
537     def prepare_filename(self, info_dict):
538         """Generate the output filename."""
539         try:
540             template_dict = dict(info_dict)
541
542             template_dict['epoch'] = int(time.time())
543             autonumber_size = self.params.get('autonumber_size')
544             if autonumber_size is None:
545                 autonumber_size = 5
546             autonumber_templ = '%0' + str(autonumber_size) + 'd'
547             template_dict['autonumber'] = autonumber_templ % self._num_downloads
548             if template_dict.get('playlist_index') is not None:
549                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
550             if template_dict.get('resolution') is None:
551                 if template_dict.get('width') and template_dict.get('height'):
552                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
553                 elif template_dict.get('height'):
554                     template_dict['resolution'] = '%sp' % template_dict['height']
555                 elif template_dict.get('width'):
556                     template_dict['resolution'] = '?x%d' % template_dict['width']
557
558             sanitize = lambda k, v: sanitize_filename(
559                 compat_str(v),
560                 restricted=self.params.get('restrictfilenames'),
561                 is_id=(k == 'id'))
562             template_dict = dict((k, sanitize(k, v))
563                                  for k, v in template_dict.items()
564                                  if v is not None)
565             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
566
567             outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
568             tmpl = compat_expanduser(outtmpl)
569             filename = tmpl % template_dict
570             # Temporary fix for #4787
571             # 'Treat' all problem characters by passing filename through preferredencoding
572             # to workaround encoding issues with subprocess on python2 @ Windows
573             if sys.version_info < (3, 0) and sys.platform == 'win32':
574                 filename = encodeFilename(filename, True).decode(preferredencoding())
575             return filename
576         except ValueError as err:
577             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
578             return None
579
580     def _match_entry(self, info_dict, incomplete):
581         """ Returns None iff the file should be downloaded """
582
583         video_title = info_dict.get('title', info_dict.get('id', 'video'))
584         if 'title' in info_dict:
585             # This can happen when we're just evaluating the playlist
586             title = info_dict['title']
587             matchtitle = self.params.get('matchtitle', False)
588             if matchtitle:
589                 if not re.search(matchtitle, title, re.IGNORECASE):
590                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
591             rejecttitle = self.params.get('rejecttitle', False)
592             if rejecttitle:
593                 if re.search(rejecttitle, title, re.IGNORECASE):
594                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
595         date = info_dict.get('upload_date', None)
596         if date is not None:
597             dateRange = self.params.get('daterange', DateRange())
598             if date not in dateRange:
599                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
600         view_count = info_dict.get('view_count', None)
601         if view_count is not None:
602             min_views = self.params.get('min_views')
603             if min_views is not None and view_count < min_views:
604                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
605             max_views = self.params.get('max_views')
606             if max_views is not None and view_count > max_views:
607                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
608         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
609             return 'Skipping "%s" because it is age restricted' % video_title
610         if self.in_download_archive(info_dict):
611             return '%s has already been recorded in archive' % video_title
612
613         if not incomplete:
614             match_filter = self.params.get('match_filter')
615             if match_filter is not None:
616                 ret = match_filter(info_dict)
617                 if ret is not None:
618                     return ret
619
620         return None
621
622     @staticmethod
623     def add_extra_info(info_dict, extra_info):
624         '''Set the keys from extra_info in info dict if they are missing'''
625         for key, value in extra_info.items():
626             info_dict.setdefault(key, value)
627
628     def extract_info(self, url, download=True, ie_key=None, extra_info={},
629                      process=True):
630         '''
631         Returns a list with a dictionary for each video we find.
632         If 'download', also downloads the videos.
633         extra_info is a dict containing the extra values to add to each result
634         '''
635
636         if ie_key:
637             ies = [self.get_info_extractor(ie_key)]
638         else:
639             ies = self._ies
640
641         for ie in ies:
642             if not ie.suitable(url):
643                 continue
644
645             if not ie.working():
646                 self.report_warning('The program functionality for this site has been marked as broken, '
647                                     'and will probably not work.')
648
649             try:
650                 ie_result = ie.extract(url)
651                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
652                     break
653                 if isinstance(ie_result, list):
654                     # Backwards compatibility: old IE result format
655                     ie_result = {
656                         '_type': 'compat_list',
657                         'entries': ie_result,
658                     }
659                 self.add_default_extra_info(ie_result, ie, url)
660                 if process:
661                     return self.process_ie_result(ie_result, download, extra_info)
662                 else:
663                     return ie_result
664             except ExtractorError as de:  # An error we somewhat expected
665                 self.report_error(compat_str(de), de.format_traceback())
666                 break
667             except MaxDownloadsReached:
668                 raise
669             except Exception as e:
670                 if self.params.get('ignoreerrors', False):
671                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
672                     break
673                 else:
674                     raise
675         else:
676             self.report_error('no suitable InfoExtractor for URL %s' % url)
677
678     def add_default_extra_info(self, ie_result, ie, url):
679         self.add_extra_info(ie_result, {
680             'extractor': ie.IE_NAME,
681             'webpage_url': url,
682             'webpage_url_basename': url_basename(url),
683             'extractor_key': ie.ie_key(),
684         })
685
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """
        # NOTE(review): the shared {} default looks safe because extra_info is
        # only ever read (add_extra_info/setdefault, recursive calls), never
        # mutated here — confirm before relying on it elsewhere.

        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            extract_flat = self.params.get('extract_flat', False)
            # With --flat-playlist ('in_playlist' inside a playlist, or True
            # always) URL references are returned as-is instead of resolved.
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                    extract_flat is True):
                if self.params.get('forcejson', False):
                    self.to_stdout(json.dumps(ie_result))
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # Non-None fields set by the embedding page override what the
            # target extractor found — except '_type' and 'url', which must
            # come from the resolved result.
            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            assert new_result.get('_type') != 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist' or result_type == 'multi_video':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # --playlist-start is 1-based on the command line; convert to a
            # 0-based slice index
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

            playlistitems_str = self.params.get('playlist_items', None)
            playlistitems = None
            if playlistitems_str is not None:
                # --playlist-items is a comma-separated mix of 1-based single
                # indices and inclusive ranges, e.g. "1,3,5-7"
                def iter_playlistitems(format):
                    for string_segment in format.split(','):
                        if '-' in string_segment:
                            start, end = string_segment.split('-')
                            for item in range(int(start), int(end) + 1):
                                yield int(item)
                        else:
                            yield int(string_segment)
                playlistitems = iter_playlistitems(playlistitems_str)

            # 'entries' may be a plain list, a lazily-fetched PagedList or any
            # other iterable (e.g. a generator); each needs its own slicing.
            ie_entries = ie_result['entries']
            if isinstance(ie_entries, list):
                n_all_entries = len(ie_entries)
                if playlistitems:
                    # out-of-range 1-based indices are silently dropped
                    entries = [
                        ie_entries[i - 1] for i in playlistitems
                        if -n_all_entries <= i - 1 < n_all_entries]
                else:
                    entries = ie_entries[playliststart:playlistend]
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            elif isinstance(ie_entries, PagedList):
                if playlistitems:
                    entries = []
                    for item in playlistitems:
                        entries.extend(ie_entries.getslice(
                            item - 1, item
                        ))
                else:
                    entries = ie_entries.getslice(
                        playliststart, playlistend)
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))
            else:  # iterable
                if playlistitems:
                    # NOTE(review): unlike the list branch above, this does not
                    # guard against out-of-range indices (would raise IndexError)
                    entry_list = list(ie_entries)
                    entries = [entry_list[i - 1] for i in playlistitems]
                else:
                    entries = list(itertools.islice(
                        ie_entries, playliststart, playlistend))
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))

            if self.params.get('playlistreverse', False):
                entries = entries[::-1]

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                # per-entry metadata inherited from the enclosing playlist
                extra = {
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_id': ie_result.get('id'),
                    'playlist_title': ie_result.get('title'),
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

                # Skip entries rejected by filters (--match-title etc.); the
                # entry is not fully extracted yet, hence incomplete=True
                reason = self._match_entry(entry, incomplete=True)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                    continue

                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            return ie_result
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            # attach the playlist-level metadata to each legacy entry in place
            def _fixup(r):
                self.add_extra_info(
                    r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    }
                )
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
849
850     def _apply_format_filter(self, format_spec, available_formats):
851         " Returns a tuple of the remaining format_spec and filtered formats "
852
853         OPERATORS = {
854             '<': operator.lt,
855             '<=': operator.le,
856             '>': operator.gt,
857             '>=': operator.ge,
858             '=': operator.eq,
859             '!=': operator.ne,
860         }
861         operator_rex = re.compile(r'''(?x)\s*\[
862             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
863             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
864             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
865             \]$
866             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
867         m = operator_rex.search(format_spec)
868         if m:
869             try:
870                 comparison_value = int(m.group('value'))
871             except ValueError:
872                 comparison_value = parse_filesize(m.group('value'))
873                 if comparison_value is None:
874                     comparison_value = parse_filesize(m.group('value') + 'B')
875                 if comparison_value is None:
876                     raise ValueError(
877                         'Invalid value %r in format specification %r' % (
878                             m.group('value'), format_spec))
879             op = OPERATORS[m.group('op')]
880
881         if not m:
882             STR_OPERATORS = {
883                 '=': operator.eq,
884                 '!=': operator.ne,
885             }
886             str_operator_rex = re.compile(r'''(?x)\s*\[
887                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
888                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
889                 \s*(?P<value>[a-zA-Z0-9_-]+)
890                 \s*\]$
891                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
892             m = str_operator_rex.search(format_spec)
893             if m:
894                 comparison_value = m.group('value')
895                 op = STR_OPERATORS[m.group('op')]
896
897         if not m:
898             raise ValueError('Invalid format specification %r' % format_spec)
899
900         def _filter(f):
901             actual_value = f.get(m.group('key'))
902             if actual_value is None:
903                 return m.group('none_inclusive')
904             return op(actual_value, comparison_value)
905         new_formats = [f for f in available_formats if _filter(f)]
906
907         new_format_spec = format_spec[:-len(m.group(0))]
908         if not new_format_spec:
909             new_format_spec = 'best'
910
911         return (new_format_spec, new_formats)
912
913     def select_format(self, format_spec, available_formats):
914         while format_spec.endswith(']'):
915             format_spec, available_formats = self._apply_format_filter(
916                 format_spec, available_formats)
917         if not available_formats:
918             return None
919
920         if format_spec in ['best', 'worst', None]:
921             format_idx = 0 if format_spec == 'worst' else -1
922             audiovideo_formats = [
923                 f for f in available_formats
924                 if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
925             if audiovideo_formats:
926                 return audiovideo_formats[format_idx]
927             # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
928             elif (all(f.get('acodec') != 'none' for f in available_formats) or
929                   all(f.get('vcodec') != 'none' for f in available_formats)):
930                 return available_formats[format_idx]
931         elif format_spec == 'bestaudio':
932             audio_formats = [
933                 f for f in available_formats
934                 if f.get('vcodec') == 'none']
935             if audio_formats:
936                 return audio_formats[-1]
937         elif format_spec == 'worstaudio':
938             audio_formats = [
939                 f for f in available_formats
940                 if f.get('vcodec') == 'none']
941             if audio_formats:
942                 return audio_formats[0]
943         elif format_spec == 'bestvideo':
944             video_formats = [
945                 f for f in available_formats
946                 if f.get('acodec') == 'none']
947             if video_formats:
948                 return video_formats[-1]
949         elif format_spec == 'worstvideo':
950             video_formats = [
951                 f for f in available_formats
952                 if f.get('acodec') == 'none']
953             if video_formats:
954                 return video_formats[0]
955         else:
956             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
957             if format_spec in extensions:
958                 filter_f = lambda f: f['ext'] == format_spec
959             else:
960                 filter_f = lambda f: f['format_id'] == format_spec
961             matches = list(filter(filter_f, available_formats))
962             if matches:
963                 return matches[-1]
964         return None
965
966     def _calc_headers(self, info_dict):
967         res = std_headers.copy()
968
969         add_headers = info_dict.get('http_headers')
970         if add_headers:
971             res.update(add_headers)
972
973         cookies = self._calc_cookies(info_dict)
974         if cookies:
975             res['Cookie'] = cookies
976
977         return res
978
979     def _calc_cookies(self, info_dict):
980         pr = compat_urllib_request.Request(info_dict['url'])
981         self.cookiejar.add_cookie_header(pr)
982         return pr.get_header('Cookie')
983
    def process_video_result(self, info_dict, download=True):
        """Resolve a single video result: validate required fields, fill in
        defaults (thumbnails, upload_date, display_id), select subtitles and
        the requested format(s) and, if 'download', download them.  Returns
        info_dict updated with the last selected format (backwards compat)."""
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        # Normalize the legacy single 'thumbnail' field into a 'thumbnails'
        # list, sorted so that [-1] is the preferred one.
        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if thumbnails:
            thumbnails.sort(key=lambda t: (
                t.get('preference'), t.get('width'), t.get('height'),
                t.get('id'), t.get('url')))
            for i, t in enumerate(thumbnails):
                if 'width' in t and 'height' in t:
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                if t.get('id') is None:
                    t['id'] = '%d' % i

        if thumbnails and 'thumbnail' not in info_dict:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Working around out-of-range timestamp values (e.g. negative ones on Windows,
            # see http://bugs.python.org/issue1646728)
            try:
                upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
                info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
            except (ValueError, OverflowError, OSError):
                pass

        # --list-subs: only print what is available, download nothing
        if self.params.get('listsubtitles', False):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
            self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
            return
        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], info_dict.get('subtitles'),
            info_dict.get('automatic_captions'))

        # This extractors handle format selection themselves
        if info_dict['extractor'] in ['Youku']:
            if download:
                self.process_info(info_dict)
            return info_dict

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            raise ExtractorError('No video formats found!')

        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)

            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        for format_id, ambiguous_formats in formats_dict.items():
            if len(ambiguous_formats) > 1:
                # disambiguate duplicates with a '-<index>' suffix
                for i, format in enumerate(ambiguous_formats):
                    format['format_id'] = '%s-%d' % (format_id, i)

        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url']).lower()
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats'):
            self.list_formats(info_dict)
            return
        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            # Default format spec: prefer bestvideo+bestaudio for extractors
            # known to serve separate streams, when a merger is available and
            # we are not streaming to stdout; fall back to 'best'.
            req_format_list = []
            if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
                    info_dict['extractor'] in ['youtube', 'ted']):
                merger = FFmpegMergerPP(self)
                if merger.available and merger.can_merge():
                    req_format_list.append('bestvideo+bestaudio')
            req_format_list.append('best')
            req_format = '/'.join(req_format_list)
        formats_to_download = []
        if req_format == 'all':
            formats_to_download = formats
        else:
            for rfstr in req_format.split(','):
                # We can accept formats requested in the format: 34/5/best, we pick
                # the first that is available, starting from left
                req_formats = rfstr.split('/')
                for rf in req_formats:
                    if re.match(r'.+?\+.+?', rf) is not None:
                        # Two formats have been requested like '137+139'
                        format_1, format_2 = rf.split('+')
                        formats_info = (self.select_format(format_1, formats),
                                        self.select_format(format_2, formats))
                        if all(formats_info):
                            # The first format must contain the video and the
                            # second the audio
                            if formats_info[0].get('vcodec') == 'none':
                                self.report_error('The first format must '
                                                  'contain the video, try using '
                                                  '"-f %s+%s"' % (format_2, format_1))
                                return
                            output_ext = (
                                formats_info[0]['ext']
                                if self.params.get('merge_output_format') is None
                                else self.params['merge_output_format'])
                            # Synthesize a combined format dict: video
                            # properties from the first stream, audio
                            # properties from the second
                            selected_format = {
                                'requested_formats': formats_info,
                                'format': '%s+%s' % (formats_info[0].get('format'),
                                                     formats_info[1].get('format')),
                                'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                        formats_info[1].get('format_id')),
                                'width': formats_info[0].get('width'),
                                'height': formats_info[0].get('height'),
                                'resolution': formats_info[0].get('resolution'),
                                'fps': formats_info[0].get('fps'),
                                'vcodec': formats_info[0].get('vcodec'),
                                'vbr': formats_info[0].get('vbr'),
                                'stretched_ratio': formats_info[0].get('stretched_ratio'),
                                'acodec': formats_info[1].get('acodec'),
                                'abr': formats_info[1].get('abr'),
                                'ext': output_ext,
                            }
                        else:
                            selected_format = None
                    else:
                        selected_format = self.select_format(rf, formats)
                    if selected_format is not None:
                        formats_to_download.append(selected_format)
                        break
        if not formats_to_download:
            raise ExtractorError('requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
1177
1178     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1179         """Select the requested subtitles and their format"""
1180         available_subs = {}
1181         if normal_subtitles and self.params.get('writesubtitles'):
1182             available_subs.update(normal_subtitles)
1183         if automatic_captions and self.params.get('writeautomaticsub'):
1184             for lang, cap_info in automatic_captions.items():
1185                 if lang not in available_subs:
1186                     available_subs[lang] = cap_info
1187
1188         if (not self.params.get('writesubtitles') and not
1189                 self.params.get('writeautomaticsub') or not
1190                 available_subs):
1191             return None
1192
1193         if self.params.get('allsubtitles', False):
1194             requested_langs = available_subs.keys()
1195         else:
1196             if self.params.get('subtitleslangs', False):
1197                 requested_langs = self.params.get('subtitleslangs')
1198             elif 'en' in available_subs:
1199                 requested_langs = ['en']
1200             else:
1201                 requested_langs = [list(available_subs.keys())[0]]
1202
1203         formats_query = self.params.get('subtitlesformat', 'best')
1204         formats_preference = formats_query.split('/') if formats_query else []
1205         subs = {}
1206         for lang in requested_langs:
1207             formats = available_subs.get(lang)
1208             if formats is None:
1209                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1210                 continue
1211             for ext in formats_preference:
1212                 if ext == 'best':
1213                     f = formats[-1]
1214                     break
1215                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1216                 if matches:
1217                     f = matches[-1]
1218                     break
1219             else:
1220                 f = formats[-1]
1221                 self.report_warning(
1222                     'No subtitle format found matching "%s" for language %s, '
1223                     'using %s' % (formats_query, lang, f['ext']))
1224             subs[lang] = f
1225         return subs
1226
1227     def process_info(self, info_dict):
1228         """Process a single resolved IE result."""
1229
1230         assert info_dict.get('_type', 'video') == 'video'
1231
1232         max_downloads = self.params.get('max_downloads')
1233         if max_downloads is not None:
1234             if self._num_downloads >= int(max_downloads):
1235                 raise MaxDownloadsReached()
1236
1237         info_dict['fulltitle'] = info_dict['title']
1238         if len(info_dict['title']) > 200:
1239             info_dict['title'] = info_dict['title'][:197] + '...'
1240
1241         if 'format' not in info_dict:
1242             info_dict['format'] = info_dict['ext']
1243
1244         reason = self._match_entry(info_dict, incomplete=False)
1245         if reason is not None:
1246             self.to_screen('[download] ' + reason)
1247             return
1248
1249         self._num_downloads += 1
1250
1251         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1252
1253         # Forced printings
1254         if self.params.get('forcetitle', False):
1255             self.to_stdout(info_dict['fulltitle'])
1256         if self.params.get('forceid', False):
1257             self.to_stdout(info_dict['id'])
1258         if self.params.get('forceurl', False):
1259             if info_dict.get('requested_formats') is not None:
1260                 for f in info_dict['requested_formats']:
1261                     self.to_stdout(f['url'] + f.get('play_path', ''))
1262             else:
1263                 # For RTMP URLs, also include the playpath
1264                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1265         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1266             self.to_stdout(info_dict['thumbnail'])
1267         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1268             self.to_stdout(info_dict['description'])
1269         if self.params.get('forcefilename', False) and filename is not None:
1270             self.to_stdout(filename)
1271         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1272             self.to_stdout(formatSeconds(info_dict['duration']))
1273         if self.params.get('forceformat', False):
1274             self.to_stdout(info_dict['format'])
1275         if self.params.get('forcejson', False):
1276             self.to_stdout(json.dumps(info_dict))
1277
1278         # Do nothing else if in simulate mode
1279         if self.params.get('simulate', False):
1280             return
1281
1282         if filename is None:
1283             return
1284
1285         try:
1286             dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1287             if dn and not os.path.exists(dn):
1288                 os.makedirs(dn)
1289         except (OSError, IOError) as err:
1290             self.report_error('unable to create directory ' + compat_str(err))
1291             return
1292
1293         if self.params.get('writedescription', False):
1294             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1295             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1296                 self.to_screen('[info] Video description is already present')
1297             elif info_dict.get('description') is None:
1298                 self.report_warning('There\'s no description to write.')
1299             else:
1300                 try:
1301                     self.to_screen('[info] Writing video description to: ' + descfn)
1302                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1303                         descfile.write(info_dict['description'])
1304                 except (OSError, IOError):
1305                     self.report_error('Cannot write description file ' + descfn)
1306                     return
1307
1308         if self.params.get('writeannotations', False):
1309             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1310             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1311                 self.to_screen('[info] Video annotations are already present')
1312             else:
1313                 try:
1314                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1315                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1316                         annofile.write(info_dict['annotations'])
1317                 except (KeyError, TypeError):
1318                     self.report_warning('There are no annotations to write.')
1319                 except (OSError, IOError):
1320                     self.report_error('Cannot write annotations file: ' + annofn)
1321                     return
1322
1323         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1324                                        self.params.get('writeautomaticsub')])
1325
1326         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1327             # subtitles download errors are already managed as troubles in relevant IE
1328             # that way it will silently go on when used with unsupporting IE
1329             subtitles = info_dict['requested_subtitles']
1330             ie = self.get_info_extractor(info_dict['extractor_key'])
1331             for sub_lang, sub_info in subtitles.items():
1332                 sub_format = sub_info['ext']
1333                 if sub_info.get('data') is not None:
1334                     sub_data = sub_info['data']
1335                 else:
1336                     try:
1337                         sub_data = ie._download_webpage(
1338                             sub_info['url'], info_dict['id'], note=False)
1339                     except ExtractorError as err:
1340                         self.report_warning('Unable to download subtitle for "%s": %s' %
1341                                             (sub_lang, compat_str(err.cause)))
1342                         continue
1343                 try:
1344                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1345                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1346                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1347                     else:
1348                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1349                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1350                             subfile.write(sub_data)
1351                 except (OSError, IOError):
1352                     self.report_error('Cannot write subtitles file ' + sub_filename)
1353                     return
1354
1355         if self.params.get('writeinfojson', False):
1356             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1357             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1358                 self.to_screen('[info] Video description metadata is already present')
1359             else:
1360                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1361                 try:
1362                     write_json_file(self.filter_requested_info(info_dict), infofn)
1363                 except (OSError, IOError):
1364                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1365                     return
1366
1367         self._write_thumbnails(info_dict, filename)
1368
1369         if not self.params.get('skip_download', False):
1370             try:
1371                 def dl(name, info):
1372                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1373                     for ph in self._progress_hooks:
1374                         fd.add_progress_hook(ph)
1375                     if self.params.get('verbose'):
1376                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1377                     return fd.download(name, info)
1378
1379                 if info_dict.get('requested_formats') is not None:
1380                     downloaded = []
1381                     success = True
1382                     merger = FFmpegMergerPP(self)
1383                     if not merger.available:
1384                         postprocessors = []
1385                         self.report_warning('You have requested multiple '
1386                                             'formats but ffmpeg or avconv are not installed.'
1387                                             ' The formats won\'t be merged.')
1388                     else:
1389                         postprocessors = [merger]
1390
1391                     def compatible_formats(formats):
1392                         video, audio = formats
1393                         # Check extension
1394                         video_ext, audio_ext = audio.get('ext'), video.get('ext')
1395                         if video_ext and audio_ext:
1396                             COMPATIBLE_EXTS = (
1397                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
1398                                 ('webm')
1399                             )
1400                             for exts in COMPATIBLE_EXTS:
1401                                 if video_ext in exts and audio_ext in exts:
1402                                     return True
1403                         # TODO: Check acodec/vcodec
1404                         return False
1405
1406                     filename_real_ext = os.path.splitext(filename)[1][1:]
1407                     filename_wo_ext = (
1408                         os.path.splitext(filename)[0]
1409                         if filename_real_ext == info_dict['ext']
1410                         else filename)
1411                     requested_formats = info_dict['requested_formats']
1412                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1413                         info_dict['ext'] = 'mkv'
1414                         self.report_warning(
1415                             'Requested formats are incompatible for merge and will be merged into mkv.')
1416                     # Ensure filename always has a correct extension for successful merge
1417                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1418                     if os.path.exists(encodeFilename(filename)):
1419                         self.to_screen(
1420                             '[download] %s has already been downloaded and '
1421                             'merged' % filename)
1422                     else:
1423                         for f in requested_formats:
1424                             new_info = dict(info_dict)
1425                             new_info.update(f)
1426                             fname = self.prepare_filename(new_info)
1427                             fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
1428                             downloaded.append(fname)
1429                             partial_success = dl(fname, new_info)
1430                             success = success and partial_success
1431                         info_dict['__postprocessors'] = postprocessors
1432                         info_dict['__files_to_merge'] = downloaded
1433                 else:
1434                     # Just a single file
1435                     success = dl(filename, info_dict)
1436             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1437                 self.report_error('unable to download video data: %s' % str(err))
1438                 return
1439             except (OSError, IOError) as err:
1440                 raise UnavailableVideoError(err)
1441             except (ContentTooShortError, ) as err:
1442                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1443                 return
1444
1445             if success:
1446                 # Fixup content
1447                 fixup_policy = self.params.get('fixup')
1448                 if fixup_policy is None:
1449                     fixup_policy = 'detect_or_warn'
1450
1451                 stretched_ratio = info_dict.get('stretched_ratio')
1452                 if stretched_ratio is not None and stretched_ratio != 1:
1453                     if fixup_policy == 'warn':
1454                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1455                             info_dict['id'], stretched_ratio))
1456                     elif fixup_policy == 'detect_or_warn':
1457                         stretched_pp = FFmpegFixupStretchedPP(self)
1458                         if stretched_pp.available:
1459                             info_dict.setdefault('__postprocessors', [])
1460                             info_dict['__postprocessors'].append(stretched_pp)
1461                         else:
1462                             self.report_warning(
1463                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1464                                     info_dict['id'], stretched_ratio))
1465                     else:
1466                         assert fixup_policy in ('ignore', 'never')
1467
1468                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1469                     if fixup_policy == 'warn':
1470                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1471                             info_dict['id']))
1472                     elif fixup_policy == 'detect_or_warn':
1473                         fixup_pp = FFmpegFixupM4aPP(self)
1474                         if fixup_pp.available:
1475                             info_dict.setdefault('__postprocessors', [])
1476                             info_dict['__postprocessors'].append(fixup_pp)
1477                         else:
1478                             self.report_warning(
1479                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1480                                     info_dict['id']))
1481                     else:
1482                         assert fixup_policy in ('ignore', 'never')
1483
1484                 try:
1485                     self.post_process(filename, info_dict)
1486                 except (PostProcessingError) as err:
1487                     self.report_error('postprocessing: %s' % str(err))
1488                     return
1489                 self.record_download_archive(info_dict)
1490
1491     def download(self, url_list):
1492         """Download a given list of URLs."""
1493         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1494         if (len(url_list) > 1 and
1495                 '%' not in outtmpl and
1496                 self.params.get('max_downloads') != 1):
1497             raise SameFileError(outtmpl)
1498
1499         for url in url_list:
1500             try:
1501                 # It also downloads the videos
1502                 res = self.extract_info(url)
1503             except UnavailableVideoError:
1504                 self.report_error('unable to download video')
1505             except MaxDownloadsReached:
1506                 self.to_screen('[info] Maximum number of downloaded files reached.')
1507                 raise
1508             else:
1509                 if self.params.get('dump_single_json', False):
1510                     self.to_stdout(json.dumps(res))
1511
1512         return self._download_retcode
1513
1514     def download_with_info_file(self, info_filename):
1515         with contextlib.closing(fileinput.FileInput(
1516                 [info_filename], mode='r',
1517                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1518             # FileInput doesn't have a read method, we can't call json.load
1519             info = self.filter_requested_info(json.loads('\n'.join(f)))
1520         try:
1521             self.process_ie_result(info, download=True)
1522         except DownloadError:
1523             webpage_url = info.get('webpage_url')
1524             if webpage_url is not None:
1525                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1526                 return self.download([webpage_url])
1527             else:
1528                 raise
1529         return self._download_retcode
1530
1531     @staticmethod
1532     def filter_requested_info(info_dict):
1533         return dict(
1534             (k, v) for k, v in info_dict.items()
1535             if k not in ['requested_formats', 'requested_subtitles'])
1536
1537     def post_process(self, filename, ie_info):
1538         """Run all the postprocessors on the given file."""
1539         info = dict(ie_info)
1540         info['filepath'] = filename
1541         pps_chain = []
1542         if ie_info.get('__postprocessors') is not None:
1543             pps_chain.extend(ie_info['__postprocessors'])
1544         pps_chain.extend(self._pps)
1545         for pp in pps_chain:
1546             files_to_delete = []
1547             try:
1548                 files_to_delete, info = pp.run(info)
1549             except PostProcessingError as e:
1550                 self.report_error(e.msg)
1551             if files_to_delete and not self.params.get('keepvideo', False):
1552                 for old_filename in files_to_delete:
1553                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1554                     try:
1555                         os.remove(encodeFilename(old_filename))
1556                     except (IOError, OSError):
1557                         self.report_warning('Unable to remove downloaded original file')
1558
1559     def _make_archive_id(self, info_dict):
1560         # Future-proof against any change in case
1561         # and backwards compatibility with prior versions
1562         extractor = info_dict.get('extractor_key')
1563         if extractor is None:
1564             if 'id' in info_dict:
1565                 extractor = info_dict.get('ie_key')  # key in a playlist
1566         if extractor is None:
1567             return None  # Incomplete video information
1568         return extractor.lower() + ' ' + info_dict['id']
1569
1570     def in_download_archive(self, info_dict):
1571         fn = self.params.get('download_archive')
1572         if fn is None:
1573             return False
1574
1575         vid_id = self._make_archive_id(info_dict)
1576         if vid_id is None:
1577             return False  # Incomplete video information
1578
1579         try:
1580             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1581                 for line in archive_file:
1582                     if line.strip() == vid_id:
1583                         return True
1584         except IOError as ioe:
1585             if ioe.errno != errno.ENOENT:
1586                 raise
1587         return False
1588
1589     def record_download_archive(self, info_dict):
1590         fn = self.params.get('download_archive')
1591         if fn is None:
1592             return
1593         vid_id = self._make_archive_id(info_dict)
1594         assert vid_id
1595         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1596             archive_file.write(vid_id + '\n')
1597
1598     @staticmethod
1599     def format_resolution(format, default='unknown'):
1600         if format.get('vcodec') == 'none':
1601             return 'audio only'
1602         if format.get('resolution') is not None:
1603             return format['resolution']
1604         if format.get('height') is not None:
1605             if format.get('width') is not None:
1606                 res = '%sx%s' % (format['width'], format['height'])
1607             else:
1608                 res = '%sp' % format['height']
1609         elif format.get('width') is not None:
1610             res = '?x%d' % format['width']
1611         else:
1612             res = default
1613         return res
1614
1615     def _format_note(self, fdict):
1616         res = ''
1617         if fdict.get('ext') in ['f4f', 'f4m']:
1618             res += '(unsupported) '
1619         if fdict.get('format_note') is not None:
1620             res += fdict['format_note'] + ' '
1621         if fdict.get('tbr') is not None:
1622             res += '%4dk ' % fdict['tbr']
1623         if fdict.get('container') is not None:
1624             if res:
1625                 res += ', '
1626             res += '%s container' % fdict['container']
1627         if (fdict.get('vcodec') is not None and
1628                 fdict.get('vcodec') != 'none'):
1629             if res:
1630                 res += ', '
1631             res += fdict['vcodec']
1632             if fdict.get('vbr') is not None:
1633                 res += '@'
1634         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1635             res += 'video@'
1636         if fdict.get('vbr') is not None:
1637             res += '%4dk' % fdict['vbr']
1638         if fdict.get('fps') is not None:
1639             res += ', %sfps' % fdict['fps']
1640         if fdict.get('acodec') is not None:
1641             if res:
1642                 res += ', '
1643             if fdict['acodec'] == 'none':
1644                 res += 'video only'
1645             else:
1646                 res += '%-5s' % fdict['acodec']
1647         elif fdict.get('abr') is not None:
1648             if res:
1649                 res += ', '
1650             res += 'audio'
1651         if fdict.get('abr') is not None:
1652             res += '@%3dk' % fdict['abr']
1653         if fdict.get('asr') is not None:
1654             res += ' (%5dHz)' % fdict['asr']
1655         if fdict.get('filesize') is not None:
1656             if res:
1657                 res += ', '
1658             res += format_bytes(fdict['filesize'])
1659         elif fdict.get('filesize_approx') is not None:
1660             if res:
1661                 res += ', '
1662             res += '~' + format_bytes(fdict['filesize_approx'])
1663         return res
1664
1665     def list_formats(self, info_dict):
1666         formats = info_dict.get('formats', [info_dict])
1667         table = [
1668             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1669             for f in formats
1670             if f.get('preference') is None or f['preference'] >= -1000]
1671         if len(formats) > 1:
1672             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1673
1674         header_line = ['format code', 'extension', 'resolution', 'note']
1675         self.to_screen(
1676             '[info] Available formats for %s:\n%s' %
1677             (info_dict['id'], render_table(header_line, table)))
1678
1679     def list_thumbnails(self, info_dict):
1680         thumbnails = info_dict.get('thumbnails')
1681         if not thumbnails:
1682             tn_url = info_dict.get('thumbnail')
1683             if tn_url:
1684                 thumbnails = [{'id': '0', 'url': tn_url}]
1685             else:
1686                 self.to_screen(
1687                     '[info] No thumbnails present for %s' % info_dict['id'])
1688                 return
1689
1690         self.to_screen(
1691             '[info] Thumbnails for %s:' % info_dict['id'])
1692         self.to_screen(render_table(
1693             ['ID', 'width', 'height', 'URL'],
1694             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1695
1696     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1697         if not subtitles:
1698             self.to_screen('%s has no %s' % (video_id, name))
1699             return
1700         self.to_screen(
1701             'Available %s for %s:' % (name, video_id))
1702         self.to_screen(render_table(
1703             ['Language', 'formats'],
1704             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1705                 for lang, formats in subtitles.items()]))
1706
1707     def urlopen(self, req):
1708         """ Start an HTTP download """
1709
1710         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1711         # always respected by websites, some tend to give out URLs with non percent-encoded
1712         # non-ASCII characters (see telemb.py, ard.py [#3412])
1713         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1714         # To work around aforementioned issue we will replace request's original URL with
1715         # percent-encoded one
1716         req_is_string = isinstance(req, compat_basestring)
1717         url = req if req_is_string else req.get_full_url()
1718         url_escaped = escape_url(url)
1719
1720         # Substitute URL if any change after escaping
1721         if url != url_escaped:
1722             if req_is_string:
1723                 req = url_escaped
1724             else:
1725                 req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
1726                 req = req_type(
1727                     url_escaped, data=req.data, headers=req.headers,
1728                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1729
1730         return self._opener.open(req, timeout=self._socket_timeout)
1731
    def print_debug_header(self):
        """Write diagnostic information (versions, encodings, proxies) when --verbose is set."""
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        # sys.stdout may lack an 'encoding' attribute (e.g. when replaced by a wrapper).
        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        # encoding=None: the line above is pure ASCII, no re-encoding needed.
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
        # Best-effort: report the git commit when running from a checkout;
        # any failure (no git, not a repo) is silently ignored.
        try:
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        except Exception:
            try:
                # Python 2 only: clear the current exception state.
                sys.exc_clear()
            except Exception:
                pass
        self._write_string('[debug] Python version %s - %s\n' % (
            platform.python_version(), platform_name()))

        # Versions of the external programs youtube-dl may shell out to.
        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Collect the effective proxy configuration from every opener handler
        # that carries one.
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            # Opt-in only: contacts yt-dl.org for the public IP and the
            # latest released version.
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
1796
1797     def _setup_opener(self):
1798         timeout_val = self.params.get('socket_timeout')
1799         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1800
1801         opts_cookiefile = self.params.get('cookiefile')
1802         opts_proxy = self.params.get('proxy')
1803
1804         if opts_cookiefile is None:
1805             self.cookiejar = compat_cookiejar.CookieJar()
1806         else:
1807             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1808                 opts_cookiefile)
1809             if os.access(opts_cookiefile, os.R_OK):
1810                 self.cookiejar.load()
1811
1812         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1813             self.cookiejar)
1814         if opts_proxy is not None:
1815             if opts_proxy == '':
1816                 proxies = {}
1817             else:
1818                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1819         else:
1820             proxies = compat_urllib_request.getproxies()
1821             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1822             if 'http' in proxies and 'https' not in proxies:
1823                 proxies['https'] = proxies['http']
1824         proxy_handler = PerRequestProxyHandler(proxies)
1825
1826         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1827         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1828         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1829         opener = compat_urllib_request.build_opener(
1830             proxy_handler, https_handler, cookie_processor, ydlh)
1831
1832         # Delete the default user-agent header, which would otherwise apply in
1833         # cases where our custom HTTP handler doesn't come into play
1834         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1835         opener.addheaders = []
1836         self._opener = opener
1837
1838     def encode(self, s):
1839         if isinstance(s, bytes):
1840             return s  # Already encoded
1841
1842         try:
1843             return s.encode(self.get_encoding())
1844         except UnicodeEncodeError as err:
1845             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1846             raise
1847
1848     def get_encoding(self):
1849         encoding = self.params.get('encoding')
1850         if encoding is None:
1851             encoding = preferredencoding()
1852         return encoding
1853
    def _write_thumbnails(self, info_dict, filename):
        """Download the video's thumbnail(s) next to `filename`, honouring
        the 'writethumbnail' / 'write_all_thumbnails' options.

        Stores each downloaded path back into the thumbnail dict under
        'filename'. Download failures only produce a warning.
        """
        if self.params.get('writethumbnail', False):
            thumbnails = info_dict.get('thumbnails')
            if thumbnails:
                # Only the last entry is downloaded in single-thumbnail mode;
                # assumes the list puts the preferred thumbnail last — confirm
                # against the extractor-side sorting.
                thumbnails = [thumbnails[-1]]
        elif self.params.get('write_all_thumbnails', False):
            thumbnails = info_dict.get('thumbnails')
        else:
            # Neither option enabled: nothing to do.
            return

        if not thumbnails:
            # No thumbnails present, so return immediately
            return

        for t in thumbnails:
            # Default to .jpg when the URL carries no usable extension.
            thumb_ext = determine_ext(t['url'], 'jpg')
            # Disambiguate file names only when writing several thumbnails.
            suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
            thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
            t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext

            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                self.to_screen('[%s] %s: Thumbnail %sis already present' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
            else:
                self.to_screen('[%s] %s: Downloading thumbnail %s...' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
                try:
                    uf = self.urlopen(t['url'])
                    with open(thumb_filename, 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    # Thumbnails are best-effort: warn and keep going.
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (t['url'], compat_str(err)))