_ Git - youtube-dl/blob - youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import datetime
   9 import errno
  10 import fileinput
  11 import io
  12 import itertools
  13 import json
  14 import locale
  15 import operator
  16 import os
  17 import platform
  18 import re
  19 import shutil
  20 import subprocess
  21 import socket
  22 import sys
  23 import time
  24 import traceback
  25
  26 if os.name == 'nt':
  27     import ctypes
  28
  29 from .compat import (
  30     compat_basestring,
  31     compat_cookiejar,
  32     compat_expanduser,
  33     compat_get_terminal_size,
  34     compat_http_client,
  35     compat_kwargs,
  36     compat_str,
  37     compat_urllib_error,
  38     compat_urllib_request,
  39 )
  40 from .utils import (
  41     escape_url,
  42     ContentTooShortError,
  43     date_from_str,
  44     DateRange,
  45     DEFAULT_OUTTMPL,
  46     determine_ext,
  47     DownloadError,
  48     encodeFilename,
  49     ExtractorError,
  50     format_bytes,
  51     formatSeconds,
  52     HEADRequest,
  53     locked_file,
  54     make_HTTPS_handler,
  55     MaxDownloadsReached,
  56     PagedList,
  57     parse_filesize,
  58     PerRequestProxyHandler,
  59     PostProcessingError,
  60     platform_name,
  61     preferredencoding,
  62     render_table,
  63     SameFileError,
  64     sanitize_filename,
  65     sanitize_path,
  66     std_headers,
  67     subtitles_filename,
  68     UnavailableVideoError,
  69     url_basename,
  70     version_tuple,
  71     write_json_file,
  72     write_string,
  73     YoutubeDLHandler,
  74     prepend_extension,
  75     replace_extension,
  76     args_to_str,
  77     age_restricted,
  78 )
  79 from .cache import Cache
  80 from .extractor import get_info_extractor, gen_extractors
  81 from .downloader import get_suitable_downloader
  82 from .downloader.rtmp import rtmpdump_version
  83 from .postprocessor import (
  84     FFmpegFixupM4aPP,
  85     FFmpegFixupStretchedPP,
  86     FFmpegMergerPP,
  87     FFmpegPostProcessor,
  88     get_postprocessor,
  89 )
  90 from .version import __version__
  91
  92
  93 class YoutubeDL(object):
  94     """YoutubeDL class.
  95
  96     YoutubeDL objects are the ones responsible for downloading the
  97     actual video file and writing it to disk if the user has requested
  98     it, among some other tasks. In most cases there should be one per
  99     program. As, given a video URL, the downloader doesn't know how to
 100     extract all the needed information, task that InfoExtractors do, it
 101     has to pass the URL to one of them.
 102
 103     For this, YoutubeDL objects have a method that allows
 104     InfoExtractors to be registered in a given order. When it is passed
 105     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 106     finds that reports being able to handle it. The InfoExtractor extracts
 107     all the information about the video or videos the URL refers to, and
 108     YoutubeDL process the extracted information, possibly using a File
 109     Downloader to download the video.
 110
 111     YoutubeDL objects accept a lot of parameters. In order not to saturate
 112     the object constructor with arguments, it receives a dictionary of
 113     options instead. These options are available through the params
 114     attribute for the InfoExtractors to use. The YoutubeDL also
 115     registers itself as the downloader in charge for the InfoExtractors
 116     that are added to it, so this is a "mutual registration".
 117
 118     Available options:
 119
 120     username:          Username for authentication purposes.
 121     password:          Password for authentication purposes.
 122     videopassword:     Password for accessing a video.
 123     usenetrc:          Use netrc for authentication instead.
 124     verbose:           Print additional info to stdout.
 125     quiet:             Do not print messages to stdout.
 126     no_warnings:       Do not print out anything for warnings.
 127     forceurl:          Force printing final URL.
 128     forcetitle:        Force printing title.
 129     forceid:           Force printing ID.
 130     forcethumbnail:    Force printing thumbnail URL.
 131     forcedescription:  Force printing description.
 132     forcefilename:     Force printing final filename.
 133     forceduration:     Force printing duration.
 134     forcejson:         Force printing info_dict as JSON.
 135     dump_single_json:  Force printing the info_dict of the whole playlist
 136                        (or video) as a single JSON line.
 137     simulate:          Do not download the video files.
 138     format:            Video format code. See options.py for more information.
 139     outtmpl:           Template for output names.
 140     restrictfilenames: Do not allow "&" and spaces in file names
 141     ignoreerrors:      Do not stop on download errors.
 142     force_generic_extractor: Force downloader to use the generic extractor
 143     nooverwrites:      Prevent overwriting files.
 144     playliststart:     Playlist item to start at.
 145     playlistend:       Playlist item to end at.
 146     playlist_items:    Specific indices of playlist to download.
 147     playlistreverse:   Download playlist items in reverse order.
 148     matchtitle:        Download only matching titles.
 149     rejecttitle:       Reject downloads for matching titles.
 150     logger:            Log messages to a logging.Logger instance.
 151     logtostderr:       Log messages to stderr instead of stdout.
 152     writedescription:  Write the video description to a .description file
 153     writeinfojson:     Write the video description to a .info.json file
 154     writeannotations:  Write the video annotations to a .annotations.xml file
 155     writethumbnail:    Write the thumbnail image to a file
 156     write_all_thumbnails:  Write all thumbnail formats to files
 157     writesubtitles:    Write the video subtitles to a file
 158     writeautomaticsub: Write the automatic subtitles to a file
 159     allsubtitles:      Downloads all the subtitles of the video
 160                        (requires writesubtitles or writeautomaticsub)
 161     listsubtitles:     Lists all available subtitles for the video
 162     subtitlesformat:   The format code for subtitles
 163     subtitleslangs:    List of languages of the subtitles to download
 164     keepvideo:         Keep the video file after post-processing
 165     daterange:         A DateRange object, download only if the upload_date is in the range.
 166     skip_download:     Skip the actual download of the video file
 167     cachedir:          Location of the cache files in the filesystem.
 168                        False to disable filesystem cache.
 169     noplaylist:        Download single video instead of a playlist if in doubt.
 170     age_limit:         An integer representing the user's age in years.
 171                        Unsuitable videos for the given age are skipped.
 172     min_views:         An integer representing the minimum view count the video
 173                        must have in order to not be skipped.
 174                        Videos without view count information are always
 175                        downloaded. None for no limit.
 176     max_views:         An integer representing the maximum view count.
 177                        Videos that are more popular than that are not
 178                        downloaded.
 179                        Videos without view count information are always
 180                        downloaded. None for no limit.
 181     download_archive:  File name of a file where all downloads are recorded.
 182                        Videos already present in the file are not downloaded
 183                        again.
 184     cookiefile:        File name where cookies should be read from and dumped to.
 185     nocheckcertificate:Do not verify SSL certificates
 186     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 187                        At the moment, this is only supported by YouTube.
 188     proxy:             URL of the proxy server to use
 189     cn_verification_proxy:  URL of the proxy to use for IP address verification
 190                        on Chinese sites. (Experimental)
 191     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 192     bidi_workaround:   Work around buggy terminals without bidirectional text
 193                        support, using fribidi
 194     debug_printtraffic:Print out sent and received HTTP traffic
 195     include_ads:       Download ads as well
 196     default_search:    Prepend this string if an input url is not valid.
 197                        'auto' for elaborate guessing
 198     encoding:          Use this encoding instead of the system-specified.
 199     extract_flat:      Do not resolve URLs, return the immediate result.
 200                        Pass in 'in_playlist' to only show this behavior for
 201                        playlist items.
 202     postprocessors:    A list of dictionaries, each with an entry
 203                        * key:  The name of the postprocessor. See
 204                                youtube_dl/postprocessor/__init__.py for a list.
 205                        as well as any further keyword arguments for the
 206                        postprocessor.
 207     progress_hooks:    A list of functions that get called on download
 208                        progress, with a dictionary with the entries
 209                        * status: One of "downloading", "error", or "finished".
 210                                  Check this first and ignore unknown values.
 211
 212                        If status is one of "downloading", or "finished", the
 213                        following properties may also be present:
 214                        * filename: The final filename (always present)
 215                        * tmpfilename: The filename we're currently writing to
 216                        * downloaded_bytes: Bytes on disk
 217                        * total_bytes: Size of the whole file, None if unknown
 218                        * total_bytes_estimate: Guess of the eventual file size,
 219                                                None if unavailable.
 220                        * elapsed: The number of seconds since download started.
 221                        * eta: The estimated time in seconds, None if unknown
 222                        * speed: The download speed in bytes/second, None if
 223                                 unknown
 224                        * fragment_index: The counter of the currently
 225                                          downloaded video fragment.
 226                        * fragment_count: The number of fragments (= individual
 227                                          files that will be merged)
 228
 229                        Progress hooks are guaranteed to be called at least once
 230                        (with status "finished") if the download is successful.
 231     merge_output_format: Extension to use when merging formats.
 232     fixup:             Automatically correct known faults of the file.
 233                        One of:
 234                        - "never": do nothing
 235                        - "warn": only emit a warning
 236                        - "detect_or_warn": check whether we can do anything
 237                                            about it, warn otherwise (default)
 238     source_address:    (Experimental) Client-side IP address to bind to.
 239     call_home:         Boolean, true iff we are allowed to contact the
 240                        youtube-dl servers for debugging.
 241     sleep_interval:    Number of seconds to sleep before each download.
 242     listformats:       Print an overview of available video formats and exit.
 243     list_thumbnails:   Print a table of all thumbnails and exit.
 244     match_filter:      A function that gets called with the info_dict of
 245                        every video.
 246                        If it returns a message, the video is ignored.
 247                        If it returns None, the video is downloaded.
 248                        match_filter_func in utils.py is one example for this.
 249     no_color:          Do not emit color codes in output.
 250
 251     The following options determine which downloader is picked:
 252     external_downloader: Executable of the external downloader to call.
 253                        None or unset for standard (built-in) downloader.
 254     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
 255
 256     The following parameters are not used by YoutubeDL itself, they are used by
 257     the downloader (see youtube_dl/downloader/common.py):
 258     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 259     noresizebuffer, retries, continuedl, noprogress, consoletitle,
 260     xattr_set_filesize, external_downloader_args.
 261
 262     The following options are used by the post processors:
 263     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
 264                        otherwise prefer avconv.
 265     postprocessor_args: Extra parameters for external apps, like avconv.
 266     """
 267
 268     params = None
 269     _ies = []
 270     _pps = []
 271     _download_retcode = None
 272     _num_downloads = None
 273     _screen_file = None
 274
 275     def __init__(self, params=None, auto_init=True):
 276         """Create a FileDownloader object with the given options."""
 277         if params is None:
 278             params = {}
 279         self._ies = []
 280         self._ies_instances = {}
 281         self._pps = []
 282         self._progress_hooks = []
 283         self._download_retcode = 0
 284         self._num_downloads = 0
 285         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 286         self._err_file = sys.stderr
 287         self.params = params
 288         self.cache = Cache(self)
 289
 290         if params.get('bidi_workaround', False):
 291             try:
 292                 import pty
 293                 master, slave = pty.openpty()
 294                 width = compat_get_terminal_size().columns
 295                 if width is None:
 296                     width_args = []
 297                 else:
 298                     width_args = ['-w', str(width)]
 299                 sp_kwargs = dict(
 300                     stdin=subprocess.PIPE,
 301                     stdout=slave,
 302                     stderr=self._err_file)
 303                 try:
 304                     self._output_process = subprocess.Popen(
 305                         ['bidiv'] + width_args, **sp_kwargs
 306                     )
 307                 except OSError:
 308                     self._output_process = subprocess.Popen(
 309                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 310                 self._output_channel = os.fdopen(master, 'rb')
 311             except OSError as ose:
 312                 if ose.errno == 2:
 313                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 314                 else:
 315                     raise
 316
 317         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 318                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
 319                 not params.get('restrictfilenames', False)):
 320             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 321             self.report_warning(
 322                 'Assuming --restrict-filenames since file system encoding '
 323                 'cannot encode all characters. '
 324                 'Set the LC_ALL environment variable to fix this.')
 325             self.params['restrictfilenames'] = True
 326
 327         if isinstance(params.get('outtmpl'), bytes):
 328             self.report_warning(
 329                 'Parameter outtmpl is bytes, but should be a unicode string. '
 330                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 331
 332         self._setup_opener()
 333
 334         if auto_init:
 335             self.print_debug_header()
 336             self.add_default_info_extractors()
 337
 338         for pp_def_raw in self.params.get('postprocessors', []):
 339             pp_class = get_postprocessor(pp_def_raw['key'])
 340             pp_def = dict(pp_def_raw)
 341             del pp_def['key']
 342             pp = pp_class(self, **compat_kwargs(pp_def))
 343             self.add_post_processor(pp)
 344
 345         for ph in self.params.get('progress_hooks', []):
 346             self.add_progress_hook(ph)
 347
 348     def warn_if_short_id(self, argv):
 349         # short YouTube ID starting with dash?
 350         idxs = [
 351             i for i, a in enumerate(argv)
 352             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 353         if idxs:
 354             correct_argv = (
 355                 ['youtube-dl'] +
 356                 [a for i, a in enumerate(argv) if i not in idxs] +
 357                 ['--'] + [argv[i] for i in idxs]
 358             )
 359             self.report_warning(
 360                 'Long argument string detected. '
 361                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 362                 args_to_str(correct_argv))
 363
 364     def add_info_extractor(self, ie):
 365         """Add an InfoExtractor object to the end of the list."""
 366         self._ies.append(ie)
 367         self._ies_instances[ie.ie_key()] = ie
 368         ie.set_downloader(self)
 369
 370     def get_info_extractor(self, ie_key):
 371         """
 372         Get an instance of an IE with name ie_key, it will try to get one from
 373         the _ies list, if there's no instance it will create a new one and add
 374         it to the extractor list.
 375         """
 376         ie = self._ies_instances.get(ie_key)
 377         if ie is None:
 378             ie = get_info_extractor(ie_key)()
 379             self.add_info_extractor(ie)
 380         return ie
 381
 382     def add_default_info_extractors(self):
 383         """
 384         Add the InfoExtractors returned by gen_extractors to the end of the list
 385         """
 386         for ie in gen_extractors():
 387             self.add_info_extractor(ie)
 388
 389     def add_post_processor(self, pp):
 390         """Add a PostProcessor object to the end of the chain."""
 391         self._pps.append(pp)
 392         pp.set_downloader(self)
 393
 394     def add_progress_hook(self, ph):
 395         """Add the progress hook (currently only for the file downloader)"""
 396         self._progress_hooks.append(ph)
 397
 398     def _bidi_workaround(self, message):
 399         if not hasattr(self, '_output_channel'):
 400             return message
 401
 402         assert hasattr(self, '_output_process')
 403         assert isinstance(message, compat_str)
 404         line_count = message.count('\n') + 1
 405         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 406         self._output_process.stdin.flush()
 407         res = ''.join(self._output_channel.readline().decode('utf-8')
 408                       for _ in range(line_count))
 409         return res[:-len('\n')]
 410
 411     def to_screen(self, message, skip_eol=False):
 412         """Print message to stdout if not in quiet mode."""
 413         return self.to_stdout(message, skip_eol, check_quiet=True)
 414
 415     def _write_string(self, s, out=None):
 416         write_string(s, out=out, encoding=self.params.get('encoding'))
 417
 418     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 419         """Print message to stdout if not in quiet mode."""
 420         if self.params.get('logger'):
 421             self.params['logger'].debug(message)
 422         elif not check_quiet or not self.params.get('quiet', False):
 423             message = self._bidi_workaround(message)
 424             terminator = ['\n', ''][skip_eol]
 425             output = message + terminator
 426
 427             self._write_string(output, self._screen_file)
 428
 429     def to_stderr(self, message):
 430         """Print message to stderr."""
 431         assert isinstance(message, compat_str)
 432         if self.params.get('logger'):
 433             self.params['logger'].error(message)
 434         else:
 435             message = self._bidi_workaround(message)
 436             output = message + '\n'
 437             self._write_string(output, self._err_file)
 438
 439     def to_console_title(self, message):
 440         if not self.params.get('consoletitle', False):
 441             return
 442         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 443             # c_wchar_p() might not be necessary if `message` is
 444             # already of type unicode()
 445             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 446         elif 'TERM' in os.environ:
 447             self._write_string('\033]0;%s\007' % message, self._screen_file)
 448
 449     def save_console_title(self):
 450         if not self.params.get('consoletitle', False):
 451             return
 452         if 'TERM' in os.environ:
 453             # Save the title on stack
 454             self._write_string('\033[22;0t', self._screen_file)
 455
 456     def restore_console_title(self):
 457         if not self.params.get('consoletitle', False):
 458             return
 459         if 'TERM' in os.environ:
 460             # Restore the title from stack
 461             self._write_string('\033[23;0t', self._screen_file)
 462
 463     def __enter__(self):
 464         self.save_console_title()
 465         return self
 466
 467     def __exit__(self, *args):
 468         self.restore_console_title()
 469
 470         if self.params.get('cookiefile') is not None:
 471             self.cookiejar.save()
 472
 473     def trouble(self, message=None, tb=None):
 474         """Determine action to take when a download problem appears.
 475
 476         Depending on if the downloader has been configured to ignore
 477         download errors or not, this method may throw an exception or
 478         not when errors are found, after printing the message.
 479
 480         tb, if given, is additional traceback information.
 481         """
 482         if message is not None:
 483             self.to_stderr(message)
 484         if self.params.get('verbose'):
 485             if tb is None:
 486                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 487                     tb = ''
 488                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 489                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 490                     tb += compat_str(traceback.format_exc())
 491                 else:
 492                     tb_data = traceback.format_list(traceback.extract_stack())
 493                     tb = ''.join(tb_data)
 494             self.to_stderr(tb)
 495         if not self.params.get('ignoreerrors', False):
 496             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 497                 exc_info = sys.exc_info()[1].exc_info
 498             else:
 499                 exc_info = sys.exc_info()
 500             raise DownloadError(message, exc_info)
 501         self._download_retcode = 1
 502
 503     def report_warning(self, message):
 504         '''
 505         Print the message to stderr, it will be prefixed with 'WARNING:'
 506         If stderr is a tty file the 'WARNING:' will be colored
 507         '''
 508         if self.params.get('logger') is not None:
 509             self.params['logger'].warning(message)
 510         else:
 511             if self.params.get('no_warnings'):
 512                 return
 513             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
 514                 _msg_header = '\033[0;33mWARNING:\033[0m'
 515             else:
 516                 _msg_header = 'WARNING:'
 517             warning_message = '%s %s' % (_msg_header, message)
 518             self.to_stderr(warning_message)
 519
 520     def report_error(self, message, tb=None):
 521         '''
 522         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 523         in red if stderr is a tty file.
 524         '''
 525         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
 526             _msg_header = '\033[0;31mERROR:\033[0m'
 527         else:
 528             _msg_header = 'ERROR:'
 529         error_message = '%s %s' % (_msg_header, message)
 530         self.trouble(error_message, tb)
 531
 532     def report_file_already_downloaded(self, file_name):
 533         """Report file has already been fully downloaded."""
 534         try:
 535             self.to_screen('[download] %s has already been downloaded' % file_name)
 536         except UnicodeEncodeError:
 537             self.to_screen('[download] The file has already been downloaded')
 538
 539     def prepare_filename(self, info_dict):
 540         """Generate the output filename."""
 541         try:
 542             template_dict = dict(info_dict)
 543
 544             template_dict['epoch'] = int(time.time())
 545             autonumber_size = self.params.get('autonumber_size')
 546             if autonumber_size is None:
 547                 autonumber_size = 5
 548             autonumber_templ = '%0' + str(autonumber_size) + 'd'
 549             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 550             if template_dict.get('playlist_index') is not None:
 551                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
 552             if template_dict.get('resolution') is None:
 553                 if template_dict.get('width') and template_dict.get('height'):
 554                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 555                 elif template_dict.get('height'):
 556                     template_dict['resolution'] = '%sp' % template_dict['height']
 557                 elif template_dict.get('width'):
 558                     template_dict['resolution'] = '?x%d' % template_dict['width']
 559
 560             sanitize = lambda k, v: sanitize_filename(
 561                 compat_str(v),
 562                 restricted=self.params.get('restrictfilenames'),
 563                 is_id=(k == 'id'))
 564             template_dict = dict((k, sanitize(k, v))
 565                                  for k, v in template_dict.items()
 566                                  if v is not None)
 567             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 568
 569             outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
 570             tmpl = compat_expanduser(outtmpl)
 571             filename = tmpl % template_dict
 572             # Temporary fix for #4787
 573             # 'Treat' all problem characters by passing filename through preferredencoding
 574             # to workaround encoding issues with subprocess on python2 @ Windows
 575             if sys.version_info < (3, 0) and sys.platform == 'win32':
 576                 filename = encodeFilename(filename, True).decode(preferredencoding())
 577             return filename
 578         except ValueError as err:
 579             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 580             return None
 581
 582     def _match_entry(self, info_dict, incomplete):
 583         """ Returns None iff the file should be downloaded """
 584
 585         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 586         if 'title' in info_dict:
 587             # This can happen when we're just evaluating the playlist
 588             title = info_dict['title']
 589             matchtitle = self.params.get('matchtitle', False)
 590             if matchtitle:
 591                 if not re.search(matchtitle, title, re.IGNORECASE):
 592                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 593             rejecttitle = self.params.get('rejecttitle', False)
 594             if rejecttitle:
 595                 if re.search(rejecttitle, title, re.IGNORECASE):
 596                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 597         date = info_dict.get('upload_date', None)
 598         if date is not None:
 599             dateRange = self.params.get('daterange', DateRange())
 600             if date not in dateRange:
 601                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 602         view_count = info_dict.get('view_count', None)
 603         if view_count is not None:
 604             min_views = self.params.get('min_views')
 605             if min_views is not None and view_count < min_views:
 606                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 607             max_views = self.params.get('max_views')
 608             if max_views is not None and view_count > max_views:
 609                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 610         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
 611             return 'Skipping "%s" because it is age restricted' % video_title
 612         if self.in_download_archive(info_dict):
 613             return '%s has already been recorded in archive' % video_title
 614
 615         if not incomplete:
 616             match_filter = self.params.get('match_filter')
 617             if match_filter is not None:
 618                 ret = match_filter(info_dict)
 619                 if ret is not None:
 620                     return ret
 621
 622         return None
 623
 624     @staticmethod
 625     def add_extra_info(info_dict, extra_info):
 626         '''Set the keys from extra_info in info dict if they are missing'''
 627         for key, value in extra_info.items():
 628             info_dict.setdefault(key, value)
 629
 630     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 631                      process=True, force_generic_extractor=False):
 632         '''
 633         Returns a list with a dictionary for each video we find.
 634         If 'download', also downloads the videos.
 635         extra_info is a dict containing the extra values to add to each result
 636         '''
 637
 638         if not ie_key and force_generic_extractor:
 639             ie_key = 'Generic'
 640
 641         if ie_key:
 642             ies = [self.get_info_extractor(ie_key)]
 643         else:
 644             ies = self._ies
 645
 646         for ie in ies:
 647             if not ie.suitable(url):
 648                 continue
 649
 650             if not ie.working():
 651                 self.report_warning('The program functionality for this site has been marked as broken, '
 652                                     'and will probably not work.')
 653
 654             try:
 655                 ie_result = ie.extract(url)
 656                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
 657                     break
 658                 if isinstance(ie_result, list):
 659                     # Backwards compatibility: old IE result format
 660                     ie_result = {
 661                         '_type': 'compat_list',
 662                         'entries': ie_result,
 663                     }
 664                 self.add_default_extra_info(ie_result, ie, url)
 665                 if process:
 666                     return self.process_ie_result(ie_result, download, extra_info)
 667                 else:
 668                     return ie_result
 669             except ExtractorError as de:  # An error we somewhat expected
 670                 self.report_error(compat_str(de), de.format_traceback())
 671                 break
 672             except MaxDownloadsReached:
 673                 raise
 674             except Exception as e:
 675                 if self.params.get('ignoreerrors', False):
 676                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 677                     break
 678                 else:
 679                     raise
 680         else:
 681             self.report_error('no suitable InfoExtractor for URL %s' % url)
 682
 683     def add_default_extra_info(self, ie_result, ie, url):
 684         self.add_extra_info(ie_result, {
 685             'extractor': ie.IE_NAME,
 686             'webpage_url': url,
 687             'webpage_url_basename': url_basename(url),
 688             'extractor_key': ie.ie_key(),
 689         })
 690
 691     def process_ie_result(self, ie_result, download=True, extra_info={}):
 692         """
 693         Take the result of the ie(may be modified) and resolve all unresolved
 694         references (URLs, playlist items).
 695
 696         It will also download the videos if 'download'.
 697         Returns the resolved ie_result.
 698         """
 699
 700         result_type = ie_result.get('_type', 'video')
 701
 702         if result_type in ('url', 'url_transparent'):
 703             extract_flat = self.params.get('extract_flat', False)
 704             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
 705                     extract_flat is True):
 706                 if self.params.get('forcejson', False):
 707                     self.to_stdout(json.dumps(ie_result))
 708                 return ie_result
 709
 710         if result_type == 'video':
 711             self.add_extra_info(ie_result, extra_info)
 712             return self.process_video_result(ie_result, download=download)
 713         elif result_type == 'url':
 714             # We have to add extra_info to the results because it may be
 715             # contained in a playlist
 716             return self.extract_info(ie_result['url'],
 717                                      download,
 718                                      ie_key=ie_result.get('ie_key'),
 719                                      extra_info=extra_info)
 720         elif result_type == 'url_transparent':
 721             # Use the information from the embedding page
 722             info = self.extract_info(
 723                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 724                 extra_info=extra_info, download=False, process=False)
 725
 726             force_properties = dict(
 727                 (k, v) for k, v in ie_result.items() if v is not None)
 728             for f in ('_type', 'url'):
 729                 if f in force_properties:
 730                     del force_properties[f]
 731             new_result = info.copy()
 732             new_result.update(force_properties)
 733
 734             assert new_result.get('_type') != 'url_transparent'
 735
 736             return self.process_ie_result(
 737                 new_result, download=download, extra_info=extra_info)
 738         elif result_type == 'playlist' or result_type == 'multi_video':
 739             # We process each entry in the playlist
 740             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 741             self.to_screen('[download] Downloading playlist: %s' % playlist)
 742
 743             playlist_results = []
 744
 745             playliststart = self.params.get('playliststart', 1) - 1
 746             playlistend = self.params.get('playlistend', None)
 747             # For backwards compatibility, interpret -1 as whole list
 748             if playlistend == -1:
 749                 playlistend = None
 750
 751             playlistitems_str = self.params.get('playlist_items', None)
 752             playlistitems = None
 753             if playlistitems_str is not None:
 754                 def iter_playlistitems(format):
 755                     for string_segment in format.split(','):
 756                         if '-' in string_segment:
 757                             start, end = string_segment.split('-')
 758                             for item in range(int(start), int(end) + 1):
 759                                 yield int(item)
 760                         else:
 761                             yield int(string_segment)
 762                 playlistitems = iter_playlistitems(playlistitems_str)
 763
 764             ie_entries = ie_result['entries']
 765             if isinstance(ie_entries, list):
 766                 n_all_entries = len(ie_entries)
 767                 if playlistitems:
 768                     entries = [
 769                         ie_entries[i - 1] for i in playlistitems
 770                         if -n_all_entries <= i - 1 < n_all_entries]
 771                 else:
 772                     entries = ie_entries[playliststart:playlistend]
 773                 n_entries = len(entries)
 774                 self.to_screen(
 775                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
 776                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
 777             elif isinstance(ie_entries, PagedList):
 778                 if playlistitems:
 779                     entries = []
 780                     for item in playlistitems:
 781                         entries.extend(ie_entries.getslice(
 782                             item - 1, item
 783                         ))
 784                 else:
 785                     entries = ie_entries.getslice(
 786                         playliststart, playlistend)
 787                 n_entries = len(entries)
 788                 self.to_screen(
 789                     "[%s] playlist %s: Downloading %d videos" %
 790                     (ie_result['extractor'], playlist, n_entries))
 791             else:  # iterable
 792                 if playlistitems:
 793                     entry_list = list(ie_entries)
 794                     entries = [entry_list[i - 1] for i in playlistitems]
 795                 else:
 796                     entries = list(itertools.islice(
 797                         ie_entries, playliststart, playlistend))
 798                 n_entries = len(entries)
 799                 self.to_screen(
 800                     "[%s] playlist %s: Downloading %d videos" %
 801                     (ie_result['extractor'], playlist, n_entries))
 802
 803             if self.params.get('playlistreverse', False):
 804                 entries = entries[::-1]
 805
 806             for i, entry in enumerate(entries, 1):
 807                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
 808                 extra = {
 809                     'n_entries': n_entries,
 810                     'playlist': playlist,
 811                     'playlist_id': ie_result.get('id'),
 812                     'playlist_title': ie_result.get('title'),
 813                     'playlist_index': i + playliststart,
 814                     'extractor': ie_result['extractor'],
 815                     'webpage_url': ie_result['webpage_url'],
 816                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
 817                     'extractor_key': ie_result['extractor_key'],
 818                 }
 819
 820                 reason = self._match_entry(entry, incomplete=True)
 821                 if reason is not None:
 822                     self.to_screen('[download] ' + reason)
 823                     continue
 824
 825                 entry_result = self.process_ie_result(entry,
 826                                                       download=download,
 827                                                       extra_info=extra)
 828                 playlist_results.append(entry_result)
 829             ie_result['entries'] = playlist_results
 830             return ie_result
 831         elif result_type == 'compat_list':
 832             self.report_warning(
 833                 'Extractor %s returned a compat_list result. '
 834                 'It needs to be updated.' % ie_result.get('extractor'))
 835
 836             def _fixup(r):
 837                 self.add_extra_info(
 838                     r,
 839                     {
 840                         'extractor': ie_result['extractor'],
 841                         'webpage_url': ie_result['webpage_url'],
 842                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
 843                         'extractor_key': ie_result['extractor_key'],
 844                     }
 845                 )
 846                 return r
 847             ie_result['entries'] = [
 848                 self.process_ie_result(_fixup(r), download, extra_info)
 849                 for r in ie_result['entries']
 850             ]
 851             return ie_result
 852         else:
 853             raise Exception('Invalid result type: %s' % result_type)
 854
 855     def _apply_format_filter(self, format_spec, available_formats):
 856         " Returns a tuple of the remaining format_spec and filtered formats "
 857
 858         OPERATORS = {
 859             '<': operator.lt,
 860             '<=': operator.le,
 861             '>': operator.gt,
 862             '>=': operator.ge,
 863             '=': operator.eq,
 864             '!=': operator.ne,
 865         }
 866         operator_rex = re.compile(r'''(?x)\s*\[
 867             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
 868             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
 869             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
 870             \]$
 871             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
 872         m = operator_rex.search(format_spec)
 873         if m:
 874             try:
 875                 comparison_value = int(m.group('value'))
 876             except ValueError:
 877                 comparison_value = parse_filesize(m.group('value'))
 878                 if comparison_value is None:
 879                     comparison_value = parse_filesize(m.group('value') + 'B')
 880                 if comparison_value is None:
 881                     raise ValueError(
 882                         'Invalid value %r in format specification %r' % (
 883                             m.group('value'), format_spec))
 884             op = OPERATORS[m.group('op')]
 885
 886         if not m:
 887             STR_OPERATORS = {
 888                 '=': operator.eq,
 889                 '!=': operator.ne,
 890             }
 891             str_operator_rex = re.compile(r'''(?x)\s*\[
 892                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
 893                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
 894                 \s*(?P<value>[a-zA-Z0-9_-]+)
 895                 \s*\]$
 896                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
 897             m = str_operator_rex.search(format_spec)
 898             if m:
 899                 comparison_value = m.group('value')
 900                 op = STR_OPERATORS[m.group('op')]
 901
 902         if not m:
 903             raise ValueError('Invalid format specification %r' % format_spec)
 904
 905         def _filter(f):
 906             actual_value = f.get(m.group('key'))
 907             if actual_value is None:
 908                 return m.group('none_inclusive')
 909             return op(actual_value, comparison_value)
 910         new_formats = [f for f in available_formats if _filter(f)]
 911
 912         new_format_spec = format_spec[:-len(m.group(0))]
 913         if not new_format_spec:
 914             new_format_spec = 'best'
 915
 916         return (new_format_spec, new_formats)
 917
 918     def select_format(self, format_spec, available_formats):
 919         while format_spec.endswith(']'):
 920             format_spec, available_formats = self._apply_format_filter(
 921                 format_spec, available_formats)
 922         if not available_formats:
 923             return None
 924
 925         if format_spec in ['best', 'worst', None]:
 926             format_idx = 0 if format_spec == 'worst' else -1
 927             audiovideo_formats = [
 928                 f for f in available_formats
 929                 if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
 930             if audiovideo_formats:
 931                 return audiovideo_formats[format_idx]
 932             # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
 933             elif (all(f.get('acodec') != 'none' for f in available_formats) or
 934                   all(f.get('vcodec') != 'none' for f in available_formats)):
 935                 return available_formats[format_idx]
 936         elif format_spec == 'bestaudio':
 937             audio_formats = [
 938                 f for f in available_formats
 939                 if f.get('vcodec') == 'none']
 940             if audio_formats:
 941                 return audio_formats[-1]
 942         elif format_spec == 'worstaudio':
 943             audio_formats = [
 944                 f for f in available_formats
 945                 if f.get('vcodec') == 'none']
 946             if audio_formats:
 947                 return audio_formats[0]
 948         elif format_spec == 'bestvideo':
 949             video_formats = [
 950                 f for f in available_formats
 951                 if f.get('acodec') == 'none']
 952             if video_formats:
 953                 return video_formats[-1]
 954         elif format_spec == 'worstvideo':
 955             video_formats = [
 956                 f for f in available_formats
 957                 if f.get('acodec') == 'none']
 958             if video_formats:
 959                 return video_formats[0]
 960         else:
 961             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
 962             if format_spec in extensions:
 963                 filter_f = lambda f: f['ext'] == format_spec
 964             else:
 965                 filter_f = lambda f: f['format_id'] == format_spec
 966             matches = list(filter(filter_f, available_formats))
 967             if matches:
 968                 return matches[-1]
 969         return None
 970
 971     def _calc_headers(self, info_dict):
 972         res = std_headers.copy()
 973
 974         add_headers = info_dict.get('http_headers')
 975         if add_headers:
 976             res.update(add_headers)
 977
 978         cookies = self._calc_cookies(info_dict)
 979         if cookies:
 980             res['Cookie'] = cookies
 981
 982         return res
 983
 984     def _calc_cookies(self, info_dict):
 985         pr = compat_urllib_request.Request(info_dict['url'])
 986         self.cookiejar.add_cookie_header(pr)
 987         return pr.get_header('Cookie')
 988
 989     def process_video_result(self, info_dict, download=True):
 990         assert info_dict.get('_type', 'video') == 'video'
 991
 992         if 'id' not in info_dict:
 993             raise ExtractorError('Missing "id" field in extractor result')
 994         if 'title' not in info_dict:
 995             raise ExtractorError('Missing "title" field in extractor result')
 996
 997         if 'playlist' not in info_dict:
 998             # It isn't part of a playlist
 999             info_dict['playlist'] = None
1000             info_dict['playlist_index'] = None
1001
1002         thumbnails = info_dict.get('thumbnails')
1003         if thumbnails is None:
1004             thumbnail = info_dict.get('thumbnail')
1005             if thumbnail:
1006                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1007         if thumbnails:
1008             thumbnails.sort(key=lambda t: (
1009                 t.get('preference'), t.get('width'), t.get('height'),
1010                 t.get('id'), t.get('url')))
1011             for i, t in enumerate(thumbnails):
1012                 if t.get('width') and t.get('height'):
1013                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1014                 if t.get('id') is None:
1015                     t['id'] = '%d' % i
1016
1017         if thumbnails and 'thumbnail' not in info_dict:
1018             info_dict['thumbnail'] = thumbnails[-1]['url']
1019
1020         if 'display_id' not in info_dict and 'id' in info_dict:
1021             info_dict['display_id'] = info_dict['id']
1022
1023         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1024             # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1025             # see http://bugs.python.org/issue1646728)
1026             try:
1027                 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1028                 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1029             except (ValueError, OverflowError, OSError):
1030                 pass
1031
1032         if self.params.get('listsubtitles', False):
1033             if 'automatic_captions' in info_dict:
1034                 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
1035             self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
1036             return
1037         info_dict['requested_subtitles'] = self.process_subtitles(
1038             info_dict['id'], info_dict.get('subtitles'),
1039             info_dict.get('automatic_captions'))
1040
1041         # We now pick which formats have to be downloaded
1042         if info_dict.get('formats') is None:
1043             # There's only one format available
1044             formats = [info_dict]
1045         else:
1046             formats = info_dict['formats']
1047
1048         if not formats:
1049             raise ExtractorError('No video formats found!')
1050
1051         formats_dict = {}
1052
1053         # We check that all the formats have the format and format_id fields
1054         for i, format in enumerate(formats):
1055             if 'url' not in format:
1056                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1057
1058             if format.get('format_id') is None:
1059                 format['format_id'] = compat_str(i)
1060             format_id = format['format_id']
1061             if format_id not in formats_dict:
1062                 formats_dict[format_id] = []
1063             formats_dict[format_id].append(format)
1064
1065         # Make sure all formats have unique format_id
1066         for format_id, ambiguous_formats in formats_dict.items():
1067             if len(ambiguous_formats) > 1:
1068                 for i, format in enumerate(ambiguous_formats):
1069                     format['format_id'] = '%s-%d' % (format_id, i)
1070
1071         for i, format in enumerate(formats):
1072             if format.get('format') is None:
1073                 format['format'] = '{id} - {res}{note}'.format(
1074                     id=format['format_id'],
1075                     res=self.format_resolution(format),
1076                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1077                 )
1078             # Automatically determine file extension if missing
1079             if 'ext' not in format:
1080                 format['ext'] = determine_ext(format['url']).lower()
1081             # Add HTTP headers, so that external programs can use them from the
1082             # json output
1083             full_format_info = info_dict.copy()
1084             full_format_info.update(format)
1085             format['http_headers'] = self._calc_headers(full_format_info)
1086
1087         # TODO Central sorting goes here
1088
1089         if formats[0] is not info_dict:
1090             # only set the 'formats' fields if the original info_dict list them
1091             # otherwise we end up with a circular reference, the first (and unique)
1092             # element in the 'formats' field in info_dict is info_dict itself,
1093             # wich can't be exported to json
1094             info_dict['formats'] = formats
1095         if self.params.get('listformats'):
1096             self.list_formats(info_dict)
1097             return
1098         if self.params.get('list_thumbnails'):
1099             self.list_thumbnails(info_dict)
1100             return
1101
1102         req_format = self.params.get('format')
1103         if req_format is None:
1104             req_format_list = []
1105             if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
1106                     info_dict['extractor'] in ['youtube', 'ted']):
1107                 merger = FFmpegMergerPP(self)
1108                 if merger.available and merger.can_merge():
1109                     req_format_list.append('bestvideo+bestaudio')
1110             req_format_list.append('best')
1111             req_format = '/'.join(req_format_list)
1112         formats_to_download = []
1113         if req_format == 'all':
1114             formats_to_download = formats
1115         else:
1116             for rfstr in req_format.split(','):
1117                 # We can accept formats requested in the format: 34/5/best, we pick
1118                 # the first that is available, starting from left
1119                 req_formats = rfstr.split('/')
1120                 for rf in req_formats:
1121                     if re.match(r'.+?\+.+?', rf) is not None:
1122                         # Two formats have been requested like '137+139'
1123                         format_1, format_2 = rf.split('+')
1124                         formats_info = (self.select_format(format_1, formats),
1125                                         self.select_format(format_2, formats))
1126                         if all(formats_info):
1127                             # The first format must contain the video and the
1128                             # second the audio
1129                             if formats_info[0].get('vcodec') == 'none':
1130                                 self.report_error('The first format must '
1131                                                   'contain the video, try using '
1132                                                   '"-f %s+%s"' % (format_2, format_1))
1133                                 return
1134                             output_ext = (
1135                                 formats_info[0]['ext']
1136                                 if self.params.get('merge_output_format') is None
1137                                 else self.params['merge_output_format'])
1138                             selected_format = {
1139                                 'requested_formats': formats_info,
1140                                 'format': '%s+%s' % (formats_info[0].get('format'),
1141                                                      formats_info[1].get('format')),
1142                                 'format_id': '%s+%s' % (formats_info[0].get('format_id'),
1143                                                         formats_info[1].get('format_id')),
1144                                 'width': formats_info[0].get('width'),
1145                                 'height': formats_info[0].get('height'),
1146                                 'resolution': formats_info[0].get('resolution'),
1147                                 'fps': formats_info[0].get('fps'),
1148                                 'vcodec': formats_info[0].get('vcodec'),
1149                                 'vbr': formats_info[0].get('vbr'),
1150                                 'stretched_ratio': formats_info[0].get('stretched_ratio'),
1151                                 'acodec': formats_info[1].get('acodec'),
1152                                 'abr': formats_info[1].get('abr'),
1153                                 'ext': output_ext,
1154                             }
1155                         else:
1156                             selected_format = None
1157                     else:
1158                         selected_format = self.select_format(rf, formats)
1159                     if selected_format is not None:
1160                         formats_to_download.append(selected_format)
1161                         break
1162         if not formats_to_download:
1163             raise ExtractorError('requested format not available',
1164                                  expected=True)
1165
1166         if download:
1167             if len(formats_to_download) > 1:
1168                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1169             for format in formats_to_download:
1170                 new_info = dict(info_dict)
1171                 new_info.update(format)
1172                 self.process_info(new_info)
1173         # We update the info dict with the best quality format (backwards compatibility)
1174         info_dict.update(formats_to_download[-1])
1175         return info_dict
1176
1177     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1178         """Select the requested subtitles and their format"""
1179         available_subs = {}
1180         if normal_subtitles and self.params.get('writesubtitles'):
1181             available_subs.update(normal_subtitles)
1182         if automatic_captions and self.params.get('writeautomaticsub'):
1183             for lang, cap_info in automatic_captions.items():
1184                 if lang not in available_subs:
1185                     available_subs[lang] = cap_info
1186
1187         if (not self.params.get('writesubtitles') and not
1188                 self.params.get('writeautomaticsub') or not
1189                 available_subs):
1190             return None
1191
1192         if self.params.get('allsubtitles', False):
1193             requested_langs = available_subs.keys()
1194         else:
1195             if self.params.get('subtitleslangs', False):
1196                 requested_langs = self.params.get('subtitleslangs')
1197             elif 'en' in available_subs:
1198                 requested_langs = ['en']
1199             else:
1200                 requested_langs = [list(available_subs.keys())[0]]
1201
1202         formats_query = self.params.get('subtitlesformat', 'best')
1203         formats_preference = formats_query.split('/') if formats_query else []
1204         subs = {}
1205         for lang in requested_langs:
1206             formats = available_subs.get(lang)
1207             if formats is None:
1208                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1209                 continue
1210             for ext in formats_preference:
1211                 if ext == 'best':
1212                     f = formats[-1]
1213                     break
1214                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1215                 if matches:
1216                     f = matches[-1]
1217                     break
1218             else:
1219                 f = formats[-1]
1220                 self.report_warning(
1221                     'No subtitle format found matching "%s" for language %s, '
1222                     'using %s' % (formats_query, lang, f['ext']))
1223             subs[lang] = f
1224         return subs
1225
1226     def process_info(self, info_dict):
1227         """Process a single resolved IE result."""
1228
1229         assert info_dict.get('_type', 'video') == 'video'
1230
1231         max_downloads = self.params.get('max_downloads')
1232         if max_downloads is not None:
1233             if self._num_downloads >= int(max_downloads):
1234                 raise MaxDownloadsReached()
1235
1236         info_dict['fulltitle'] = info_dict['title']
1237         if len(info_dict['title']) > 200:
1238             info_dict['title'] = info_dict['title'][:197] + '...'
1239
1240         if 'format' not in info_dict:
1241             info_dict['format'] = info_dict['ext']
1242
1243         reason = self._match_entry(info_dict, incomplete=False)
1244         if reason is not None:
1245             self.to_screen('[download] ' + reason)
1246             return
1247
1248         self._num_downloads += 1
1249
1250         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1251
1252         # Forced printings
1253         if self.params.get('forcetitle', False):
1254             self.to_stdout(info_dict['fulltitle'])
1255         if self.params.get('forceid', False):
1256             self.to_stdout(info_dict['id'])
1257         if self.params.get('forceurl', False):
1258             if info_dict.get('requested_formats') is not None:
1259                 for f in info_dict['requested_formats']:
1260                     self.to_stdout(f['url'] + f.get('play_path', ''))
1261             else:
1262                 # For RTMP URLs, also include the playpath
1263                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1264         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1265             self.to_stdout(info_dict['thumbnail'])
1266         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1267             self.to_stdout(info_dict['description'])
1268         if self.params.get('forcefilename', False) and filename is not None:
1269             self.to_stdout(filename)
1270         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1271             self.to_stdout(formatSeconds(info_dict['duration']))
1272         if self.params.get('forceformat', False):
1273             self.to_stdout(info_dict['format'])
1274         if self.params.get('forcejson', False):
1275             self.to_stdout(json.dumps(info_dict))
1276
1277         # Do nothing else if in simulate mode
1278         if self.params.get('simulate', False):
1279             return
1280
1281         if filename is None:
1282             return
1283
1284         try:
1285             dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1286             if dn and not os.path.exists(dn):
1287                 os.makedirs(dn)
1288         except (OSError, IOError) as err:
1289             self.report_error('unable to create directory ' + compat_str(err))
1290             return
1291
1292         if self.params.get('writedescription', False):
1293             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1294             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1295                 self.to_screen('[info] Video description is already present')
1296             elif info_dict.get('description') is None:
1297                 self.report_warning('There\'s no description to write.')
1298             else:
1299                 try:
1300                     self.to_screen('[info] Writing video description to: ' + descfn)
1301                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1302                         descfile.write(info_dict['description'])
1303                 except (OSError, IOError):
1304                     self.report_error('Cannot write description file ' + descfn)
1305                     return
1306
1307         if self.params.get('writeannotations', False):
1308             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1309             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1310                 self.to_screen('[info] Video annotations are already present')
1311             else:
1312                 try:
1313                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1314                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1315                         annofile.write(info_dict['annotations'])
1316                 except (KeyError, TypeError):
1317                     self.report_warning('There are no annotations to write.')
1318                 except (OSError, IOError):
1319                     self.report_error('Cannot write annotations file: ' + annofn)
1320                     return
1321
1322         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1323                                        self.params.get('writeautomaticsub')])
1324
1325         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1326             # subtitles download errors are already managed as troubles in relevant IE
1327             # that way it will silently go on when used with unsupporting IE
1328             subtitles = info_dict['requested_subtitles']
1329             ie = self.get_info_extractor(info_dict['extractor_key'])
1330             for sub_lang, sub_info in subtitles.items():
1331                 sub_format = sub_info['ext']
1332                 if sub_info.get('data') is not None:
1333                     sub_data = sub_info['data']
1334                 else:
1335                     try:
1336                         sub_data = ie._download_webpage(
1337                             sub_info['url'], info_dict['id'], note=False)
1338                     except ExtractorError as err:
1339                         self.report_warning('Unable to download subtitle for "%s": %s' %
1340                                             (sub_lang, compat_str(err.cause)))
1341                         continue
1342                 try:
1343                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1344                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1345                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1346                     else:
1347                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1348                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1349                             subfile.write(sub_data)
1350                 except (OSError, IOError):
1351                     self.report_error('Cannot write subtitles file ' + sub_filename)
1352                     return
1353
1354         if self.params.get('writeinfojson', False):
1355             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1356             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1357                 self.to_screen('[info] Video description metadata is already present')
1358             else:
1359                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1360                 try:
1361                     write_json_file(self.filter_requested_info(info_dict), infofn)
1362                 except (OSError, IOError):
1363                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1364                     return
1365
1366         self._write_thumbnails(info_dict, filename)
1367
1368         if not self.params.get('skip_download', False):
1369             try:
1370                 def dl(name, info):
1371                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1372                     for ph in self._progress_hooks:
1373                         fd.add_progress_hook(ph)
1374                     if self.params.get('verbose'):
1375                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1376                     return fd.download(name, info)
1377
1378                 if info_dict.get('requested_formats') is not None:
1379                     downloaded = []
1380                     success = True
1381                     merger = FFmpegMergerPP(self)
1382                     if not merger.available:
1383                         postprocessors = []
1384                         self.report_warning('You have requested multiple '
1385                                             'formats but ffmpeg or avconv are not installed.'
1386                                             ' The formats won\'t be merged.')
1387                     else:
1388                         postprocessors = [merger]
1389
1390                     def compatible_formats(formats):
1391                         video, audio = formats
1392                         # Check extension
1393                         video_ext, audio_ext = audio.get('ext'), video.get('ext')
1394                         if video_ext and audio_ext:
1395                             COMPATIBLE_EXTS = (
1396                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
1397                                 ('webm')
1398                             )
1399                             for exts in COMPATIBLE_EXTS:
1400                                 if video_ext in exts and audio_ext in exts:
1401                                     return True
1402                         # TODO: Check acodec/vcodec
1403                         return False
1404
1405                     filename_real_ext = os.path.splitext(filename)[1][1:]
1406                     filename_wo_ext = (
1407                         os.path.splitext(filename)[0]
1408                         if filename_real_ext == info_dict['ext']
1409                         else filename)
1410                     requested_formats = info_dict['requested_formats']
1411                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1412                         info_dict['ext'] = 'mkv'
1413                         self.report_warning(
1414                             'Requested formats are incompatible for merge and will be merged into mkv.')
1415                     # Ensure filename always has a correct extension for successful merge
1416                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1417                     if os.path.exists(encodeFilename(filename)):
1418                         self.to_screen(
1419                             '[download] %s has already been downloaded and '
1420                             'merged' % filename)
1421                     else:
1422                         for f in requested_formats:
1423                             new_info = dict(info_dict)
1424                             new_info.update(f)
1425                             fname = self.prepare_filename(new_info)
1426                             fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
1427                             downloaded.append(fname)
1428                             partial_success = dl(fname, new_info)
1429                             success = success and partial_success
1430                         info_dict['__postprocessors'] = postprocessors
1431                         info_dict['__files_to_merge'] = downloaded
1432                 else:
1433                     # Just a single file
1434                     success = dl(filename, info_dict)
1435             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1436                 self.report_error('unable to download video data: %s' % str(err))
1437                 return
1438             except (OSError, IOError) as err:
1439                 raise UnavailableVideoError(err)
1440             except (ContentTooShortError, ) as err:
1441                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1442                 return
1443
1444             if success:
1445                 # Fixup content
1446                 fixup_policy = self.params.get('fixup')
1447                 if fixup_policy is None:
1448                     fixup_policy = 'detect_or_warn'
1449
1450                 stretched_ratio = info_dict.get('stretched_ratio')
1451                 if stretched_ratio is not None and stretched_ratio != 1:
1452                     if fixup_policy == 'warn':
1453                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1454                             info_dict['id'], stretched_ratio))
1455                     elif fixup_policy == 'detect_or_warn':
1456                         stretched_pp = FFmpegFixupStretchedPP(self)
1457                         if stretched_pp.available:
1458                             info_dict.setdefault('__postprocessors', [])
1459                             info_dict['__postprocessors'].append(stretched_pp)
1460                         else:
1461                             self.report_warning(
1462                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1463                                     info_dict['id'], stretched_ratio))
1464                     else:
1465                         assert fixup_policy in ('ignore', 'never')
1466
1467                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1468                     if fixup_policy == 'warn':
1469                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1470                             info_dict['id']))
1471                     elif fixup_policy == 'detect_or_warn':
1472                         fixup_pp = FFmpegFixupM4aPP(self)
1473                         if fixup_pp.available:
1474                             info_dict.setdefault('__postprocessors', [])
1475                             info_dict['__postprocessors'].append(fixup_pp)
1476                         else:
1477                             self.report_warning(
1478                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1479                                     info_dict['id']))
1480                     else:
1481                         assert fixup_policy in ('ignore', 'never')
1482
1483                 try:
1484                     self.post_process(filename, info_dict)
1485                 except (PostProcessingError) as err:
1486                     self.report_error('postprocessing: %s' % str(err))
1487                     return
1488                 self.record_download_archive(info_dict)
1489
1490     def download(self, url_list):
1491         """Download a given list of URLs."""
1492         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1493         if (len(url_list) > 1 and
1494                 '%' not in outtmpl and
1495                 self.params.get('max_downloads') != 1):
1496             raise SameFileError(outtmpl)
1497
1498         for url in url_list:
1499             try:
1500                 # It also downloads the videos
1501                 res = self.extract_info(
1502                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
1503             except UnavailableVideoError:
1504                 self.report_error('unable to download video')
1505             except MaxDownloadsReached:
1506                 self.to_screen('[info] Maximum number of downloaded files reached.')
1507                 raise
1508             else:
1509                 if self.params.get('dump_single_json', False):
1510                     self.to_stdout(json.dumps(res))
1511
1512         return self._download_retcode
1513
1514     def download_with_info_file(self, info_filename):
1515         with contextlib.closing(fileinput.FileInput(
1516                 [info_filename], mode='r',
1517                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1518             # FileInput doesn't have a read method, we can't call json.load
1519             info = self.filter_requested_info(json.loads('\n'.join(f)))
1520         try:
1521             self.process_ie_result(info, download=True)
1522         except DownloadError:
1523             webpage_url = info.get('webpage_url')
1524             if webpage_url is not None:
1525                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1526                 return self.download([webpage_url])
1527             else:
1528                 raise
1529         return self._download_retcode
1530
1531     @staticmethod
1532     def filter_requested_info(info_dict):
1533         return dict(
1534             (k, v) for k, v in info_dict.items()
1535             if k not in ['requested_formats', 'requested_subtitles'])
1536
1537     def post_process(self, filename, ie_info):
1538         """Run all the postprocessors on the given file."""
1539         info = dict(ie_info)
1540         info['filepath'] = filename
1541         pps_chain = []
1542         if ie_info.get('__postprocessors') is not None:
1543             pps_chain.extend(ie_info['__postprocessors'])
1544         pps_chain.extend(self._pps)
1545         for pp in pps_chain:
1546             files_to_delete = []
1547             try:
1548                 files_to_delete, info = pp.run(info)
1549             except PostProcessingError as e:
1550                 self.report_error(e.msg)
1551             if files_to_delete and not self.params.get('keepvideo', False):
1552                 for old_filename in files_to_delete:
1553                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1554                     try:
1555                         os.remove(encodeFilename(old_filename))
1556                     except (IOError, OSError):
1557                         self.report_warning('Unable to remove downloaded original file')
1558
1559     def _make_archive_id(self, info_dict):
1560         # Future-proof against any change in case
1561         # and backwards compatibility with prior versions
1562         extractor = info_dict.get('extractor_key')
1563         if extractor is None:
1564             if 'id' in info_dict:
1565                 extractor = info_dict.get('ie_key')  # key in a playlist
1566         if extractor is None:
1567             return None  # Incomplete video information
1568         return extractor.lower() + ' ' + info_dict['id']
1569
1570     def in_download_archive(self, info_dict):
1571         fn = self.params.get('download_archive')
1572         if fn is None:
1573             return False
1574
1575         vid_id = self._make_archive_id(info_dict)
1576         if vid_id is None:
1577             return False  # Incomplete video information
1578
1579         try:
1580             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1581                 for line in archive_file:
1582                     if line.strip() == vid_id:
1583                         return True
1584         except IOError as ioe:
1585             if ioe.errno != errno.ENOENT:
1586                 raise
1587         return False
1588
1589     def record_download_archive(self, info_dict):
1590         fn = self.params.get('download_archive')
1591         if fn is None:
1592             return
1593         vid_id = self._make_archive_id(info_dict)
1594         assert vid_id
1595         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1596             archive_file.write(vid_id + '\n')
1597
1598     @staticmethod
1599     def format_resolution(format, default='unknown'):
1600         if format.get('vcodec') == 'none':
1601             return 'audio only'
1602         if format.get('resolution') is not None:
1603             return format['resolution']
1604         if format.get('height') is not None:
1605             if format.get('width') is not None:
1606                 res = '%sx%s' % (format['width'], format['height'])
1607             else:
1608                 res = '%sp' % format['height']
1609         elif format.get('width') is not None:
1610             res = '?x%d' % format['width']
1611         else:
1612             res = default
1613         return res
1614
1615     def _format_note(self, fdict):
1616         res = ''
1617         if fdict.get('ext') in ['f4f', 'f4m']:
1618             res += '(unsupported) '
1619         if fdict.get('format_note') is not None:
1620             res += fdict['format_note'] + ' '
1621         if fdict.get('tbr') is not None:
1622             res += '%4dk ' % fdict['tbr']
1623         if fdict.get('container') is not None:
1624             if res:
1625                 res += ', '
1626             res += '%s container' % fdict['container']
1627         if (fdict.get('vcodec') is not None and
1628                 fdict.get('vcodec') != 'none'):
1629             if res:
1630                 res += ', '
1631             res += fdict['vcodec']
1632             if fdict.get('vbr') is not None:
1633                 res += '@'
1634         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1635             res += 'video@'
1636         if fdict.get('vbr') is not None:
1637             res += '%4dk' % fdict['vbr']
1638         if fdict.get('fps') is not None:
1639             res += ', %sfps' % fdict['fps']
1640         if fdict.get('acodec') is not None:
1641             if res:
1642                 res += ', '
1643             if fdict['acodec'] == 'none':
1644                 res += 'video only'
1645             else:
1646                 res += '%-5s' % fdict['acodec']
1647         elif fdict.get('abr') is not None:
1648             if res:
1649                 res += ', '
1650             res += 'audio'
1651         if fdict.get('abr') is not None:
1652             res += '@%3dk' % fdict['abr']
1653         if fdict.get('asr') is not None:
1654             res += ' (%5dHz)' % fdict['asr']
1655         if fdict.get('filesize') is not None:
1656             if res:
1657                 res += ', '
1658             res += format_bytes(fdict['filesize'])
1659         elif fdict.get('filesize_approx') is not None:
1660             if res:
1661                 res += ', '
1662             res += '~' + format_bytes(fdict['filesize_approx'])
1663         return res
1664
1665     def list_formats(self, info_dict):
1666         formats = info_dict.get('formats', [info_dict])
1667         table = [
1668             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1669             for f in formats
1670             if f.get('preference') is None or f['preference'] >= -1000]
1671         if len(formats) > 1:
1672             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1673
1674         header_line = ['format code', 'extension', 'resolution', 'note']
1675         self.to_screen(
1676             '[info] Available formats for %s:\n%s' %
1677             (info_dict['id'], render_table(header_line, table)))
1678
1679     def list_thumbnails(self, info_dict):
1680         thumbnails = info_dict.get('thumbnails')
1681         if not thumbnails:
1682             tn_url = info_dict.get('thumbnail')
1683             if tn_url:
1684                 thumbnails = [{'id': '0', 'url': tn_url}]
1685             else:
1686                 self.to_screen(
1687                     '[info] No thumbnails present for %s' % info_dict['id'])
1688                 return
1689
1690         self.to_screen(
1691             '[info] Thumbnails for %s:' % info_dict['id'])
1692         self.to_screen(render_table(
1693             ['ID', 'width', 'height', 'URL'],
1694             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1695
1696     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1697         if not subtitles:
1698             self.to_screen('%s has no %s' % (video_id, name))
1699             return
1700         self.to_screen(
1701             'Available %s for %s:' % (name, video_id))
1702         self.to_screen(render_table(
1703             ['Language', 'formats'],
1704             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1705                 for lang, formats in subtitles.items()]))
1706
1707     def urlopen(self, req):
1708         """ Start an HTTP download """
1709
1710         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1711         # always respected by websites, some tend to give out URLs with non percent-encoded
1712         # non-ASCII characters (see telemb.py, ard.py [#3412])
1713         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1714         # To work around aforementioned issue we will replace request's original URL with
1715         # percent-encoded one
1716         req_is_string = isinstance(req, compat_basestring)
1717         url = req if req_is_string else req.get_full_url()
1718         url_escaped = escape_url(url)
1719
1720         # Substitute URL if any change after escaping
1721         if url != url_escaped:
1722             if req_is_string:
1723                 req = url_escaped
1724             else:
1725                 req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
1726                 req = req_type(
1727                     url_escaped, data=req.data, headers=req.headers,
1728                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1729
1730         return self._opener.open(req, timeout=self._socket_timeout)
1731
1732     def print_debug_header(self):
1733         if not self.params.get('verbose'):
1734             return
1735
1736         if type('') is not compat_str:
1737             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1738             self.report_warning(
1739                 'Your Python is broken! Update to a newer and supported version')
1740
1741         stdout_encoding = getattr(
1742             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1743         encoding_str = (
1744             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1745                 locale.getpreferredencoding(),
1746                 sys.getfilesystemencoding(),
1747                 stdout_encoding,
1748                 self.get_encoding()))
1749         write_string(encoding_str, encoding=None)
1750
1751         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1752         try:
1753             sp = subprocess.Popen(
1754                 ['git', 'rev-parse', '--short', 'HEAD'],
1755                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1756                 cwd=os.path.dirname(os.path.abspath(__file__)))
1757             out, err = sp.communicate()
1758             out = out.decode().strip()
1759             if re.match('[0-9a-f]+', out):
1760                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1761         except Exception:
1762             try:
1763                 sys.exc_clear()
1764             except Exception:
1765                 pass
1766         self._write_string('[debug] Python version %s - %s\n' % (
1767             platform.python_version(), platform_name()))
1768
1769         exe_versions = FFmpegPostProcessor.get_versions(self)
1770         exe_versions['rtmpdump'] = rtmpdump_version()
1771         exe_str = ', '.join(
1772             '%s %s' % (exe, v)
1773             for exe, v in sorted(exe_versions.items())
1774             if v
1775         )
1776         if not exe_str:
1777             exe_str = 'none'
1778         self._write_string('[debug] exe versions: %s\n' % exe_str)
1779
1780         proxy_map = {}
1781         for handler in self._opener.handlers:
1782             if hasattr(handler, 'proxies'):
1783                 proxy_map.update(handler.proxies)
1784         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1785
1786         if self.params.get('call_home', False):
1787             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1788             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1789             latest_version = self.urlopen(
1790                 'https://yt-dl.org/latest/version').read().decode('utf-8')
1791             if version_tuple(latest_version) > version_tuple(__version__):
1792                 self.report_warning(
1793                     'You are using an outdated version (newest version: %s)! '
1794                     'See https://yt-dl.org/update if you need help updating.' %
1795                     latest_version)
1796
1797     def _setup_opener(self):
1798         timeout_val = self.params.get('socket_timeout')
1799         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1800
1801         opts_cookiefile = self.params.get('cookiefile')
1802         opts_proxy = self.params.get('proxy')
1803
1804         if opts_cookiefile is None:
1805             self.cookiejar = compat_cookiejar.CookieJar()
1806         else:
1807             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1808                 opts_cookiefile)
1809             if os.access(opts_cookiefile, os.R_OK):
1810                 self.cookiejar.load()
1811
1812         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1813             self.cookiejar)
1814         if opts_proxy is not None:
1815             if opts_proxy == '':
1816                 proxies = {}
1817             else:
1818                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1819         else:
1820             proxies = compat_urllib_request.getproxies()
1821             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1822             if 'http' in proxies and 'https' not in proxies:
1823                 proxies['https'] = proxies['http']
1824         proxy_handler = PerRequestProxyHandler(proxies)
1825
1826         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1827         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1828         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1829         opener = compat_urllib_request.build_opener(
1830             proxy_handler, https_handler, cookie_processor, ydlh)
1831
1832         # Delete the default user-agent header, which would otherwise apply in
1833         # cases where our custom HTTP handler doesn't come into play
1834         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1835         opener.addheaders = []
1836         self._opener = opener
1837
1838     def encode(self, s):
1839         if isinstance(s, bytes):
1840             return s  # Already encoded
1841
1842         try:
1843             return s.encode(self.get_encoding())
1844         except UnicodeEncodeError as err:
1845             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1846             raise
1847
1848     def get_encoding(self):
1849         encoding = self.params.get('encoding')
1850         if encoding is None:
1851             encoding = preferredencoding()
1852         return encoding
1853
1854     def _write_thumbnails(self, info_dict, filename):
1855         if self.params.get('writethumbnail', False):
1856             thumbnails = info_dict.get('thumbnails')
1857             if thumbnails:
1858                 thumbnails = [thumbnails[-1]]
1859         elif self.params.get('write_all_thumbnails', False):
1860             thumbnails = info_dict.get('thumbnails')
1861         else:
1862             return
1863
1864         if not thumbnails:
1865             # No thumbnails present, so return immediately
1866             return
1867
1868         for t in thumbnails:
1869             thumb_ext = determine_ext(t['url'], 'jpg')
1870             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
1871             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
1872             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
1873
1874             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1875                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
1876                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1877             else:
1878                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
1879                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1880                 try:
1881                     uf = self.urlopen(t['url'])
1882                     with open(thumb_filename, 'wb') as thumbf:
1883                         shutil.copyfileobj(uf, thumbf)
1884                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
1885                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
1886                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1887                     self.report_warning('Unable to download thumbnail "%s": %s' %
1888                                         (t['url'], compat_str(err)))