[YoutubeDL] Do not lose request method information
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import datetime
9 import errno
10 import fileinput
11 import io
12 import itertools
13 import json
14 import locale
15 import operator
16 import os
17 import platform
18 import re
19 import shutil
20 import subprocess
21 import socket
22 import sys
23 import time
24 import traceback
25
26 if os.name == 'nt':
27     import ctypes
28
29 from .compat import (
30     compat_basestring,
31     compat_cookiejar,
32     compat_expanduser,
33     compat_get_terminal_size,
34     compat_http_client,
35     compat_kwargs,
36     compat_str,
37     compat_urllib_error,
38     compat_urllib_request,
39 )
40 from .utils import (
41     escape_url,
42     ContentTooShortError,
43     date_from_str,
44     DateRange,
45     DEFAULT_OUTTMPL,
46     determine_ext,
47     DownloadError,
48     encodeFilename,
49     ExtractorError,
50     format_bytes,
51     formatSeconds,
52     HEADRequest,
53     locked_file,
54     make_HTTPS_handler,
55     MaxDownloadsReached,
56     PagedList,
57     parse_filesize,
58     PerRequestProxyHandler,
59     PostProcessingError,
60     platform_name,
61     preferredencoding,
62     render_table,
63     SameFileError,
64     sanitize_filename,
65     sanitize_path,
66     std_headers,
67     subtitles_filename,
68     UnavailableVideoError,
69     url_basename,
70     version_tuple,
71     write_json_file,
72     write_string,
73     YoutubeDLHandler,
74     prepend_extension,
75     replace_extension,
76     args_to_str,
77     age_restricted,
78 )
79 from .cache import Cache
80 from .extractor import get_info_extractor, gen_extractors
81 from .downloader import get_suitable_downloader
82 from .downloader.rtmp import rtmpdump_version
83 from .postprocessor import (
84     FFmpegFixupM4aPP,
85     FFmpegFixupStretchedPP,
86     FFmpegMergerPP,
87     FFmpegPostProcessor,
88     get_postprocessor,
89 )
90 from .version import __version__
91
92
93 class YoutubeDL(object):
94     """YoutubeDL class.
95
96     YoutubeDL objects are the ones responsible of downloading the
97     actual video file and writing it to disk if the user has requested
98     it, among some other tasks. In most cases there should be one per
99     program. As, given a video URL, the downloader doesn't know how to
100     extract all the needed information, task that InfoExtractors do, it
101     has to pass the URL to one of them.
102
103     For this, YoutubeDL objects have a method that allows
104     InfoExtractors to be registered in a given order. When it is passed
105     a URL, the YoutubeDL object handles it to the first InfoExtractor it
106     finds that reports being able to handle it. The InfoExtractor extracts
107     all the information about the video or videos the URL refers to, and
108     YoutubeDL process the extracted information, possibly using a File
109     Downloader to download the video.
110
111     YoutubeDL objects accept a lot of parameters. In order not to saturate
112     the object constructor with arguments, it receives a dictionary of
113     options instead. These options are available through the params
114     attribute for the InfoExtractors to use. The YoutubeDL also
115     registers itself as the downloader in charge for the InfoExtractors
116     that are added to it, so this is a "mutual registration".
117
118     Available options:
119
120     username:          Username for authentication purposes.
121     password:          Password for authentication purposes.
122     videopassword:     Password for acces a video.
123     usenetrc:          Use netrc for authentication instead.
124     verbose:           Print additional info to stdout.
125     quiet:             Do not print messages to stdout.
126     no_warnings:       Do not print out anything for warnings.
127     forceurl:          Force printing final URL.
128     forcetitle:        Force printing title.
129     forceid:           Force printing ID.
130     forcethumbnail:    Force printing thumbnail URL.
131     forcedescription:  Force printing description.
132     forcefilename:     Force printing final filename.
133     forceduration:     Force printing duration.
134     forcejson:         Force printing info_dict as JSON.
135     dump_single_json:  Force printing the info_dict of the whole playlist
136                        (or video) as a single JSON line.
137     simulate:          Do not download the video files.
138     format:            Video format code. See options.py for more information.
139     outtmpl:           Template for output names.
140     restrictfilenames: Do not allow "&" and spaces in file names
141     ignoreerrors:      Do not stop on download errors.
142     nooverwrites:      Prevent overwriting files.
143     playliststart:     Playlist item to start at.
144     playlistend:       Playlist item to end at.
145     playlist_items:    Specific indices of playlist to download.
146     playlistreverse:   Download playlist items in reverse order.
147     matchtitle:        Download only matching titles.
148     rejecttitle:       Reject downloads for matching titles.
149     logger:            Log messages to a logging.Logger instance.
150     logtostderr:       Log messages to stderr instead of stdout.
151     writedescription:  Write the video description to a .description file
152     writeinfojson:     Write the video description to a .info.json file
153     writeannotations:  Write the video annotations to a .annotations.xml file
154     writethumbnail:    Write the thumbnail image to a file
155     write_all_thumbnails:  Write all thumbnail formats to files
156     writesubtitles:    Write the video subtitles to a file
157     writeautomaticsub: Write the automatic subtitles to a file
158     allsubtitles:      Downloads all the subtitles of the video
159                        (requires writesubtitles or writeautomaticsub)
160     listsubtitles:     Lists all available subtitles for the video
161     subtitlesformat:   The format code for subtitles
162     subtitleslangs:    List of languages of the subtitles to download
163     keepvideo:         Keep the video file after post-processing
164     daterange:         A DateRange object, download only if the upload_date is in the range.
165     skip_download:     Skip the actual download of the video file
166     cachedir:          Location of the cache files in the filesystem.
167                        False to disable filesystem cache.
168     noplaylist:        Download single video instead of a playlist if in doubt.
169     age_limit:         An integer representing the user's age in years.
170                        Unsuitable videos for the given age are skipped.
171     min_views:         An integer representing the minimum view count the video
172                        must have in order to not be skipped.
173                        Videos without view count information are always
174                        downloaded. None for no limit.
175     max_views:         An integer representing the maximum view count.
176                        Videos that are more popular than that are not
177                        downloaded.
178                        Videos without view count information are always
179                        downloaded. None for no limit.
180     download_archive:  File name of a file where all downloads are recorded.
181                        Videos already present in the file are not downloaded
182                        again.
183     cookiefile:        File name where cookies should be read from and dumped to.
184     nocheckcertificate:Do not verify SSL certificates
185     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
186                        At the moment, this is only supported by YouTube.
187     proxy:             URL of the proxy server to use
188     cn_verification_proxy:  URL of the proxy to use for IP address verification
189                        on Chinese sites. (Experimental)
190     socket_timeout:    Time to wait for unresponsive hosts, in seconds
191     bidi_workaround:   Work around buggy terminals without bidirectional text
192                        support, using fridibi
193     debug_printtraffic:Print out sent and received HTTP traffic
194     include_ads:       Download ads as well
195     default_search:    Prepend this string if an input url is not valid.
196                        'auto' for elaborate guessing
197     encoding:          Use this encoding instead of the system-specified.
198     extract_flat:      Do not resolve URLs, return the immediate result.
199                        Pass in 'in_playlist' to only show this behavior for
200                        playlist items.
201     postprocessors:    A list of dictionaries, each with an entry
202                        * key:  The name of the postprocessor. See
203                                youtube_dl/postprocessor/__init__.py for a list.
204                        as well as any further keyword arguments for the
205                        postprocessor.
206     progress_hooks:    A list of functions that get called on download
207                        progress, with a dictionary with the entries
208                        * status: One of "downloading", "error", or "finished".
209                                  Check this first and ignore unknown values.
210
211                        If status is one of "downloading", or "finished", the
212                        following properties may also be present:
213                        * filename: The final filename (always present)
214                        * tmpfilename: The filename we're currently writing to
215                        * downloaded_bytes: Bytes on disk
216                        * total_bytes: Size of the whole file, None if unknown
217                        * total_bytes_estimate: Guess of the eventual file size,
218                                                None if unavailable.
219                        * elapsed: The number of seconds since download started.
220                        * eta: The estimated time in seconds, None if unknown
221                        * speed: The download speed in bytes/second, None if
222                                 unknown
223                        * fragment_index: The counter of the currently
224                                          downloaded video fragment.
225                        * fragment_count: The number of fragments (= individual
226                                          files that will be merged)
227
228                        Progress hooks are guaranteed to be called at least once
229                        (with status "finished") if the download is successful.
230     merge_output_format: Extension to use when merging formats.
231     fixup:             Automatically correct known faults of the file.
232                        One of:
233                        - "never": do nothing
234                        - "warn": only emit a warning
235                        - "detect_or_warn": check whether we can do anything
236                                            about it, warn otherwise (default)
237     source_address:    (Experimental) Client-side IP address to bind to.
238     call_home:         Boolean, true iff we are allowed to contact the
239                        youtube-dl servers for debugging.
240     sleep_interval:    Number of seconds to sleep before each download.
241     listformats:       Print an overview of available video formats and exit.
242     list_thumbnails:   Print a table of all thumbnails and exit.
243     match_filter:      A function that gets called with the info_dict of
244                        every video.
245                        If it returns a message, the video is ignored.
246                        If it returns None, the video is downloaded.
247                        match_filter_func in utils.py is one example for this.
248     no_color:          Do not emit color codes in output.
249
250     The following options determine which downloader is picked:
251     external_downloader: Executable of the external downloader to call.
252                        None or unset for standard (built-in) downloader.
253     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
254
255     The following parameters are not used by YoutubeDL itself, they are used by
256     the downloader (see youtube_dl/downloader/common.py):
257     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
258     noresizebuffer, retries, continuedl, noprogress, consoletitle,
259     xattr_set_filesize, external_downloader_args.
260
261     The following options are used by the post processors:
262     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
263                        otherwise prefer avconv.
264     """
265
    # Class-level defaults; every real instance overwrites these in __init__.
    params = None  # dict of user options (see the class docstring above)
    _ies = []  # registered InfoExtractor instances, in registration order
    _pps = []  # registered PostProcessor chain
    _download_retcode = None  # process return code accumulated across downloads
    _num_downloads = None  # number of files downloaded so far (for %(autonumber)s)
    _screen_file = None  # file object used for screen output (stdout or stderr)
272
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.

        params:    dict of options (see the class docstring); stored on
                   self.params and shared with the info extractors.
        auto_init: when True, print the debug header and register the
                   default info extractors immediately.
        """
        if params is None:
            params = {}
        self._ies = []
        self._ies_instances = {}
        self._pps = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Screen output goes to stderr instead of stdout when 'logtostderr' is set
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = params
        self.cache = Cache(self)

        if params.get('bidi_workaround', False):
            # Spawn an external bidi filter (bidiv, falling back to fribidi)
            # and read its output back through a pty.
            try:
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    # bidiv is not installed; try fribidi instead
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == 2:
                    # errno 2 == ENOENT: neither executable was found
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.version_info >= (3,) and sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
                not params.get('restrictfilenames', False)):
            # On Python 3, the Unicode filesystem API will throw errors (#1474)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if isinstance(params.get('outtmpl'), bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')

        self._setup_opener()

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        # Instantiate configured postprocessors: each dict's 'key' names the
        # PP class, the remaining entries are its keyword arguments.
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            del pp_def['key']
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)
345
346     def warn_if_short_id(self, argv):
347         # short YouTube ID starting with dash?
348         idxs = [
349             i for i, a in enumerate(argv)
350             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
351         if idxs:
352             correct_argv = (
353                 ['youtube-dl'] +
354                 [a for i, a in enumerate(argv) if i not in idxs] +
355                 ['--'] + [argv[i] for i in idxs]
356             )
357             self.report_warning(
358                 'Long argument string detected. '
359                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
360                 args_to_str(correct_argv))
361
362     def add_info_extractor(self, ie):
363         """Add an InfoExtractor object to the end of the list."""
364         self._ies.append(ie)
365         self._ies_instances[ie.ie_key()] = ie
366         ie.set_downloader(self)
367
368     def get_info_extractor(self, ie_key):
369         """
370         Get an instance of an IE with name ie_key, it will try to get one from
371         the _ies list, if there's no instance it will create a new one and add
372         it to the extractor list.
373         """
374         ie = self._ies_instances.get(ie_key)
375         if ie is None:
376             ie = get_info_extractor(ie_key)()
377             self.add_info_extractor(ie)
378         return ie
379
380     def add_default_info_extractors(self):
381         """
382         Add the InfoExtractors returned by gen_extractors to the end of the list
383         """
384         for ie in gen_extractors():
385             self.add_info_extractor(ie)
386
387     def add_post_processor(self, pp):
388         """Add a PostProcessor object to the end of the chain."""
389         self._pps.append(pp)
390         pp.set_downloader(self)
391
392     def add_progress_hook(self, ph):
393         """Add the progress hook (currently only for the file downloader)"""
394         self._progress_hooks.append(ph)
395
396     def _bidi_workaround(self, message):
397         if not hasattr(self, '_output_channel'):
398             return message
399
400         assert hasattr(self, '_output_process')
401         assert isinstance(message, compat_str)
402         line_count = message.count('\n') + 1
403         self._output_process.stdin.write((message + '\n').encode('utf-8'))
404         self._output_process.stdin.flush()
405         res = ''.join(self._output_channel.readline().decode('utf-8')
406                       for _ in range(line_count))
407         return res[:-len('\n')]
408
409     def to_screen(self, message, skip_eol=False):
410         """Print message to stdout if not in quiet mode."""
411         return self.to_stdout(message, skip_eol, check_quiet=True)
412
413     def _write_string(self, s, out=None):
414         write_string(s, out=out, encoding=self.params.get('encoding'))
415
416     def to_stdout(self, message, skip_eol=False, check_quiet=False):
417         """Print message to stdout if not in quiet mode."""
418         if self.params.get('logger'):
419             self.params['logger'].debug(message)
420         elif not check_quiet or not self.params.get('quiet', False):
421             message = self._bidi_workaround(message)
422             terminator = ['\n', ''][skip_eol]
423             output = message + terminator
424
425             self._write_string(output, self._screen_file)
426
427     def to_stderr(self, message):
428         """Print message to stderr."""
429         assert isinstance(message, compat_str)
430         if self.params.get('logger'):
431             self.params['logger'].error(message)
432         else:
433             message = self._bidi_workaround(message)
434             output = message + '\n'
435             self._write_string(output, self._err_file)
436
437     def to_console_title(self, message):
438         if not self.params.get('consoletitle', False):
439             return
440         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
441             # c_wchar_p() might not be necessary if `message` is
442             # already of type unicode()
443             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
444         elif 'TERM' in os.environ:
445             self._write_string('\033]0;%s\007' % message, self._screen_file)
446
447     def save_console_title(self):
448         if not self.params.get('consoletitle', False):
449             return
450         if 'TERM' in os.environ:
451             # Save the title on stack
452             self._write_string('\033[22;0t', self._screen_file)
453
454     def restore_console_title(self):
455         if not self.params.get('consoletitle', False):
456             return
457         if 'TERM' in os.environ:
458             # Restore the title from stack
459             self._write_string('\033[23;0t', self._screen_file)
460
461     def __enter__(self):
462         self.save_console_title()
463         return self
464
465     def __exit__(self, *args):
466         self.restore_console_title()
467
468         if self.params.get('cookiefile') is not None:
469             self.cookiejar.save()
470
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # Wrapped exceptions (e.g. those carrying an `exc_info`
                    # attribute) include their original cause's traceback first
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: show the current call stack
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Re-raise as DownloadError, preferring the wrapped exception's
            # original exc_info when available so the root cause is not lost
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1
500
501     def report_warning(self, message):
502         '''
503         Print the message to stderr, it will be prefixed with 'WARNING:'
504         If stderr is a tty file the 'WARNING:' will be colored
505         '''
506         if self.params.get('logger') is not None:
507             self.params['logger'].warning(message)
508         else:
509             if self.params.get('no_warnings'):
510                 return
511             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
512                 _msg_header = '\033[0;33mWARNING:\033[0m'
513             else:
514                 _msg_header = 'WARNING:'
515             warning_message = '%s %s' % (_msg_header, message)
516             self.to_stderr(warning_message)
517
518     def report_error(self, message, tb=None):
519         '''
520         Do the same as trouble, but prefixes the message with 'ERROR:', colored
521         in red if stderr is a tty file.
522         '''
523         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
524             _msg_header = '\033[0;31mERROR:\033[0m'
525         else:
526             _msg_header = 'ERROR:'
527         error_message = '%s %s' % (_msg_header, message)
528         self.trouble(error_message, tb)
529
530     def report_file_already_downloaded(self, file_name):
531         """Report file has already been fully downloaded."""
532         try:
533             self.to_screen('[download] %s has already been downloaded' % file_name)
534         except UnicodeEncodeError:
535             self.to_screen('[download] The file has already been downloaded')
536
    def prepare_filename(self, info_dict):
        """Generate the output filename.

        Expands the configured output template ('outtmpl') with the fields
        of info_dict plus a few synthesized ones (epoch, autonumber, a
        zero-padded playlist_index, resolution). All values are sanitized
        for filesystem use; missing fields render as 'NA'. Returns None
        and reports an error if the template is invalid.
        """
        try:
            template_dict = dict(info_dict)

            template_dict['epoch'] = int(time.time())
            autonumber_size = self.params.get('autonumber_size')
            if autonumber_size is None:
                autonumber_size = 5
            autonumber_templ = '%0' + str(autonumber_size) + 'd'
            template_dict['autonumber'] = autonumber_templ % self._num_downloads
            if template_dict.get('playlist_index') is not None:
                # Zero-pad the index to the width of the playlist length
                template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
            if template_dict.get('resolution') is None:
                # Derive a resolution string from whatever dimensions we have
                if template_dict.get('width') and template_dict.get('height'):
                    template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
                elif template_dict.get('height'):
                    template_dict['resolution'] = '%sp' % template_dict['height']
                elif template_dict.get('width'):
                    # NOTE(review): this renders width-only info as '?x<width>',
                    # which puts the width in the height position — looks like
                    # it should be '%dx?'; confirm intent before changing
                    template_dict['resolution'] = '?x%d' % template_dict['width']

            # Sanitize every value for filename use; 'id' gets the laxer
            # is_id treatment so video IDs survive untouched
            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id'))
            template_dict = dict((k, sanitize(k, v))
                                 for k, v in template_dict.items()
                                 if v is not None)
            # defaultdict so template fields absent from info_dict become 'NA'
            template_dict = collections.defaultdict(lambda: 'NA', template_dict)

            outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
            tmpl = compat_expanduser(outtmpl)
            filename = tmpl % template_dict
            # Temporary fix for #4787
            # 'Treat' all problem characters by passing filename through preferredencoding
            # to workaround encoding issues with subprocess on python2 @ Windows
            if sys.version_info < (3, 0) and sys.platform == 'win32':
                filename = encodeFilename(filename, True).decode(preferredencoding())
            return filename
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None
579
580     def _match_entry(self, info_dict, incomplete):
581         """ Returns None iff the file should be downloaded """
582
583         video_title = info_dict.get('title', info_dict.get('id', 'video'))
584         if 'title' in info_dict:
585             # This can happen when we're just evaluating the playlist
586             title = info_dict['title']
587             matchtitle = self.params.get('matchtitle', False)
588             if matchtitle:
589                 if not re.search(matchtitle, title, re.IGNORECASE):
590                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
591             rejecttitle = self.params.get('rejecttitle', False)
592             if rejecttitle:
593                 if re.search(rejecttitle, title, re.IGNORECASE):
594                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
595         date = info_dict.get('upload_date', None)
596         if date is not None:
597             dateRange = self.params.get('daterange', DateRange())
598             if date not in dateRange:
599                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
600         view_count = info_dict.get('view_count', None)
601         if view_count is not None:
602             min_views = self.params.get('min_views')
603             if min_views is not None and view_count < min_views:
604                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
605             max_views = self.params.get('max_views')
606             if max_views is not None and view_count > max_views:
607                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
608         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
609             return 'Skipping "%s" because it is age restricted' % video_title
610         if self.in_download_archive(info_dict):
611             return '%s has already been recorded in archive' % video_title
612
613         if not incomplete:
614             match_filter = self.params.get('match_filter')
615             if match_filter is not None:
616                 ret = match_filter(info_dict)
617                 if ret is not None:
618                     return ret
619
620         return None
621
622     @staticmethod
623     def add_extra_info(info_dict, extra_info):
624         '''Set the keys from extra_info in info dict if they are missing'''
625         for key, value in extra_info.items():
626             info_dict.setdefault(key, value)
627
628     def extract_info(self, url, download=True, ie_key=None, extra_info={},
629                      process=True):
630         '''
631         Returns a list with a dictionary for each video we find.
632         If 'download', also downloads the videos.
633         extra_info is a dict containing the extra values to add to each result
634         '''
635
636         if ie_key:
637             ies = [self.get_info_extractor(ie_key)]
638         else:
639             ies = self._ies
640
641         for ie in ies:
642             if not ie.suitable(url):
643                 continue
644
645             if not ie.working():
646                 self.report_warning('The program functionality for this site has been marked as broken, '
647                                     'and will probably not work.')
648
649             try:
650                 ie_result = ie.extract(url)
651                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
652                     break
653                 if isinstance(ie_result, list):
654                     # Backwards compatibility: old IE result format
655                     ie_result = {
656                         '_type': 'compat_list',
657                         'entries': ie_result,
658                     }
659                 self.add_default_extra_info(ie_result, ie, url)
660                 if process:
661                     return self.process_ie_result(ie_result, download, extra_info)
662                 else:
663                     return ie_result
664             except ExtractorError as de:  # An error we somewhat expected
665                 self.report_error(compat_str(de), de.format_traceback())
666                 break
667             except MaxDownloadsReached:
668                 raise
669             except Exception as e:
670                 if self.params.get('ignoreerrors', False):
671                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
672                     break
673                 else:
674                     raise
675         else:
676             self.report_error('no suitable InfoExtractor for URL %s' % url)
677
678     def add_default_extra_info(self, ie_result, ie, url):
679         self.add_extra_info(ie_result, {
680             'extractor': ie.IE_NAME,
681             'webpage_url': url,
682             'webpage_url_basename': url_basename(url),
683             'extractor_key': ie.ie_key(),
684         })
685
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.

        Dispatches on ie_result['_type'] (default 'video'): 'video',
        'url', 'url_transparent', 'playlist'/'multi_video' and
        'compat_list' are handled below; anything else raises.

        NOTE(review): extra_info uses a mutable default argument; it is
        only read in this method, but callers should still pass their
        own dict.
        """

        result_type = ie_result.get('_type', 'video')

        # With --flat-playlist, URL results inside a playlist are returned
        # as-is instead of being resolved to full video results.
        if result_type in ('url', 'url_transparent'):
            extract_flat = self.params.get('extract_flat', False)
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                    extract_flat is True):
                if self.params.get('forcejson', False):
                    self.to_stdout(json.dumps(ie_result))
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # Non-None fields from the embedding page override the target
            # page, except '_type' and 'url' which must come from the target.
            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            # Guard against infinite recursion: the resolved result must
            # not itself be 'url_transparent' again.
            assert new_result.get('_type') != 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist' or result_type == 'multi_video':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # --playlist-start is 1-based on the command line; convert to a
            # 0-based slice start here.
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

            # --playlist-items, e.g. "1-3,7,10-13": expanded lazily into a
            # generator of 1-based indices.
            playlistitems_str = self.params.get('playlist_items', None)
            playlistitems = None
            if playlistitems_str is not None:
                def iter_playlistitems(format):
                    for string_segment in format.split(','):
                        if '-' in string_segment:
                            start, end = string_segment.split('-')
                            for item in range(int(start), int(end) + 1):
                                yield int(item)
                        else:
                            yield int(string_segment)
                playlistitems = iter_playlistitems(playlistitems_str)

            # Entries may be a plain list, a PagedList, or any other
            # iterable (e.g. a generator); each needs its own slicing.
            ie_entries = ie_result['entries']
            if isinstance(ie_entries, list):
                n_all_entries = len(ie_entries)
                if playlistitems:
                    # Out-of-range indices are silently dropped; negative
                    # indices (counting from the end) are allowed.
                    entries = [
                        ie_entries[i - 1] for i in playlistitems
                        if -n_all_entries <= i - 1 < n_all_entries]
                else:
                    entries = ie_entries[playliststart:playlistend]
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            elif isinstance(ie_entries, PagedList):
                if playlistitems:
                    entries = []
                    for item in playlistitems:
                        entries.extend(ie_entries.getslice(
                            item - 1, item
                        ))
                else:
                    entries = ie_entries.getslice(
                        playliststart, playlistend)
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))
            else:  # iterable
                if playlistitems:
                    entry_list = list(ie_entries)
                    entries = [entry_list[i - 1] for i in playlistitems]
                else:
                    entries = list(itertools.islice(
                        ie_entries, playliststart, playlistend))
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))

            if self.params.get('playlistreverse', False):
                entries = entries[::-1]

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                # Per-entry metadata inherited from the playlist.
                extra = {
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_id': ie_result.get('id'),
                    'playlist_title': ie_result.get('title'),
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

                # Skip entries rejected by --match-title & co.; incomplete
                # because the entry has not been fully extracted yet.
                reason = self._match_entry(entry, incomplete=True)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                    continue

                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            return ie_result
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            # Backfill the standard metadata fields on each entry before
            # resolving it.
            def _fixup(r):
                self.add_extra_info(
                    r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    }
                )
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
849
850     def _apply_format_filter(self, format_spec, available_formats):
851         " Returns a tuple of the remaining format_spec and filtered formats "
852
853         OPERATORS = {
854             '<': operator.lt,
855             '<=': operator.le,
856             '>': operator.gt,
857             '>=': operator.ge,
858             '=': operator.eq,
859             '!=': operator.ne,
860         }
861         operator_rex = re.compile(r'''(?x)\s*\[
862             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
863             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
864             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
865             \]$
866             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
867         m = operator_rex.search(format_spec)
868         if m:
869             try:
870                 comparison_value = int(m.group('value'))
871             except ValueError:
872                 comparison_value = parse_filesize(m.group('value'))
873                 if comparison_value is None:
874                     comparison_value = parse_filesize(m.group('value') + 'B')
875                 if comparison_value is None:
876                     raise ValueError(
877                         'Invalid value %r in format specification %r' % (
878                             m.group('value'), format_spec))
879             op = OPERATORS[m.group('op')]
880
881         if not m:
882             STR_OPERATORS = {
883                 '=': operator.eq,
884                 '!=': operator.ne,
885             }
886             str_operator_rex = re.compile(r'''(?x)\s*\[
887                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
888                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
889                 \s*(?P<value>[a-zA-Z0-9_-]+)
890                 \s*\]$
891                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
892             m = str_operator_rex.search(format_spec)
893             if m:
894                 comparison_value = m.group('value')
895                 op = STR_OPERATORS[m.group('op')]
896
897         if not m:
898             raise ValueError('Invalid format specification %r' % format_spec)
899
900         def _filter(f):
901             actual_value = f.get(m.group('key'))
902             if actual_value is None:
903                 return m.group('none_inclusive')
904             return op(actual_value, comparison_value)
905         new_formats = [f for f in available_formats if _filter(f)]
906
907         new_format_spec = format_spec[:-len(m.group(0))]
908         if not new_format_spec:
909             new_format_spec = 'best'
910
911         return (new_format_spec, new_formats)
912
913     def select_format(self, format_spec, available_formats):
914         while format_spec.endswith(']'):
915             format_spec, available_formats = self._apply_format_filter(
916                 format_spec, available_formats)
917         if not available_formats:
918             return None
919
920         if format_spec in ['best', 'worst', None]:
921             format_idx = 0 if format_spec == 'worst' else -1
922             audiovideo_formats = [
923                 f for f in available_formats
924                 if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
925             if audiovideo_formats:
926                 return audiovideo_formats[format_idx]
927             # for audio only urls, select the best/worst audio format
928             elif all(f.get('acodec') != 'none' for f in available_formats):
929                 return available_formats[format_idx]
930         elif format_spec == 'bestaudio':
931             audio_formats = [
932                 f for f in available_formats
933                 if f.get('vcodec') == 'none']
934             if audio_formats:
935                 return audio_formats[-1]
936         elif format_spec == 'worstaudio':
937             audio_formats = [
938                 f for f in available_formats
939                 if f.get('vcodec') == 'none']
940             if audio_formats:
941                 return audio_formats[0]
942         elif format_spec == 'bestvideo':
943             video_formats = [
944                 f for f in available_formats
945                 if f.get('acodec') == 'none']
946             if video_formats:
947                 return video_formats[-1]
948         elif format_spec == 'worstvideo':
949             video_formats = [
950                 f for f in available_formats
951                 if f.get('acodec') == 'none']
952             if video_formats:
953                 return video_formats[0]
954         else:
955             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
956             if format_spec in extensions:
957                 filter_f = lambda f: f['ext'] == format_spec
958             else:
959                 filter_f = lambda f: f['format_id'] == format_spec
960             matches = list(filter(filter_f, available_formats))
961             if matches:
962                 return matches[-1]
963         return None
964
965     def _calc_headers(self, info_dict):
966         res = std_headers.copy()
967
968         add_headers = info_dict.get('http_headers')
969         if add_headers:
970             res.update(add_headers)
971
972         cookies = self._calc_cookies(info_dict)
973         if cookies:
974             res['Cookie'] = cookies
975
976         return res
977
978     def _calc_cookies(self, info_dict):
979         pr = compat_urllib_request.Request(info_dict['url'])
980         self.cookiejar.add_cookie_header(pr)
981         return pr.get_header('Cookie')
982
    def process_video_result(self, info_dict, download=True):
        """Resolve a single 'video' result in place: validate required
        fields, fill in defaults (thumbnails, display_id, upload_date),
        select subtitles and formats, and — if 'download' — hand every
        selected format to process_info().

        Returns the mutated info_dict, or None when a listing option
        (listsubtitles/listformats/list_thumbnails) short-circuits.
        Raises ExtractorError on missing mandatory fields or when no
        requested format is available.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        # Normalize a bare 'thumbnail' into a one-element 'thumbnails'
        # list, then sort thumbnails worst-to-best and give each an id.
        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if thumbnails:
            thumbnails.sort(key=lambda t: (
                t.get('preference'), t.get('width'), t.get('height'),
                t.get('id'), t.get('url')))
            for i, t in enumerate(thumbnails):
                if 'width' in t and 'height' in t:
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                if t.get('id') is None:
                    t['id'] = '%d' % i

        if thumbnails and 'thumbnail' not in info_dict:
            # The list is sorted ascending, so the last entry is the best.
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        # Derive upload_date (YYYYMMDD) from a Unix timestamp if needed.
        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Working around negative timestamps in Windows
            # (see http://bugs.python.org/issue1646728)
            if info_dict['timestamp'] < 0 and os.name == 'nt':
                info_dict['timestamp'] = 0
            upload_date = datetime.datetime.utcfromtimestamp(
                info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

        if self.params.get('listsubtitles', False):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
            self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
            return
        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], info_dict.get('subtitles'),
            info_dict.get('automatic_captions'))

        # These extractors handle format selection themselves
        if info_dict['extractor'] in ['Youku']:
            if download:
                self.process_info(info_dict)
            return info_dict

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            raise ExtractorError('No video formats found!')

        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)

            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        for format_id, ambiguous_formats in formats_dict.items():
            if len(ambiguous_formats) > 1:
                for i, format in enumerate(ambiguous_formats):
                    format['format_id'] = '%s-%d' % (format_id, i)

        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url']).lower()
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' field if the original info_dict lists
            # formats; otherwise we end up with a circular reference, the
            # first (and unique) element in the 'formats' field in
            # info_dict being info_dict itself, which can't be exported
            # to json
            info_dict['formats'] = formats
        if self.params.get('listformats'):
            self.list_formats(info_dict)
            return
        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
            return

        # Default format spec: prefer merged bestvideo+bestaudio for
        # youtube/ted when not streaming to stdout and ffmpeg can merge.
        req_format = self.params.get('format')
        if req_format is None:
            req_format_list = []
            if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
                    info_dict['extractor'] in ['youtube', 'ted']):
                merger = FFmpegMergerPP(self)
                if merger.available and merger.can_merge():
                    req_format_list.append('bestvideo+bestaudio')
            req_format_list.append('best')
            req_format = '/'.join(req_format_list)
        formats_to_download = []
        if req_format == 'all':
            formats_to_download = formats
        else:
            for rfstr in req_format.split(','):
                # We can accept formats requested in the format: 34/5/best, we pick
                # the first that is available, starting from left
                req_formats = rfstr.split('/')
                for rf in req_formats:
                    if re.match(r'.+?\+.+?', rf) is not None:
                        # Two formats have been requested like '137+139'
                        format_1, format_2 = rf.split('+')
                        formats_info = (self.select_format(format_1, formats),
                                        self.select_format(format_2, formats))
                        if all(formats_info):
                            # The first format must contain the video and the
                            # second the audio
                            if formats_info[0].get('vcodec') == 'none':
                                self.report_error('The first format must '
                                                  'contain the video, try using '
                                                  '"-f %s+%s"' % (format_2, format_1))
                                return
                            # Synthesize a merged pseudo-format: video
                            # attributes from the first half, audio from
                            # the second.
                            output_ext = (
                                formats_info[0]['ext']
                                if self.params.get('merge_output_format') is None
                                else self.params['merge_output_format'])
                            selected_format = {
                                'requested_formats': formats_info,
                                'format': '%s+%s' % (formats_info[0].get('format'),
                                                     formats_info[1].get('format')),
                                'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                        formats_info[1].get('format_id')),
                                'width': formats_info[0].get('width'),
                                'height': formats_info[0].get('height'),
                                'resolution': formats_info[0].get('resolution'),
                                'fps': formats_info[0].get('fps'),
                                'vcodec': formats_info[0].get('vcodec'),
                                'vbr': formats_info[0].get('vbr'),
                                'stretched_ratio': formats_info[0].get('stretched_ratio'),
                                'acodec': formats_info[1].get('acodec'),
                                'abr': formats_info[1].get('abr'),
                                'ext': output_ext,
                            }
                        else:
                            selected_format = None
                    else:
                        selected_format = self.select_format(rf, formats)
                    if selected_format is not None:
                        formats_to_download.append(selected_format)
                        break
        if not formats_to_download:
            raise ExtractorError('requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
1176
1177     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1178         """Select the requested subtitles and their format"""
1179         available_subs = {}
1180         if normal_subtitles and self.params.get('writesubtitles'):
1181             available_subs.update(normal_subtitles)
1182         if automatic_captions and self.params.get('writeautomaticsub'):
1183             for lang, cap_info in automatic_captions.items():
1184                 if lang not in available_subs:
1185                     available_subs[lang] = cap_info
1186
1187         if (not self.params.get('writesubtitles') and not
1188                 self.params.get('writeautomaticsub') or not
1189                 available_subs):
1190             return None
1191
1192         if self.params.get('allsubtitles', False):
1193             requested_langs = available_subs.keys()
1194         else:
1195             if self.params.get('subtitleslangs', False):
1196                 requested_langs = self.params.get('subtitleslangs')
1197             elif 'en' in available_subs:
1198                 requested_langs = ['en']
1199             else:
1200                 requested_langs = [list(available_subs.keys())[0]]
1201
1202         formats_query = self.params.get('subtitlesformat', 'best')
1203         formats_preference = formats_query.split('/') if formats_query else []
1204         subs = {}
1205         for lang in requested_langs:
1206             formats = available_subs.get(lang)
1207             if formats is None:
1208                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1209                 continue
1210             for ext in formats_preference:
1211                 if ext == 'best':
1212                     f = formats[-1]
1213                     break
1214                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1215                 if matches:
1216                     f = matches[-1]
1217                     break
1218             else:
1219                 f = formats[-1]
1220                 self.report_warning(
1221                     'No subtitle format found matching "%s" for language %s, '
1222                     'using %s' % (formats_query, lang, f['ext']))
1223             subs[lang] = f
1224         return subs
1225
1226     def process_info(self, info_dict):
1227         """Process a single resolved IE result."""
1228
1229         assert info_dict.get('_type', 'video') == 'video'
1230
1231         max_downloads = self.params.get('max_downloads')
1232         if max_downloads is not None:
1233             if self._num_downloads >= int(max_downloads):
1234                 raise MaxDownloadsReached()
1235
1236         info_dict['fulltitle'] = info_dict['title']
1237         if len(info_dict['title']) > 200:
1238             info_dict['title'] = info_dict['title'][:197] + '...'
1239
1240         if 'format' not in info_dict:
1241             info_dict['format'] = info_dict['ext']
1242
1243         reason = self._match_entry(info_dict, incomplete=False)
1244         if reason is not None:
1245             self.to_screen('[download] ' + reason)
1246             return
1247
1248         self._num_downloads += 1
1249
1250         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1251
1252         # Forced printings
1253         if self.params.get('forcetitle', False):
1254             self.to_stdout(info_dict['fulltitle'])
1255         if self.params.get('forceid', False):
1256             self.to_stdout(info_dict['id'])
1257         if self.params.get('forceurl', False):
1258             if info_dict.get('requested_formats') is not None:
1259                 for f in info_dict['requested_formats']:
1260                     self.to_stdout(f['url'] + f.get('play_path', ''))
1261             else:
1262                 # For RTMP URLs, also include the playpath
1263                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1264         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1265             self.to_stdout(info_dict['thumbnail'])
1266         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1267             self.to_stdout(info_dict['description'])
1268         if self.params.get('forcefilename', False) and filename is not None:
1269             self.to_stdout(filename)
1270         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1271             self.to_stdout(formatSeconds(info_dict['duration']))
1272         if self.params.get('forceformat', False):
1273             self.to_stdout(info_dict['format'])
1274         if self.params.get('forcejson', False):
1275             self.to_stdout(json.dumps(info_dict))
1276
1277         # Do nothing else if in simulate mode
1278         if self.params.get('simulate', False):
1279             return
1280
1281         if filename is None:
1282             return
1283
1284         try:
1285             dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1286             if dn and not os.path.exists(dn):
1287                 os.makedirs(dn)
1288         except (OSError, IOError) as err:
1289             self.report_error('unable to create directory ' + compat_str(err))
1290             return
1291
1292         if self.params.get('writedescription', False):
1293             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1294             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1295                 self.to_screen('[info] Video description is already present')
1296             elif info_dict.get('description') is None:
1297                 self.report_warning('There\'s no description to write.')
1298             else:
1299                 try:
1300                     self.to_screen('[info] Writing video description to: ' + descfn)
1301                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1302                         descfile.write(info_dict['description'])
1303                 except (OSError, IOError):
1304                     self.report_error('Cannot write description file ' + descfn)
1305                     return
1306
1307         if self.params.get('writeannotations', False):
1308             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1309             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1310                 self.to_screen('[info] Video annotations are already present')
1311             else:
1312                 try:
1313                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1314                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1315                         annofile.write(info_dict['annotations'])
1316                 except (KeyError, TypeError):
1317                     self.report_warning('There are no annotations to write.')
1318                 except (OSError, IOError):
1319                     self.report_error('Cannot write annotations file: ' + annofn)
1320                     return
1321
1322         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1323                                        self.params.get('writeautomaticsub')])
1324
1325         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1326             # subtitles download errors are already managed as troubles in relevant IE
1327             # that way it will silently go on when used with unsupporting IE
1328             subtitles = info_dict['requested_subtitles']
1329             ie = self.get_info_extractor(info_dict['extractor_key'])
1330             for sub_lang, sub_info in subtitles.items():
1331                 sub_format = sub_info['ext']
1332                 if sub_info.get('data') is not None:
1333                     sub_data = sub_info['data']
1334                 else:
1335                     try:
1336                         sub_data = ie._download_webpage(
1337                             sub_info['url'], info_dict['id'], note=False)
1338                     except ExtractorError as err:
1339                         self.report_warning('Unable to download subtitle for "%s": %s' %
1340                                             (sub_lang, compat_str(err.cause)))
1341                         continue
1342                 try:
1343                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1344                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1345                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1346                     else:
1347                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1348                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1349                             subfile.write(sub_data)
1350                 except (OSError, IOError):
1351                     self.report_error('Cannot write subtitles file ' + sub_filename)
1352                     return
1353
1354         if self.params.get('writeinfojson', False):
1355             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1356             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1357                 self.to_screen('[info] Video description metadata is already present')
1358             else:
1359                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1360                 try:
1361                     write_json_file(self.filter_requested_info(info_dict), infofn)
1362                 except (OSError, IOError):
1363                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1364                     return
1365
1366         self._write_thumbnails(info_dict, filename)
1367
1368         if not self.params.get('skip_download', False):
1369             try:
                def dl(name, info):
                    """Download one format described by *info* to file *name*,
                    using whichever FileDownloader get_suitable_downloader
                    selects for it. Returns the downloader's success value."""
                    fd = get_suitable_downloader(info, self.params)(self, self.params)
                    # Forward this YoutubeDL's progress hooks to the downloader
                    # so callers keep receiving progress callbacks.
                    for ph in self._progress_hooks:
                        fd.add_progress_hook(ph)
                    if self.params.get('verbose'):
                        self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                    return fd.download(name, info)
1377
1378                 if info_dict.get('requested_formats') is not None:
1379                     downloaded = []
1380                     success = True
1381                     merger = FFmpegMergerPP(self)
1382                     if not merger.available:
1383                         postprocessors = []
1384                         self.report_warning('You have requested multiple '
1385                                             'formats but ffmpeg or avconv are not installed.'
1386                                             ' The formats won\'t be merged.')
1387                     else:
1388                         postprocessors = [merger]
1389
1390                     def compatible_formats(formats):
1391                         video, audio = formats
1392                         # Check extension
1393                         video_ext, audio_ext = audio.get('ext'), video.get('ext')
1394                         if video_ext and audio_ext:
1395                             COMPATIBLE_EXTS = (
1396                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
1397                                 ('webm')
1398                             )
1399                             for exts in COMPATIBLE_EXTS:
1400                                 if video_ext in exts and audio_ext in exts:
1401                                     return True
1402                         # TODO: Check acodec/vcodec
1403                         return False
1404
1405                     filename_real_ext = os.path.splitext(filename)[1][1:]
1406                     filename_wo_ext = (
1407                         os.path.splitext(filename)[0]
1408                         if filename_real_ext == info_dict['ext']
1409                         else filename)
1410                     requested_formats = info_dict['requested_formats']
1411                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1412                         info_dict['ext'] = 'mkv'
1413                         self.report_warning(
1414                             'Requested formats are incompatible for merge and will be merged into mkv.')
1415                     # Ensure filename always has a correct extension for successful merge
1416                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1417                     if os.path.exists(encodeFilename(filename)):
1418                         self.to_screen(
1419                             '[download] %s has already been downloaded and '
1420                             'merged' % filename)
1421                     else:
1422                         for f in requested_formats:
1423                             new_info = dict(info_dict)
1424                             new_info.update(f)
1425                             fname = self.prepare_filename(new_info)
1426                             fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
1427                             downloaded.append(fname)
1428                             partial_success = dl(fname, new_info)
1429                             success = success and partial_success
1430                         info_dict['__postprocessors'] = postprocessors
1431                         info_dict['__files_to_merge'] = downloaded
1432                 else:
1433                     # Just a single file
1434                     success = dl(filename, info_dict)
1435             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1436                 self.report_error('unable to download video data: %s' % str(err))
1437                 return
1438             except (OSError, IOError) as err:
1439                 raise UnavailableVideoError(err)
1440             except (ContentTooShortError, ) as err:
1441                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1442                 return
1443
1444             if success:
1445                 # Fixup content
1446                 fixup_policy = self.params.get('fixup')
1447                 if fixup_policy is None:
1448                     fixup_policy = 'detect_or_warn'
1449
1450                 stretched_ratio = info_dict.get('stretched_ratio')
1451                 if stretched_ratio is not None and stretched_ratio != 1:
1452                     if fixup_policy == 'warn':
1453                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1454                             info_dict['id'], stretched_ratio))
1455                     elif fixup_policy == 'detect_or_warn':
1456                         stretched_pp = FFmpegFixupStretchedPP(self)
1457                         if stretched_pp.available:
1458                             info_dict.setdefault('__postprocessors', [])
1459                             info_dict['__postprocessors'].append(stretched_pp)
1460                         else:
1461                             self.report_warning(
1462                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1463                                     info_dict['id'], stretched_ratio))
1464                     else:
1465                         assert fixup_policy in ('ignore', 'never')
1466
1467                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1468                     if fixup_policy == 'warn':
1469                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1470                             info_dict['id']))
1471                     elif fixup_policy == 'detect_or_warn':
1472                         fixup_pp = FFmpegFixupM4aPP(self)
1473                         if fixup_pp.available:
1474                             info_dict.setdefault('__postprocessors', [])
1475                             info_dict['__postprocessors'].append(fixup_pp)
1476                         else:
1477                             self.report_warning(
1478                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1479                                     info_dict['id']))
1480                     else:
1481                         assert fixup_policy in ('ignore', 'never')
1482
1483                 try:
1484                     self.post_process(filename, info_dict)
1485                 except (PostProcessingError) as err:
1486                     self.report_error('postprocessing: %s' % str(err))
1487                     return
1488                 self.record_download_archive(info_dict)
1489
1490     def download(self, url_list):
1491         """Download a given list of URLs."""
1492         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1493         if (len(url_list) > 1 and
1494                 '%' not in outtmpl and
1495                 self.params.get('max_downloads') != 1):
1496             raise SameFileError(outtmpl)
1497
1498         for url in url_list:
1499             try:
1500                 # It also downloads the videos
1501                 res = self.extract_info(url)
1502             except UnavailableVideoError:
1503                 self.report_error('unable to download video')
1504             except MaxDownloadsReached:
1505                 self.to_screen('[info] Maximum number of downloaded files reached.')
1506                 raise
1507             else:
1508                 if self.params.get('dump_single_json', False):
1509                     self.to_stdout(json.dumps(res))
1510
1511         return self._download_retcode
1512
1513     def download_with_info_file(self, info_filename):
1514         with contextlib.closing(fileinput.FileInput(
1515                 [info_filename], mode='r',
1516                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1517             # FileInput doesn't have a read method, we can't call json.load
1518             info = self.filter_requested_info(json.loads('\n'.join(f)))
1519         try:
1520             self.process_ie_result(info, download=True)
1521         except DownloadError:
1522             webpage_url = info.get('webpage_url')
1523             if webpage_url is not None:
1524                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1525                 return self.download([webpage_url])
1526             else:
1527                 raise
1528         return self._download_retcode
1529
1530     @staticmethod
1531     def filter_requested_info(info_dict):
1532         return dict(
1533             (k, v) for k, v in info_dict.items()
1534             if k not in ['requested_formats', 'requested_subtitles'])
1535
1536     def post_process(self, filename, ie_info):
1537         """Run all the postprocessors on the given file."""
1538         info = dict(ie_info)
1539         info['filepath'] = filename
1540         pps_chain = []
1541         if ie_info.get('__postprocessors') is not None:
1542             pps_chain.extend(ie_info['__postprocessors'])
1543         pps_chain.extend(self._pps)
1544         for pp in pps_chain:
1545             files_to_delete = []
1546             try:
1547                 files_to_delete, info = pp.run(info)
1548             except PostProcessingError as e:
1549                 self.report_error(e.msg)
1550             if files_to_delete and not self.params.get('keepvideo', False):
1551                 for old_filename in files_to_delete:
1552                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1553                     try:
1554                         os.remove(encodeFilename(old_filename))
1555                     except (IOError, OSError):
1556                         self.report_warning('Unable to remove downloaded original file')
1557
1558     def _make_archive_id(self, info_dict):
1559         # Future-proof against any change in case
1560         # and backwards compatibility with prior versions
1561         extractor = info_dict.get('extractor_key')
1562         if extractor is None:
1563             if 'id' in info_dict:
1564                 extractor = info_dict.get('ie_key')  # key in a playlist
1565         if extractor is None:
1566             return None  # Incomplete video information
1567         return extractor.lower() + ' ' + info_dict['id']
1568
1569     def in_download_archive(self, info_dict):
1570         fn = self.params.get('download_archive')
1571         if fn is None:
1572             return False
1573
1574         vid_id = self._make_archive_id(info_dict)
1575         if vid_id is None:
1576             return False  # Incomplete video information
1577
1578         try:
1579             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1580                 for line in archive_file:
1581                     if line.strip() == vid_id:
1582                         return True
1583         except IOError as ioe:
1584             if ioe.errno != errno.ENOENT:
1585                 raise
1586         return False
1587
1588     def record_download_archive(self, info_dict):
1589         fn = self.params.get('download_archive')
1590         if fn is None:
1591             return
1592         vid_id = self._make_archive_id(info_dict)
1593         assert vid_id
1594         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1595             archive_file.write(vid_id + '\n')
1596
1597     @staticmethod
1598     def format_resolution(format, default='unknown'):
1599         if format.get('vcodec') == 'none':
1600             return 'audio only'
1601         if format.get('resolution') is not None:
1602             return format['resolution']
1603         if format.get('height') is not None:
1604             if format.get('width') is not None:
1605                 res = '%sx%s' % (format['width'], format['height'])
1606             else:
1607                 res = '%sp' % format['height']
1608         elif format.get('width') is not None:
1609             res = '?x%d' % format['width']
1610         else:
1611             res = default
1612         return res
1613
    def _format_note(self, fdict):
        """Build the short free-form 'note' column for one format dict in the
        --list-formats table: format note, bitrates, container, codecs, fps,
        sample rate and (approximate) filesize, comma-separated."""
        res = ''
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            # ', ' separator only when something already precedes it.
            if res:
                res += ', '
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            if res:
                res += ', '
            res += fdict['vcodec']
            # '@' glues the codec name to the video bitrate appended below.
            if fdict.get('vbr') is not None:
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            # Video codec unknown, but separate video/audio bitrates exist.
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            res += ', %sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            if fdict['acodec'] == 'none':
                res += 'video only'
            else:
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            # Glued to the audio codec name (or 'audio') written just above.
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            if res:
                res += ', '
            res += '~' + format_bytes(fdict['filesize_approx'])
        return res
1663
1664     def list_formats(self, info_dict):
1665         formats = info_dict.get('formats', [info_dict])
1666         table = [
1667             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1668             for f in formats
1669             if f.get('preference') is None or f['preference'] >= -1000]
1670         if len(formats) > 1:
1671             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1672
1673         header_line = ['format code', 'extension', 'resolution', 'note']
1674         self.to_screen(
1675             '[info] Available formats for %s:\n%s' %
1676             (info_dict['id'], render_table(header_line, table)))
1677
1678     def list_thumbnails(self, info_dict):
1679         thumbnails = info_dict.get('thumbnails')
1680         if not thumbnails:
1681             tn_url = info_dict.get('thumbnail')
1682             if tn_url:
1683                 thumbnails = [{'id': '0', 'url': tn_url}]
1684             else:
1685                 self.to_screen(
1686                     '[info] No thumbnails present for %s' % info_dict['id'])
1687                 return
1688
1689         self.to_screen(
1690             '[info] Thumbnails for %s:' % info_dict['id'])
1691         self.to_screen(render_table(
1692             ['ID', 'width', 'height', 'URL'],
1693             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1694
1695     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1696         if not subtitles:
1697             self.to_screen('%s has no %s' % (video_id, name))
1698             return
1699         self.to_screen(
1700             'Available %s for %s:' % (name, video_id))
1701         self.to_screen(render_table(
1702             ['Language', 'formats'],
1703             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1704                 for lang, formats in subtitles.items()]))
1705
    def urlopen(self, req):
        """Start an HTTP download.

        *req* is either a URL string or a urllib Request object. Returns the
        response from the shared opener (self._opener), which applies the
        configured proxies, cookies and socket timeout.
        """

        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        req_is_string = isinstance(req, compat_basestring)
        url = req if req_is_string else req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            if req_is_string:
                req = url_escaped
            else:
                # Rebuild the request around the escaped URL without losing
                # the HTTP method: a plain urllib Request infers GET/POST from
                # the presence of data, so an original HEAD request must be
                # recreated as a HEADRequest.
                req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
                req = req_type(
                    url_escaped, data=req.data, headers=req.headers,
                    origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)

        return self._opener.open(req, timeout=self._socket_timeout)
1730
    def print_debug_header(self):
        """Write diagnostic information (encodings, youtube-dl version, git
        revision, Python version, external program versions, proxy map) when
        the 'verbose' param is set; with 'call_home' also report the public
        IP and check yt-dl.org for a newer release."""
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        # sys.stdout may lack an 'encoding' attribute when redirected/replaced.
        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
        try:
            # Best effort: report the git revision when running from a checkout.
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        except Exception:
            try:
                # Python 2 only: clear the dangling exception state.
                sys.exc_clear()
            except Exception:
                pass
        self._write_string('[debug] Python version %s - %s\n' % (
            platform.python_version(), platform_name()))

        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        # Only list helper programs that were actually detected.
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Collect the effective proxy settings from every handler that has any.
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
1795
    def _setup_opener(self):
        """Build the urllib opener used for all HTTP(S) requests
        (self._opener): cookie jar (optionally file-backed), per-request
        proxy handling, the custom HTTPS and YoutubeDL handlers, and the
        socket timeout, all derived from self.params."""
        timeout_val = self.params.get('socket_timeout')
        # Default socket timeout: 600 seconds.
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # In-memory cookies only.
            self.cookiejar = compat_cookiejar.CookieJar()
        else:
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
                opts_cookiefile)
            # Load existing cookies only when the file is actually readable;
            # a missing file will simply be created on save.
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load()

        cookie_processor = compat_urllib_request.HTTPCookieProcessor(
            self.cookiejar)
        if opts_proxy is not None:
            # An explicit empty string disables proxying entirely.
            if opts_proxy == '':
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        opener = compat_urllib_request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh)

        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
1836
1837     def encode(self, s):
1838         if isinstance(s, bytes):
1839             return s  # Already encoded
1840
1841         try:
1842             return s.encode(self.get_encoding())
1843         except UnicodeEncodeError as err:
1844             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1845             raise
1846
1847     def get_encoding(self):
1848         encoding = self.params.get('encoding')
1849         if encoding is None:
1850             encoding = preferredencoding()
1851         return encoding
1852
    def _write_thumbnails(self, info_dict, filename):
        """Save thumbnail image(s) next to *filename*.

        With 'writethumbnail' only the last entry of info_dict['thumbnails']
        is saved; with 'write_all_thumbnails' every entry is saved, each
        suffixed with its id. Download failures are reported as warnings,
        never raised.
        """
        if self.params.get('writethumbnail', False):
            thumbnails = info_dict.get('thumbnails')
            if thumbnails:
                # NOTE(review): assumes the thumbnails list is ordered so the
                # last entry is the preferred one - confirm against the sorter.
                thumbnails = [thumbnails[-1]]
        elif self.params.get('write_all_thumbnails', False):
            thumbnails = info_dict.get('thumbnails')
        else:
            return

        if not thumbnails:
            # No thumbnails present, so return immediately
            return

        for t in thumbnails:
            # Derive the image extension from the URL, defaulting to jpg.
            thumb_ext = determine_ext(t['url'], 'jpg')
            # Disambiguate filenames/log lines only when saving several.
            suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
            thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
            t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext

            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                self.to_screen('[%s] %s: Thumbnail %sis already present' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
            else:
                self.to_screen('[%s] %s: Downloading thumbnail %s...' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
                try:
                    uf = self.urlopen(t['url'])
                    with open(thumb_filename, 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (t['url'], compat_str(err)))