Add `--force-generic-extractor`
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import datetime
9 import errno
10 import fileinput
11 import io
12 import itertools
13 import json
14 import locale
15 import operator
16 import os
17 import platform
18 import re
19 import shutil
20 import subprocess
21 import socket
22 import sys
23 import time
24 import traceback
25
26 if os.name == 'nt':
27     import ctypes
28
29 from .compat import (
30     compat_basestring,
31     compat_cookiejar,
32     compat_expanduser,
33     compat_get_terminal_size,
34     compat_http_client,
35     compat_kwargs,
36     compat_str,
37     compat_urllib_error,
38     compat_urllib_request,
39 )
40 from .utils import (
41     escape_url,
42     ContentTooShortError,
43     date_from_str,
44     DateRange,
45     DEFAULT_OUTTMPL,
46     determine_ext,
47     DownloadError,
48     encodeFilename,
49     ExtractorError,
50     format_bytes,
51     formatSeconds,
52     HEADRequest,
53     locked_file,
54     make_HTTPS_handler,
55     MaxDownloadsReached,
56     PagedList,
57     parse_filesize,
58     PerRequestProxyHandler,
59     PostProcessingError,
60     platform_name,
61     preferredencoding,
62     render_table,
63     SameFileError,
64     sanitize_filename,
65     sanitize_path,
66     std_headers,
67     subtitles_filename,
68     UnavailableVideoError,
69     url_basename,
70     version_tuple,
71     write_json_file,
72     write_string,
73     YoutubeDLHandler,
74     prepend_extension,
75     replace_extension,
76     args_to_str,
77     age_restricted,
78 )
79 from .cache import Cache
80 from .extractor import get_info_extractor, gen_extractors
81 from .downloader import get_suitable_downloader
82 from .downloader.rtmp import rtmpdump_version
83 from .postprocessor import (
84     FFmpegFixupM4aPP,
85     FFmpegFixupStretchedPP,
86     FFmpegMergerPP,
87     FFmpegPostProcessor,
88     get_postprocessor,
89 )
90 from .version import __version__
91
92
93 class YoutubeDL(object):
94     """YoutubeDL class.
95
96     YoutubeDL objects are the ones responsible for downloading the
97     actual video file and writing it to disk if the user has requested
98     it, among some other tasks. In most cases there should be one per
99     program. As, given a video URL, the downloader doesn't know how to
100     extract all the needed information, task that InfoExtractors do, it
101     has to pass the URL to one of them.
102
103     For this, YoutubeDL objects have a method that allows
104     InfoExtractors to be registered in a given order. When it is passed
105     a URL, the YoutubeDL object handles it to the first InfoExtractor it
106     finds that reports being able to handle it. The InfoExtractor extracts
107     all the information about the video or videos the URL refers to, and
108     YoutubeDL process the extracted information, possibly using a File
109     Downloader to download the video.
110
111     YoutubeDL objects accept a lot of parameters. In order not to saturate
112     the object constructor with arguments, it receives a dictionary of
113     options instead. These options are available through the params
114     attribute for the InfoExtractors to use. The YoutubeDL also
115     registers itself as the downloader in charge for the InfoExtractors
116     that are added to it, so this is a "mutual registration".
117
118     Available options:
119
120     username:          Username for authentication purposes.
121     password:          Password for authentication purposes.
122     videopassword:     Password for accessing a video.
123     usenetrc:          Use netrc for authentication instead.
124     verbose:           Print additional info to stdout.
125     quiet:             Do not print messages to stdout.
126     no_warnings:       Do not print out anything for warnings.
127     forceurl:          Force printing final URL.
128     forcetitle:        Force printing title.
129     forceid:           Force printing ID.
130     forcethumbnail:    Force printing thumbnail URL.
131     forcedescription:  Force printing description.
132     forcefilename:     Force printing final filename.
133     forceduration:     Force printing duration.
134     forcejson:         Force printing info_dict as JSON.
135     dump_single_json:  Force printing the info_dict of the whole playlist
136                        (or video) as a single JSON line.
137     simulate:          Do not download the video files.
138     format:            Video format code. See options.py for more information.
139     outtmpl:           Template for output names.
140     restrictfilenames: Do not allow "&" and spaces in file names
141     ignoreerrors:      Do not stop on download errors.
142     force_generic_extractor: Force downloader to use the generic extractor
143     nooverwrites:      Prevent overwriting files.
144     playliststart:     Playlist item to start at.
145     playlistend:       Playlist item to end at.
146     playlist_items:    Specific indices of playlist to download.
147     playlistreverse:   Download playlist items in reverse order.
148     matchtitle:        Download only matching titles.
149     rejecttitle:       Reject downloads for matching titles.
150     logger:            Log messages to a logging.Logger instance.
151     logtostderr:       Log messages to stderr instead of stdout.
152     writedescription:  Write the video description to a .description file
153     writeinfojson:     Write the video description to a .info.json file
154     writeannotations:  Write the video annotations to a .annotations.xml file
155     writethumbnail:    Write the thumbnail image to a file
156     write_all_thumbnails:  Write all thumbnail formats to files
157     writesubtitles:    Write the video subtitles to a file
158     writeautomaticsub: Write the automatic subtitles to a file
159     allsubtitles:      Downloads all the subtitles of the video
160                        (requires writesubtitles or writeautomaticsub)
161     listsubtitles:     Lists all available subtitles for the video
162     subtitlesformat:   The format code for subtitles
163     subtitleslangs:    List of languages of the subtitles to download
164     keepvideo:         Keep the video file after post-processing
165     daterange:         A DateRange object, download only if the upload_date is in the range.
166     skip_download:     Skip the actual download of the video file
167     cachedir:          Location of the cache files in the filesystem.
168                        False to disable filesystem cache.
169     noplaylist:        Download single video instead of a playlist if in doubt.
170     age_limit:         An integer representing the user's age in years.
171                        Unsuitable videos for the given age are skipped.
172     min_views:         An integer representing the minimum view count the video
173                        must have in order to not be skipped.
174                        Videos without view count information are always
175                        downloaded. None for no limit.
176     max_views:         An integer representing the maximum view count.
177                        Videos that are more popular than that are not
178                        downloaded.
179                        Videos without view count information are always
180                        downloaded. None for no limit.
181     download_archive:  File name of a file where all downloads are recorded.
182                        Videos already present in the file are not downloaded
183                        again.
184     cookiefile:        File name where cookies should be read from and dumped to.
185     nocheckcertificate:Do not verify SSL certificates
186     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
187                        At the moment, this is only supported by YouTube.
188     proxy:             URL of the proxy server to use
189     cn_verification_proxy:  URL of the proxy to use for IP address verification
190                        on Chinese sites. (Experimental)
191     socket_timeout:    Time to wait for unresponsive hosts, in seconds
192     bidi_workaround:   Work around buggy terminals without bidirectional text
193                        support, using fribidi
194     debug_printtraffic:Print out sent and received HTTP traffic
195     include_ads:       Download ads as well
196     default_search:    Prepend this string if an input url is not valid.
197                        'auto' for elaborate guessing
198     encoding:          Use this encoding instead of the system-specified.
199     extract_flat:      Do not resolve URLs, return the immediate result.
200                        Pass in 'in_playlist' to only show this behavior for
201                        playlist items.
202     postprocessors:    A list of dictionaries, each with an entry
203                        * key:  The name of the postprocessor. See
204                                youtube_dl/postprocessor/__init__.py for a list.
205                        as well as any further keyword arguments for the
206                        postprocessor.
207     progress_hooks:    A list of functions that get called on download
208                        progress, with a dictionary with the entries
209                        * status: One of "downloading", "error", or "finished".
210                                  Check this first and ignore unknown values.
211
212                        If status is one of "downloading", or "finished", the
213                        following properties may also be present:
214                        * filename: The final filename (always present)
215                        * tmpfilename: The filename we're currently writing to
216                        * downloaded_bytes: Bytes on disk
217                        * total_bytes: Size of the whole file, None if unknown
218                        * total_bytes_estimate: Guess of the eventual file size,
219                                                None if unavailable.
220                        * elapsed: The number of seconds since download started.
221                        * eta: The estimated time in seconds, None if unknown
222                        * speed: The download speed in bytes/second, None if
223                                 unknown
224                        * fragment_index: The counter of the currently
225                                          downloaded video fragment.
226                        * fragment_count: The number of fragments (= individual
227                                          files that will be merged)
228
229                        Progress hooks are guaranteed to be called at least once
230                        (with status "finished") if the download is successful.
231     merge_output_format: Extension to use when merging formats.
232     fixup:             Automatically correct known faults of the file.
233                        One of:
234                        - "never": do nothing
235                        - "warn": only emit a warning
236                        - "detect_or_warn": check whether we can do anything
237                                            about it, warn otherwise (default)
238     source_address:    (Experimental) Client-side IP address to bind to.
239     call_home:         Boolean, true iff we are allowed to contact the
240                        youtube-dl servers for debugging.
241     sleep_interval:    Number of seconds to sleep before each download.
242     listformats:       Print an overview of available video formats and exit.
243     list_thumbnails:   Print a table of all thumbnails and exit.
244     match_filter:      A function that gets called with the info_dict of
245                        every video.
246                        If it returns a message, the video is ignored.
247                        If it returns None, the video is downloaded.
248                        match_filter_func in utils.py is one example for this.
249     no_color:          Do not emit color codes in output.
250
251     The following options determine which downloader is picked:
252     external_downloader: Executable of the external downloader to call.
253                        None or unset for standard (built-in) downloader.
254     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
255
256     The following parameters are not used by YoutubeDL itself, they are used by
257     the downloader (see youtube_dl/downloader/common.py):
258     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
259     noresizebuffer, retries, continuedl, noprogress, consoletitle,
260     xattr_set_filesize, external_downloader_args.
261
262     The following options are used by the post processors:
263     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
264                        otherwise prefer avconv.
265     """
266
267     params = None
268     _ies = []
269     _pps = []
270     _download_retcode = None
271     _num_downloads = None
272     _screen_file = None
273
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.

        params:    Dictionary of options; see the class docstring for the
                   available keys. Defaults to an empty dict.
        auto_init: When True, print the debug header and register all
                   default info extractors.
        """
        if params is None:
            params = {}
        # Registered extractors: ordered list plus a by-key instance cache.
        self._ies = []
        self._ies_instances = {}
        # Post-processors and download progress hooks, in call order.
        self._pps = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Screen output goes to stderr instead of stdout when 'logtostderr' is set.
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        # One-shot flag: consumed (and cleared) by extract_info() to force the
        # Generic extractor for the first extraction only.
        self._force_generic_extractor_required = params.get('force_generic_extractor', False)
        self.params = params
        self.cache = Cache(self)

        if params.get('bidi_workaround', False):
            # Pipe screen output through an external bidi filter over a pty
            # so right-to-left text renders correctly in broken terminals.
            try:
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    # bidiv not installed; fall back to fribidi.
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == 2:  # ENOENT: neither executable was found
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.version_info >= (3,) and sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
                not params.get('restrictfilenames', False)):
            # On Python 3, the Unicode filesystem API will throw errors (#1474)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if isinstance(params.get('outtmpl'), bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')

        self._setup_opener()

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        # Instantiate and register configured post-processors; each dict
        # carries a 'key' naming the PP class plus its keyword arguments.
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            del pp_def['key']
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)
347
348     def warn_if_short_id(self, argv):
349         # short YouTube ID starting with dash?
350         idxs = [
351             i for i, a in enumerate(argv)
352             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
353         if idxs:
354             correct_argv = (
355                 ['youtube-dl'] +
356                 [a for i, a in enumerate(argv) if i not in idxs] +
357                 ['--'] + [argv[i] for i in idxs]
358             )
359             self.report_warning(
360                 'Long argument string detected. '
361                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
362                 args_to_str(correct_argv))
363
364     def add_info_extractor(self, ie):
365         """Add an InfoExtractor object to the end of the list."""
366         self._ies.append(ie)
367         self._ies_instances[ie.ie_key()] = ie
368         ie.set_downloader(self)
369
370     def get_info_extractor(self, ie_key):
371         """
372         Get an instance of an IE with name ie_key, it will try to get one from
373         the _ies list, if there's no instance it will create a new one and add
374         it to the extractor list.
375         """
376         ie = self._ies_instances.get(ie_key)
377         if ie is None:
378             ie = get_info_extractor(ie_key)()
379             self.add_info_extractor(ie)
380         return ie
381
382     def add_default_info_extractors(self):
383         """
384         Add the InfoExtractors returned by gen_extractors to the end of the list
385         """
386         for ie in gen_extractors():
387             self.add_info_extractor(ie)
388
389     def add_post_processor(self, pp):
390         """Add a PostProcessor object to the end of the chain."""
391         self._pps.append(pp)
392         pp.set_downloader(self)
393
394     def add_progress_hook(self, ph):
395         """Add the progress hook (currently only for the file downloader)"""
396         self._progress_hooks.append(ph)
397
398     def _bidi_workaround(self, message):
399         if not hasattr(self, '_output_channel'):
400             return message
401
402         assert hasattr(self, '_output_process')
403         assert isinstance(message, compat_str)
404         line_count = message.count('\n') + 1
405         self._output_process.stdin.write((message + '\n').encode('utf-8'))
406         self._output_process.stdin.flush()
407         res = ''.join(self._output_channel.readline().decode('utf-8')
408                       for _ in range(line_count))
409         return res[:-len('\n')]
410
411     def to_screen(self, message, skip_eol=False):
412         """Print message to stdout if not in quiet mode."""
413         return self.to_stdout(message, skip_eol, check_quiet=True)
414
    def _write_string(self, s, out=None):
        # Low-level write that honors the user-selected output encoding
        # ('encoding' param); *out* defaults to write_string's own default.
        write_string(s, out=out, encoding=self.params.get('encoding'))
417
418     def to_stdout(self, message, skip_eol=False, check_quiet=False):
419         """Print message to stdout if not in quiet mode."""
420         if self.params.get('logger'):
421             self.params['logger'].debug(message)
422         elif not check_quiet or not self.params.get('quiet', False):
423             message = self._bidi_workaround(message)
424             terminator = ['\n', ''][skip_eol]
425             output = message + terminator
426
427             self._write_string(output, self._screen_file)
428
429     def to_stderr(self, message):
430         """Print message to stderr."""
431         assert isinstance(message, compat_str)
432         if self.params.get('logger'):
433             self.params['logger'].error(message)
434         else:
435             message = self._bidi_workaround(message)
436             output = message + '\n'
437             self._write_string(output, self._err_file)
438
    def to_console_title(self, message):
        # Set the terminal/console window title, if the option is enabled.
        if not self.params.get('consoletitle', False):
            return
        # On Windows, only when an actual console window exists; otherwise
        # fall through to the TERM escape-sequence path.
        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            # xterm OSC 0 sequence: set both window and icon title.
            self._write_string('\033]0;%s\007' % message, self._screen_file)
448
449     def save_console_title(self):
450         if not self.params.get('consoletitle', False):
451             return
452         if 'TERM' in os.environ:
453             # Save the title on stack
454             self._write_string('\033[22;0t', self._screen_file)
455
456     def restore_console_title(self):
457         if not self.params.get('consoletitle', False):
458             return
459         if 'TERM' in os.environ:
460             # Restore the title from stack
461             self._write_string('\033[23;0t', self._screen_file)
462
    def __enter__(self):
        # Context-manager entry: save the console title so __exit__ can restore it.
        self.save_console_title()
        return self
466
    def __exit__(self, *args):
        # Context-manager exit: restore the console title and persist cookies.
        self.restore_console_title()

        if self.params.get('cookiefile') is not None:
            self.cookiejar.save()
472
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # A wrapper exception (e.g. one carrying its own exc_info
                    # attribute) gets its original traceback printed first.
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: show the current call stack.
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Prefer the wrapped exception's exc_info so the DownloadError
            # points at the original cause.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1
502
503     def report_warning(self, message):
504         '''
505         Print the message to stderr, it will be prefixed with 'WARNING:'
506         If stderr is a tty file the 'WARNING:' will be colored
507         '''
508         if self.params.get('logger') is not None:
509             self.params['logger'].warning(message)
510         else:
511             if self.params.get('no_warnings'):
512                 return
513             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
514                 _msg_header = '\033[0;33mWARNING:\033[0m'
515             else:
516                 _msg_header = 'WARNING:'
517             warning_message = '%s %s' % (_msg_header, message)
518             self.to_stderr(warning_message)
519
520     def report_error(self, message, tb=None):
521         '''
522         Do the same as trouble, but prefixes the message with 'ERROR:', colored
523         in red if stderr is a tty file.
524         '''
525         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
526             _msg_header = '\033[0;31mERROR:\033[0m'
527         else:
528             _msg_header = 'ERROR:'
529         error_message = '%s %s' % (_msg_header, message)
530         self.trouble(error_message, tb)
531
532     def report_file_already_downloaded(self, file_name):
533         """Report file has already been fully downloaded."""
534         try:
535             self.to_screen('[download] %s has already been downloaded' % file_name)
536         except UnicodeEncodeError:
537             self.to_screen('[download] The file has already been downloaded')
538
    def prepare_filename(self, info_dict):
        """Generate the output filename by rendering the 'outtmpl' template
        against a sanitized copy of *info_dict*; return None on template errors."""
        try:
            template_dict = dict(info_dict)

            template_dict['epoch'] = int(time.time())
            autonumber_size = self.params.get('autonumber_size')
            if autonumber_size is None:
                autonumber_size = 5
            # Zero-padded per-session download counter, e.g. '%05d'.
            autonumber_templ = '%0' + str(autonumber_size) + 'd'
            template_dict['autonumber'] = autonumber_templ % self._num_downloads
            if template_dict.get('playlist_index') is not None:
                # Pad the playlist index to the width of the playlist length.
                template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
            if template_dict.get('resolution') is None:
                # Synthesize a resolution string from whichever dimensions exist.
                if template_dict.get('width') and template_dict.get('height'):
                    template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
                elif template_dict.get('height'):
                    template_dict['resolution'] = '%sp' % template_dict['height']
                elif template_dict.get('width'):
                    template_dict['resolution'] = '?x%d' % template_dict['width']

            # Make every value filesystem-safe; the 'id' field gets the more
            # permissive is_id treatment from sanitize_filename.
            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id'))
            template_dict = dict((k, sanitize(k, v))
                                 for k, v in template_dict.items()
                                 if v is not None)
            # Missing template fields render as 'NA' instead of raising KeyError.
            template_dict = collections.defaultdict(lambda: 'NA', template_dict)

            outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
            tmpl = compat_expanduser(outtmpl)
            filename = tmpl % template_dict
            # Temporary fix for #4787
            # 'Treat' all problem characters by passing filename through preferredencoding
            # to workaround encoding issues with subprocess on python2 @ Windows
            if sys.version_info < (3, 0) and sys.platform == 'win32':
                filename = encodeFilename(filename, True).decode(preferredencoding())
            return filename
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None
581
582     def _match_entry(self, info_dict, incomplete):
583         """ Returns None iff the file should be downloaded """
584
585         video_title = info_dict.get('title', info_dict.get('id', 'video'))
586         if 'title' in info_dict:
587             # This can happen when we're just evaluating the playlist
588             title = info_dict['title']
589             matchtitle = self.params.get('matchtitle', False)
590             if matchtitle:
591                 if not re.search(matchtitle, title, re.IGNORECASE):
592                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
593             rejecttitle = self.params.get('rejecttitle', False)
594             if rejecttitle:
595                 if re.search(rejecttitle, title, re.IGNORECASE):
596                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
597         date = info_dict.get('upload_date', None)
598         if date is not None:
599             dateRange = self.params.get('daterange', DateRange())
600             if date not in dateRange:
601                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
602         view_count = info_dict.get('view_count', None)
603         if view_count is not None:
604             min_views = self.params.get('min_views')
605             if min_views is not None and view_count < min_views:
606                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
607             max_views = self.params.get('max_views')
608             if max_views is not None and view_count > max_views:
609                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
610         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
611             return 'Skipping "%s" because it is age restricted' % video_title
612         if self.in_download_archive(info_dict):
613             return '%s has already been recorded in archive' % video_title
614
615         if not incomplete:
616             match_filter = self.params.get('match_filter')
617             if match_filter is not None:
618                 ret = match_filter(info_dict)
619                 if ret is not None:
620                     return ret
621
622         return None
623
624     @staticmethod
625     def add_extra_info(info_dict, extra_info):
626         '''Set the keys from extra_info in info dict if they are missing'''
627         for key, value in extra_info.items():
628             info_dict.setdefault(key, value)
629
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
                     process=True):
        '''
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result
        '''
        # NOTE(review): extra_info uses a mutable default; it is only read
        # here and passed along -- confirm downstream callees do not mutate it.

        # Honor --force-generic-extractor exactly once: route the first
        # top-level extraction through the Generic IE, then clear the flag so
        # nested extractions (e.g. playlist entries) resolve normally.
        if not ie_key and self._force_generic_extractor_required:
            self._force_generic_extractor_required = False
            ie_key = 'Generic'

        if ie_key:
            ies = [self.get_info_extractor(ie_key)]
        else:
            ies = self._ies

        for ie in ies:
            if not ie.suitable(url):
                continue

            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            try:
                ie_result = ie.extract(url)
                if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                    break
                if isinstance(ie_result, list):
                    # Backwards compatibility: old IE result format
                    ie_result = {
                        '_type': 'compat_list',
                        'entries': ie_result,
                    }
                self.add_default_extra_info(ie_result, ie, url)
                if process:
                    return self.process_ie_result(ie_result, download, extra_info)
                else:
                    return ie_result
            except ExtractorError as de:  # An error we somewhat expected
                self.report_error(compat_str(de), de.format_traceback())
                break
            except MaxDownloadsReached:
                # Propagate: this is the signal to stop the whole run.
                raise
            except Exception as e:
                if self.params.get('ignoreerrors', False):
                    self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                    break
                else:
                    raise
        else:
            # for/else: the loop ran to completion, i.e. no extractor
            # accepted the URL at all.
            self.report_error('no suitable InfoExtractor for URL %s' % url)
683
684     def add_default_extra_info(self, ie_result, ie, url):
685         self.add_extra_info(ie_result, {
686             'extractor': ie.IE_NAME,
687             'webpage_url': url,
688             'webpage_url_basename': url_basename(url),
689             'extractor_key': ie.ie_key(),
690         })
691
692     def process_ie_result(self, ie_result, download=True, extra_info={}):
693         """
694         Take the result of the ie(may be modified) and resolve all unresolved
695         references (URLs, playlist items).
696
697         It will also download the videos if 'download'.
698         Returns the resolved ie_result.
699         """
700
701         result_type = ie_result.get('_type', 'video')
702
703         if result_type in ('url', 'url_transparent'):
704             extract_flat = self.params.get('extract_flat', False)
705             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
706                     extract_flat is True):
707                 if self.params.get('forcejson', False):
708                     self.to_stdout(json.dumps(ie_result))
709                 return ie_result
710
711         if result_type == 'video':
712             self.add_extra_info(ie_result, extra_info)
713             return self.process_video_result(ie_result, download=download)
714         elif result_type == 'url':
715             # We have to add extra_info to the results because it may be
716             # contained in a playlist
717             return self.extract_info(ie_result['url'],
718                                      download,
719                                      ie_key=ie_result.get('ie_key'),
720                                      extra_info=extra_info)
721         elif result_type == 'url_transparent':
722             # Use the information from the embedding page
723             info = self.extract_info(
724                 ie_result['url'], ie_key=ie_result.get('ie_key'),
725                 extra_info=extra_info, download=False, process=False)
726
727             force_properties = dict(
728                 (k, v) for k, v in ie_result.items() if v is not None)
729             for f in ('_type', 'url'):
730                 if f in force_properties:
731                     del force_properties[f]
732             new_result = info.copy()
733             new_result.update(force_properties)
734
735             assert new_result.get('_type') != 'url_transparent'
736
737             return self.process_ie_result(
738                 new_result, download=download, extra_info=extra_info)
739         elif result_type == 'playlist' or result_type == 'multi_video':
740             # We process each entry in the playlist
741             playlist = ie_result.get('title', None) or ie_result.get('id', None)
742             self.to_screen('[download] Downloading playlist: %s' % playlist)
743
744             playlist_results = []
745
746             playliststart = self.params.get('playliststart', 1) - 1
747             playlistend = self.params.get('playlistend', None)
748             # For backwards compatibility, interpret -1 as whole list
749             if playlistend == -1:
750                 playlistend = None
751
752             playlistitems_str = self.params.get('playlist_items', None)
753             playlistitems = None
754             if playlistitems_str is not None:
755                 def iter_playlistitems(format):
756                     for string_segment in format.split(','):
757                         if '-' in string_segment:
758                             start, end = string_segment.split('-')
759                             for item in range(int(start), int(end) + 1):
760                                 yield int(item)
761                         else:
762                             yield int(string_segment)
763                 playlistitems = iter_playlistitems(playlistitems_str)
764
765             ie_entries = ie_result['entries']
766             if isinstance(ie_entries, list):
767                 n_all_entries = len(ie_entries)
768                 if playlistitems:
769                     entries = [
770                         ie_entries[i - 1] for i in playlistitems
771                         if -n_all_entries <= i - 1 < n_all_entries]
772                 else:
773                     entries = ie_entries[playliststart:playlistend]
774                 n_entries = len(entries)
775                 self.to_screen(
776                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
777                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
778             elif isinstance(ie_entries, PagedList):
779                 if playlistitems:
780                     entries = []
781                     for item in playlistitems:
782                         entries.extend(ie_entries.getslice(
783                             item - 1, item
784                         ))
785                 else:
786                     entries = ie_entries.getslice(
787                         playliststart, playlistend)
788                 n_entries = len(entries)
789                 self.to_screen(
790                     "[%s] playlist %s: Downloading %d videos" %
791                     (ie_result['extractor'], playlist, n_entries))
792             else:  # iterable
793                 if playlistitems:
794                     entry_list = list(ie_entries)
795                     entries = [entry_list[i - 1] for i in playlistitems]
796                 else:
797                     entries = list(itertools.islice(
798                         ie_entries, playliststart, playlistend))
799                 n_entries = len(entries)
800                 self.to_screen(
801                     "[%s] playlist %s: Downloading %d videos" %
802                     (ie_result['extractor'], playlist, n_entries))
803
804             if self.params.get('playlistreverse', False):
805                 entries = entries[::-1]
806
807             for i, entry in enumerate(entries, 1):
808                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
809                 extra = {
810                     'n_entries': n_entries,
811                     'playlist': playlist,
812                     'playlist_id': ie_result.get('id'),
813                     'playlist_title': ie_result.get('title'),
814                     'playlist_index': i + playliststart,
815                     'extractor': ie_result['extractor'],
816                     'webpage_url': ie_result['webpage_url'],
817                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
818                     'extractor_key': ie_result['extractor_key'],
819                 }
820
821                 reason = self._match_entry(entry, incomplete=True)
822                 if reason is not None:
823                     self.to_screen('[download] ' + reason)
824                     continue
825
826                 entry_result = self.process_ie_result(entry,
827                                                       download=download,
828                                                       extra_info=extra)
829                 playlist_results.append(entry_result)
830             ie_result['entries'] = playlist_results
831             return ie_result
832         elif result_type == 'compat_list':
833             self.report_warning(
834                 'Extractor %s returned a compat_list result. '
835                 'It needs to be updated.' % ie_result.get('extractor'))
836
837             def _fixup(r):
838                 self.add_extra_info(
839                     r,
840                     {
841                         'extractor': ie_result['extractor'],
842                         'webpage_url': ie_result['webpage_url'],
843                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
844                         'extractor_key': ie_result['extractor_key'],
845                     }
846                 )
847                 return r
848             ie_result['entries'] = [
849                 self.process_ie_result(_fixup(r), download, extra_info)
850                 for r in ie_result['entries']
851             ]
852             return ie_result
853         else:
854             raise Exception('Invalid result type: %s' % result_type)
855
    def _apply_format_filter(self, format_spec, available_formats):
        """Strip one trailing '[...]' filter from format_spec and apply it.

        Returns a tuple of the remaining format_spec and filtered formats.
        Raises ValueError when the filter expression cannot be parsed.
        """

        # First try a numeric comparison, e.g. [height<=480] or [filesize>10M].
        OPERATORS = {
            '<': operator.lt,
            '<=': operator.le,
            '>': operator.gt,
            '>=': operator.ge,
            '=': operator.eq,
            '!=': operator.ne,
        }
        operator_rex = re.compile(r'''(?x)\s*\[
            (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
            \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
            \]$
            ''' % '|'.join(map(re.escape, OPERATORS.keys())))
        m = operator_rex.search(format_spec)
        if m:
            try:
                comparison_value = int(m.group('value'))
            except ValueError:
                # Not a plain integer: try parsing it as a size (e.g. '10M'),
                # with and without an explicit byte suffix.
                comparison_value = parse_filesize(m.group('value'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('value') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid value %r in format specification %r' % (
                            m.group('value'), format_spec))
            op = OPERATORS[m.group('op')]

        # Fall back to a string comparison, e.g. [ext=mp4] or [protocol!=rtmp].
        if not m:
            STR_OPERATORS = {
                '=': operator.eq,
                '!=': operator.ne,
            }
            str_operator_rex = re.compile(r'''(?x)\s*\[
                \s*(?P<key>ext|acodec|vcodec|container|protocol)
                \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
                \s*(?P<value>[a-zA-Z0-9_-]+)
                \s*\]$
                ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
            m = str_operator_rex.search(format_spec)
            if m:
                comparison_value = m.group('value')
                op = STR_OPERATORS[m.group('op')]

        if not m:
            raise ValueError('Invalid format specification %r' % format_spec)

        def _filter(f):
            actual_value = f.get(m.group('key'))
            if actual_value is None:
                # A trailing '?' in the filter keeps formats lacking the key.
                return m.group('none_inclusive')
            return op(actual_value, comparison_value)
        new_formats = [f for f in available_formats if _filter(f)]

        # Drop the parsed filter from the spec; an empty remainder means 'best'.
        new_format_spec = format_spec[:-len(m.group(0))]
        if not new_format_spec:
            new_format_spec = 'best'

        return (new_format_spec, new_formats)
918
919     def select_format(self, format_spec, available_formats):
920         while format_spec.endswith(']'):
921             format_spec, available_formats = self._apply_format_filter(
922                 format_spec, available_formats)
923         if not available_formats:
924             return None
925
926         if format_spec in ['best', 'worst', None]:
927             format_idx = 0 if format_spec == 'worst' else -1
928             audiovideo_formats = [
929                 f for f in available_formats
930                 if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
931             if audiovideo_formats:
932                 return audiovideo_formats[format_idx]
933             # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
934             elif (all(f.get('acodec') != 'none' for f in available_formats) or
935                   all(f.get('vcodec') != 'none' for f in available_formats)):
936                 return available_formats[format_idx]
937         elif format_spec == 'bestaudio':
938             audio_formats = [
939                 f for f in available_formats
940                 if f.get('vcodec') == 'none']
941             if audio_formats:
942                 return audio_formats[-1]
943         elif format_spec == 'worstaudio':
944             audio_formats = [
945                 f for f in available_formats
946                 if f.get('vcodec') == 'none']
947             if audio_formats:
948                 return audio_formats[0]
949         elif format_spec == 'bestvideo':
950             video_formats = [
951                 f for f in available_formats
952                 if f.get('acodec') == 'none']
953             if video_formats:
954                 return video_formats[-1]
955         elif format_spec == 'worstvideo':
956             video_formats = [
957                 f for f in available_formats
958                 if f.get('acodec') == 'none']
959             if video_formats:
960                 return video_formats[0]
961         else:
962             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
963             if format_spec in extensions:
964                 filter_f = lambda f: f['ext'] == format_spec
965             else:
966                 filter_f = lambda f: f['format_id'] == format_spec
967             matches = list(filter(filter_f, available_formats))
968             if matches:
969                 return matches[-1]
970         return None
971
972     def _calc_headers(self, info_dict):
973         res = std_headers.copy()
974
975         add_headers = info_dict.get('http_headers')
976         if add_headers:
977             res.update(add_headers)
978
979         cookies = self._calc_cookies(info_dict)
980         if cookies:
981             res['Cookie'] = cookies
982
983         return res
984
985     def _calc_cookies(self, info_dict):
986         pr = compat_urllib_request.Request(info_dict['url'])
987         self.cookiejar.add_cookie_header(pr)
988         return pr.get_header('Cookie')
989
    def process_video_result(self, info_dict, download=True):
        """Resolve a single-video info dict: fill in defaults (thumbnails,
        upload_date, display_id, ...), select the requested subtitles and
        formats, and, if 'download' is true, hand each selected format to
        process_info(). Returns the (mutated) info_dict, or None for
        pure listing modes (listsubtitles/listformats/list_thumbnails).
        Raises ExtractorError on malformed extractor results."""
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            # Promote a lone 'thumbnail' entry to the 'thumbnails' list form.
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if thumbnails:
            # Sort so that the preferred/largest thumbnail ends up last.
            thumbnails.sort(key=lambda t: (
                t.get('preference'), t.get('width'), t.get('height'),
                t.get('id'), t.get('url')))
            for i, t in enumerate(thumbnails):
                if 'width' in t and 'height' in t:
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                if t.get('id') is None:
                    t['id'] = '%d' % i

        if thumbnails and 'thumbnail' not in info_dict:
            # Best thumbnail is the last one after the sort above.
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Working around out-of-range timestamp values (e.g. negative ones on Windows,
            # see http://bugs.python.org/issue1646728)
            try:
                upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
                info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
            except (ValueError, OverflowError, OSError):
                pass

        if self.params.get('listsubtitles', False):
            # --list-subs: print and stop processing this video.
            if 'automatic_captions' in info_dict:
                self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
            self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
            return
        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], info_dict.get('subtitles'),
            info_dict.get('automatic_captions'))

        # These extractors handle format selection themselves
        if info_dict['extractor'] in ['Youku']:
            if download:
                self.process_info(info_dict)
            return info_dict

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            raise ExtractorError('No video formats found!')

        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)

            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        for format_id, ambiguous_formats in formats_dict.items():
            if len(ambiguous_formats) > 1:
                for i, format in enumerate(ambiguous_formats):
                    format['format_id'] = '%s-%d' % (format_id, i)

        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url']).lower()
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats'):
            # --list-formats: print and stop processing this video.
            self.list_formats(info_dict)
            return
        if self.params.get('list_thumbnails'):
            # --list-thumbnails: print and stop processing this video.
            self.list_thumbnails(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            # Default format: merged bestvideo+bestaudio where merging is
            # possible (not streaming to stdout, ffmpeg available), else best.
            req_format_list = []
            if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
                    info_dict['extractor'] in ['youtube', 'ted']):
                merger = FFmpegMergerPP(self)
                if merger.available and merger.can_merge():
                    req_format_list.append('bestvideo+bestaudio')
            req_format_list.append('best')
            req_format = '/'.join(req_format_list)
        formats_to_download = []
        if req_format == 'all':
            formats_to_download = formats
        else:
            for rfstr in req_format.split(','):
                # We can accept formats requested in the format: 34/5/best, we pick
                # the first that is available, starting from left
                req_formats = rfstr.split('/')
                for rf in req_formats:
                    if re.match(r'.+?\+.+?', rf) is not None:
                        # Two formats have been requested like '137+139'
                        format_1, format_2 = rf.split('+')
                        formats_info = (self.select_format(format_1, formats),
                                        self.select_format(format_2, formats))
                        if all(formats_info):
                            # The first format must contain the video and the
                            # second the audio
                            if formats_info[0].get('vcodec') == 'none':
                                self.report_error('The first format must '
                                                  'contain the video, try using '
                                                  '"-f %s+%s"' % (format_2, format_1))
                                return
                            output_ext = (
                                formats_info[0]['ext']
                                if self.params.get('merge_output_format') is None
                                else self.params['merge_output_format'])
                            # Synthesize a combined format entry: video
                            # properties from the first, audio from the second.
                            selected_format = {
                                'requested_formats': formats_info,
                                'format': '%s+%s' % (formats_info[0].get('format'),
                                                     formats_info[1].get('format')),
                                'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                        formats_info[1].get('format_id')),
                                'width': formats_info[0].get('width'),
                                'height': formats_info[0].get('height'),
                                'resolution': formats_info[0].get('resolution'),
                                'fps': formats_info[0].get('fps'),
                                'vcodec': formats_info[0].get('vcodec'),
                                'vbr': formats_info[0].get('vbr'),
                                'stretched_ratio': formats_info[0].get('stretched_ratio'),
                                'acodec': formats_info[1].get('acodec'),
                                'abr': formats_info[1].get('abr'),
                                'ext': output_ext,
                            }
                        else:
                            selected_format = None
                    else:
                        selected_format = self.select_format(rf, formats)
                    if selected_format is not None:
                        formats_to_download.append(selected_format)
                        break
        if not formats_to_download:
            raise ExtractorError('requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
1183
1184     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1185         """Select the requested subtitles and their format"""
1186         available_subs = {}
1187         if normal_subtitles and self.params.get('writesubtitles'):
1188             available_subs.update(normal_subtitles)
1189         if automatic_captions and self.params.get('writeautomaticsub'):
1190             for lang, cap_info in automatic_captions.items():
1191                 if lang not in available_subs:
1192                     available_subs[lang] = cap_info
1193
1194         if (not self.params.get('writesubtitles') and not
1195                 self.params.get('writeautomaticsub') or not
1196                 available_subs):
1197             return None
1198
1199         if self.params.get('allsubtitles', False):
1200             requested_langs = available_subs.keys()
1201         else:
1202             if self.params.get('subtitleslangs', False):
1203                 requested_langs = self.params.get('subtitleslangs')
1204             elif 'en' in available_subs:
1205                 requested_langs = ['en']
1206             else:
1207                 requested_langs = [list(available_subs.keys())[0]]
1208
1209         formats_query = self.params.get('subtitlesformat', 'best')
1210         formats_preference = formats_query.split('/') if formats_query else []
1211         subs = {}
1212         for lang in requested_langs:
1213             formats = available_subs.get(lang)
1214             if formats is None:
1215                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1216                 continue
1217             for ext in formats_preference:
1218                 if ext == 'best':
1219                     f = formats[-1]
1220                     break
1221                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1222                 if matches:
1223                     f = matches[-1]
1224                     break
1225             else:
1226                 f = formats[-1]
1227                 self.report_warning(
1228                     'No subtitle format found matching "%s" for language %s, '
1229                     'using %s' % (formats_query, lang, f['ext']))
1230             subs[lang] = f
1231         return subs
1232
1233     def process_info(self, info_dict):
1234         """Process a single resolved IE result."""
1235
1236         assert info_dict.get('_type', 'video') == 'video'
1237
1238         max_downloads = self.params.get('max_downloads')
1239         if max_downloads is not None:
1240             if self._num_downloads >= int(max_downloads):
1241                 raise MaxDownloadsReached()
1242
1243         info_dict['fulltitle'] = info_dict['title']
1244         if len(info_dict['title']) > 200:
1245             info_dict['title'] = info_dict['title'][:197] + '...'
1246
1247         if 'format' not in info_dict:
1248             info_dict['format'] = info_dict['ext']
1249
1250         reason = self._match_entry(info_dict, incomplete=False)
1251         if reason is not None:
1252             self.to_screen('[download] ' + reason)
1253             return
1254
1255         self._num_downloads += 1
1256
1257         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1258
1259         # Forced printings
1260         if self.params.get('forcetitle', False):
1261             self.to_stdout(info_dict['fulltitle'])
1262         if self.params.get('forceid', False):
1263             self.to_stdout(info_dict['id'])
1264         if self.params.get('forceurl', False):
1265             if info_dict.get('requested_formats') is not None:
1266                 for f in info_dict['requested_formats']:
1267                     self.to_stdout(f['url'] + f.get('play_path', ''))
1268             else:
1269                 # For RTMP URLs, also include the playpath
1270                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1271         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1272             self.to_stdout(info_dict['thumbnail'])
1273         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1274             self.to_stdout(info_dict['description'])
1275         if self.params.get('forcefilename', False) and filename is not None:
1276             self.to_stdout(filename)
1277         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1278             self.to_stdout(formatSeconds(info_dict['duration']))
1279         if self.params.get('forceformat', False):
1280             self.to_stdout(info_dict['format'])
1281         if self.params.get('forcejson', False):
1282             self.to_stdout(json.dumps(info_dict))
1283
1284         # Do nothing else if in simulate mode
1285         if self.params.get('simulate', False):
1286             return
1287
1288         if filename is None:
1289             return
1290
1291         try:
1292             dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1293             if dn and not os.path.exists(dn):
1294                 os.makedirs(dn)
1295         except (OSError, IOError) as err:
1296             self.report_error('unable to create directory ' + compat_str(err))
1297             return
1298
1299         if self.params.get('writedescription', False):
1300             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1301             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1302                 self.to_screen('[info] Video description is already present')
1303             elif info_dict.get('description') is None:
1304                 self.report_warning('There\'s no description to write.')
1305             else:
1306                 try:
1307                     self.to_screen('[info] Writing video description to: ' + descfn)
1308                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1309                         descfile.write(info_dict['description'])
1310                 except (OSError, IOError):
1311                     self.report_error('Cannot write description file ' + descfn)
1312                     return
1313
1314         if self.params.get('writeannotations', False):
1315             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1316             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1317                 self.to_screen('[info] Video annotations are already present')
1318             else:
1319                 try:
1320                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1321                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1322                         annofile.write(info_dict['annotations'])
1323                 except (KeyError, TypeError):
1324                     self.report_warning('There are no annotations to write.')
1325                 except (OSError, IOError):
1326                     self.report_error('Cannot write annotations file: ' + annofn)
1327                     return
1328
1329         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1330                                        self.params.get('writeautomaticsub')])
1331
1332         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1333             # subtitles download errors are already managed as troubles in relevant IE
1334             # that way it will silently go on when used with unsupporting IE
1335             subtitles = info_dict['requested_subtitles']
1336             ie = self.get_info_extractor(info_dict['extractor_key'])
1337             for sub_lang, sub_info in subtitles.items():
1338                 sub_format = sub_info['ext']
1339                 if sub_info.get('data') is not None:
1340                     sub_data = sub_info['data']
1341                 else:
1342                     try:
1343                         sub_data = ie._download_webpage(
1344                             sub_info['url'], info_dict['id'], note=False)
1345                     except ExtractorError as err:
1346                         self.report_warning('Unable to download subtitle for "%s": %s' %
1347                                             (sub_lang, compat_str(err.cause)))
1348                         continue
1349                 try:
1350                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1351                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1352                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1353                     else:
1354                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1355                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1356                             subfile.write(sub_data)
1357                 except (OSError, IOError):
1358                     self.report_error('Cannot write subtitles file ' + sub_filename)
1359                     return
1360
1361         if self.params.get('writeinfojson', False):
1362             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1363             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1364                 self.to_screen('[info] Video description metadata is already present')
1365             else:
1366                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1367                 try:
1368                     write_json_file(self.filter_requested_info(info_dict), infofn)
1369                 except (OSError, IOError):
1370                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1371                     return
1372
1373         self._write_thumbnails(info_dict, filename)
1374
1375         if not self.params.get('skip_download', False):
1376             try:
1377                 def dl(name, info):
1378                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1379                     for ph in self._progress_hooks:
1380                         fd.add_progress_hook(ph)
1381                     if self.params.get('verbose'):
1382                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1383                     return fd.download(name, info)
1384
1385                 if info_dict.get('requested_formats') is not None:
1386                     downloaded = []
1387                     success = True
1388                     merger = FFmpegMergerPP(self)
1389                     if not merger.available:
1390                         postprocessors = []
1391                         self.report_warning('You have requested multiple '
1392                                             'formats but ffmpeg or avconv are not installed.'
1393                                             ' The formats won\'t be merged.')
1394                     else:
1395                         postprocessors = [merger]
1396
1397                     def compatible_formats(formats):
1398                         video, audio = formats
1399                         # Check extension
1400                         video_ext, audio_ext = audio.get('ext'), video.get('ext')
1401                         if video_ext and audio_ext:
1402                             COMPATIBLE_EXTS = (
1403                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
1404                                 ('webm')
1405                             )
1406                             for exts in COMPATIBLE_EXTS:
1407                                 if video_ext in exts and audio_ext in exts:
1408                                     return True
1409                         # TODO: Check acodec/vcodec
1410                         return False
1411
1412                     filename_real_ext = os.path.splitext(filename)[1][1:]
1413                     filename_wo_ext = (
1414                         os.path.splitext(filename)[0]
1415                         if filename_real_ext == info_dict['ext']
1416                         else filename)
1417                     requested_formats = info_dict['requested_formats']
1418                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1419                         info_dict['ext'] = 'mkv'
1420                         self.report_warning(
1421                             'Requested formats are incompatible for merge and will be merged into mkv.')
1422                     # Ensure filename always has a correct extension for successful merge
1423                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1424                     if os.path.exists(encodeFilename(filename)):
1425                         self.to_screen(
1426                             '[download] %s has already been downloaded and '
1427                             'merged' % filename)
1428                     else:
1429                         for f in requested_formats:
1430                             new_info = dict(info_dict)
1431                             new_info.update(f)
1432                             fname = self.prepare_filename(new_info)
1433                             fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
1434                             downloaded.append(fname)
1435                             partial_success = dl(fname, new_info)
1436                             success = success and partial_success
1437                         info_dict['__postprocessors'] = postprocessors
1438                         info_dict['__files_to_merge'] = downloaded
1439                 else:
1440                     # Just a single file
1441                     success = dl(filename, info_dict)
1442             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1443                 self.report_error('unable to download video data: %s' % str(err))
1444                 return
1445             except (OSError, IOError) as err:
1446                 raise UnavailableVideoError(err)
1447             except (ContentTooShortError, ) as err:
1448                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1449                 return
1450
1451             if success:
1452                 # Fixup content
1453                 fixup_policy = self.params.get('fixup')
1454                 if fixup_policy is None:
1455                     fixup_policy = 'detect_or_warn'
1456
1457                 stretched_ratio = info_dict.get('stretched_ratio')
1458                 if stretched_ratio is not None and stretched_ratio != 1:
1459                     if fixup_policy == 'warn':
1460                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1461                             info_dict['id'], stretched_ratio))
1462                     elif fixup_policy == 'detect_or_warn':
1463                         stretched_pp = FFmpegFixupStretchedPP(self)
1464                         if stretched_pp.available:
1465                             info_dict.setdefault('__postprocessors', [])
1466                             info_dict['__postprocessors'].append(stretched_pp)
1467                         else:
1468                             self.report_warning(
1469                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1470                                     info_dict['id'], stretched_ratio))
1471                     else:
1472                         assert fixup_policy in ('ignore', 'never')
1473
1474                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1475                     if fixup_policy == 'warn':
1476                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1477                             info_dict['id']))
1478                     elif fixup_policy == 'detect_or_warn':
1479                         fixup_pp = FFmpegFixupM4aPP(self)
1480                         if fixup_pp.available:
1481                             info_dict.setdefault('__postprocessors', [])
1482                             info_dict['__postprocessors'].append(fixup_pp)
1483                         else:
1484                             self.report_warning(
1485                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1486                                     info_dict['id']))
1487                     else:
1488                         assert fixup_policy in ('ignore', 'never')
1489
1490                 try:
1491                     self.post_process(filename, info_dict)
1492                 except (PostProcessingError) as err:
1493                     self.report_error('postprocessing: %s' % str(err))
1494                     return
1495                 self.record_download_archive(info_dict)
1496
1497     def download(self, url_list):
1498         """Download a given list of URLs."""
1499         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1500         if (len(url_list) > 1 and
1501                 '%' not in outtmpl and
1502                 self.params.get('max_downloads') != 1):
1503             raise SameFileError(outtmpl)
1504
1505         for url in url_list:
1506             try:
1507                 # It also downloads the videos
1508                 res = self.extract_info(url)
1509             except UnavailableVideoError:
1510                 self.report_error('unable to download video')
1511             except MaxDownloadsReached:
1512                 self.to_screen('[info] Maximum number of downloaded files reached.')
1513                 raise
1514             else:
1515                 if self.params.get('dump_single_json', False):
1516                     self.to_stdout(json.dumps(res))
1517
1518         return self._download_retcode
1519
1520     def download_with_info_file(self, info_filename):
1521         with contextlib.closing(fileinput.FileInput(
1522                 [info_filename], mode='r',
1523                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1524             # FileInput doesn't have a read method, we can't call json.load
1525             info = self.filter_requested_info(json.loads('\n'.join(f)))
1526         try:
1527             self.process_ie_result(info, download=True)
1528         except DownloadError:
1529             webpage_url = info.get('webpage_url')
1530             if webpage_url is not None:
1531                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1532                 return self.download([webpage_url])
1533             else:
1534                 raise
1535         return self._download_retcode
1536
1537     @staticmethod
1538     def filter_requested_info(info_dict):
1539         return dict(
1540             (k, v) for k, v in info_dict.items()
1541             if k not in ['requested_formats', 'requested_subtitles'])
1542
1543     def post_process(self, filename, ie_info):
1544         """Run all the postprocessors on the given file."""
1545         info = dict(ie_info)
1546         info['filepath'] = filename
1547         pps_chain = []
1548         if ie_info.get('__postprocessors') is not None:
1549             pps_chain.extend(ie_info['__postprocessors'])
1550         pps_chain.extend(self._pps)
1551         for pp in pps_chain:
1552             files_to_delete = []
1553             try:
1554                 files_to_delete, info = pp.run(info)
1555             except PostProcessingError as e:
1556                 self.report_error(e.msg)
1557             if files_to_delete and not self.params.get('keepvideo', False):
1558                 for old_filename in files_to_delete:
1559                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1560                     try:
1561                         os.remove(encodeFilename(old_filename))
1562                     except (IOError, OSError):
1563                         self.report_warning('Unable to remove downloaded original file')
1564
1565     def _make_archive_id(self, info_dict):
1566         # Future-proof against any change in case
1567         # and backwards compatibility with prior versions
1568         extractor = info_dict.get('extractor_key')
1569         if extractor is None:
1570             if 'id' in info_dict:
1571                 extractor = info_dict.get('ie_key')  # key in a playlist
1572         if extractor is None:
1573             return None  # Incomplete video information
1574         return extractor.lower() + ' ' + info_dict['id']
1575
1576     def in_download_archive(self, info_dict):
1577         fn = self.params.get('download_archive')
1578         if fn is None:
1579             return False
1580
1581         vid_id = self._make_archive_id(info_dict)
1582         if vid_id is None:
1583             return False  # Incomplete video information
1584
1585         try:
1586             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1587                 for line in archive_file:
1588                     if line.strip() == vid_id:
1589                         return True
1590         except IOError as ioe:
1591             if ioe.errno != errno.ENOENT:
1592                 raise
1593         return False
1594
1595     def record_download_archive(self, info_dict):
1596         fn = self.params.get('download_archive')
1597         if fn is None:
1598             return
1599         vid_id = self._make_archive_id(info_dict)
1600         assert vid_id
1601         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1602             archive_file.write(vid_id + '\n')
1603
1604     @staticmethod
1605     def format_resolution(format, default='unknown'):
1606         if format.get('vcodec') == 'none':
1607             return 'audio only'
1608         if format.get('resolution') is not None:
1609             return format['resolution']
1610         if format.get('height') is not None:
1611             if format.get('width') is not None:
1612                 res = '%sx%s' % (format['width'], format['height'])
1613             else:
1614                 res = '%sp' % format['height']
1615         elif format.get('width') is not None:
1616             res = '?x%d' % format['width']
1617         else:
1618             res = default
1619         return res
1620
1621     def _format_note(self, fdict):
1622         res = ''
1623         if fdict.get('ext') in ['f4f', 'f4m']:
1624             res += '(unsupported) '
1625         if fdict.get('format_note') is not None:
1626             res += fdict['format_note'] + ' '
1627         if fdict.get('tbr') is not None:
1628             res += '%4dk ' % fdict['tbr']
1629         if fdict.get('container') is not None:
1630             if res:
1631                 res += ', '
1632             res += '%s container' % fdict['container']
1633         if (fdict.get('vcodec') is not None and
1634                 fdict.get('vcodec') != 'none'):
1635             if res:
1636                 res += ', '
1637             res += fdict['vcodec']
1638             if fdict.get('vbr') is not None:
1639                 res += '@'
1640         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1641             res += 'video@'
1642         if fdict.get('vbr') is not None:
1643             res += '%4dk' % fdict['vbr']
1644         if fdict.get('fps') is not None:
1645             res += ', %sfps' % fdict['fps']
1646         if fdict.get('acodec') is not None:
1647             if res:
1648                 res += ', '
1649             if fdict['acodec'] == 'none':
1650                 res += 'video only'
1651             else:
1652                 res += '%-5s' % fdict['acodec']
1653         elif fdict.get('abr') is not None:
1654             if res:
1655                 res += ', '
1656             res += 'audio'
1657         if fdict.get('abr') is not None:
1658             res += '@%3dk' % fdict['abr']
1659         if fdict.get('asr') is not None:
1660             res += ' (%5dHz)' % fdict['asr']
1661         if fdict.get('filesize') is not None:
1662             if res:
1663                 res += ', '
1664             res += format_bytes(fdict['filesize'])
1665         elif fdict.get('filesize_approx') is not None:
1666             if res:
1667                 res += ', '
1668             res += '~' + format_bytes(fdict['filesize_approx'])
1669         return res
1670
1671     def list_formats(self, info_dict):
1672         formats = info_dict.get('formats', [info_dict])
1673         table = [
1674             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1675             for f in formats
1676             if f.get('preference') is None or f['preference'] >= -1000]
1677         if len(formats) > 1:
1678             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1679
1680         header_line = ['format code', 'extension', 'resolution', 'note']
1681         self.to_screen(
1682             '[info] Available formats for %s:\n%s' %
1683             (info_dict['id'], render_table(header_line, table)))
1684
1685     def list_thumbnails(self, info_dict):
1686         thumbnails = info_dict.get('thumbnails')
1687         if not thumbnails:
1688             tn_url = info_dict.get('thumbnail')
1689             if tn_url:
1690                 thumbnails = [{'id': '0', 'url': tn_url}]
1691             else:
1692                 self.to_screen(
1693                     '[info] No thumbnails present for %s' % info_dict['id'])
1694                 return
1695
1696         self.to_screen(
1697             '[info] Thumbnails for %s:' % info_dict['id'])
1698         self.to_screen(render_table(
1699             ['ID', 'width', 'height', 'URL'],
1700             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1701
1702     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1703         if not subtitles:
1704             self.to_screen('%s has no %s' % (video_id, name))
1705             return
1706         self.to_screen(
1707             'Available %s for %s:' % (name, video_id))
1708         self.to_screen(render_table(
1709             ['Language', 'formats'],
1710             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1711                 for lang, formats in subtitles.items()]))
1712
    def urlopen(self, req):
        """ Start an HTTP download """

        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # `req` may be either a plain URL string or a Request-like object
        req_is_string = isinstance(req, compat_basestring)
        url = req if req_is_string else req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            if req_is_string:
                req = url_escaped
            else:
                # Rebuild the request around the escaped URL, preserving the
                # method (HEAD vs everything else), payload and headers
                req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
                req = req_type(
                    url_escaped, data=req.data, headers=req.headers,
                    origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)

        return self._opener.open(req, timeout=self._socket_timeout)
1737
    def print_debug_header(self):
        """Write verbose diagnostic info (versions, encodings, proxies).

        No-op unless the 'verbose' param is set.
        """
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        # sys.stdout may lack an 'encoding' attribute on wrapped/redirected streams
        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
        try:
            # When running from a git checkout, also report the commit hash
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        except Exception:
            # Best-effort only; on Python 2, clear the pending exception state
            try:
                sys.exc_clear()
            except Exception:
                pass
        self._write_string('[debug] Python version %s - %s\n' % (
            platform.python_version(), platform_name()))

        # Versions of the external tools (ffmpeg/avconv, rtmpdump) we may invoke
        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Collect the proxies actually installed on the opener's handlers
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            # Opt-in phone-home: report public IP and check for a newer release
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
1802
    def _setup_opener(self):
        """Build the urllib opener (cookies, proxies, HTTPS) used for all requests."""
        timeout_val = self.params.get('socket_timeout')
        # Default to a generous 600s socket timeout when none is configured
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # No cookie file configured: keep cookies in memory only
            self.cookiejar = compat_cookiejar.CookieJar()
        else:
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
                opts_cookiefile)
            # Only load when readable; a missing file is created on save
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load()

        cookie_processor = compat_urllib_request.HTTPCookieProcessor(
            self.cookiejar)
        if opts_proxy is not None:
            # An explicit empty string disables all proxying
            if opts_proxy == '':
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            # No explicit proxy: fall back to environment configuration
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        opener = compat_urllib_request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh)

        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
1843
1844     def encode(self, s):
1845         if isinstance(s, bytes):
1846             return s  # Already encoded
1847
1848         try:
1849             return s.encode(self.get_encoding())
1850         except UnicodeEncodeError as err:
1851             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1852             raise
1853
1854     def get_encoding(self):
1855         encoding = self.params.get('encoding')
1856         if encoding is None:
1857             encoding = preferredencoding()
1858         return encoding
1859
1860     def _write_thumbnails(self, info_dict, filename):
1861         if self.params.get('writethumbnail', False):
1862             thumbnails = info_dict.get('thumbnails')
1863             if thumbnails:
1864                 thumbnails = [thumbnails[-1]]
1865         elif self.params.get('write_all_thumbnails', False):
1866             thumbnails = info_dict.get('thumbnails')
1867         else:
1868             return
1869
1870         if not thumbnails:
1871             # No thumbnails present, so return immediately
1872             return
1873
1874         for t in thumbnails:
1875             thumb_ext = determine_ext(t['url'], 'jpg')
1876             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
1877             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
1878             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
1879
1880             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1881                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
1882                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1883             else:
1884                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
1885                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1886                 try:
1887                     uf = self.urlopen(t['url'])
1888                     with open(thumb_filename, 'wb') as thumbf:
1889                         shutil.copyfileobj(uf, thumbf)
1890                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
1891                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
1892                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1893                     self.report_warning('Unable to download thumbnail "%s": %s' %
1894                                         (t['url'], compat_str(err)))