_ Git - youtube-dl/blob - youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import datetime
   9 import errno
  10 import fileinput
  11 import io
  12 import itertools
  13 import json
  14 import locale
  15 import operator
  16 import os
  17 import platform
  18 import re
  19 import shutil
  20 import subprocess
  21 import socket
  22 import sys
  23 import time
  24 import traceback
  25
  26 if os.name == 'nt':
  27     import ctypes
  28
  29 from .compat import (
  30     compat_basestring,
  31     compat_cookiejar,
  32     compat_expanduser,
  33     compat_get_terminal_size,
  34     compat_http_client,
  35     compat_kwargs,
  36     compat_str,
  37     compat_urllib_error,
  38     compat_urllib_request,
  39 )
  40 from .utils import (
  41     escape_url,
  42     ContentTooShortError,
  43     date_from_str,
  44     DateRange,
  45     DEFAULT_OUTTMPL,
  46     determine_ext,
  47     DownloadError,
  48     encodeFilename,
  49     ExtractorError,
  50     format_bytes,
  51     formatSeconds,
  52     HEADRequest,
  53     locked_file,
  54     make_HTTPS_handler,
  55     MaxDownloadsReached,
  56     PagedList,
  57     parse_filesize,
  58     PerRequestProxyHandler,
  59     PostProcessingError,
  60     platform_name,
  61     preferredencoding,
  62     render_table,
  63     SameFileError,
  64     sanitize_filename,
  65     sanitize_path,
  66     std_headers,
  67     subtitles_filename,
  68     UnavailableVideoError,
  69     url_basename,
  70     version_tuple,
  71     write_json_file,
  72     write_string,
  73     YoutubeDLHandler,
  74     prepend_extension,
  75     replace_extension,
  76     args_to_str,
  77     age_restricted,
  78 )
  79 from .cache import Cache
  80 from .extractor import get_info_extractor, gen_extractors
  81 from .downloader import get_suitable_downloader
  82 from .downloader.rtmp import rtmpdump_version
  83 from .postprocessor import (
  84     FFmpegFixupM4aPP,
  85     FFmpegFixupStretchedPP,
  86     FFmpegMergerPP,
  87     FFmpegPostProcessor,
  88     get_postprocessor,
  89 )
  90 from .version import __version__
  91
  92
  93 class YoutubeDL(object):
  94     """YoutubeDL class.
  95
   96     YoutubeDL objects are the ones responsible for downloading the
  97     actual video file and writing it to disk if the user has requested
  98     it, among some other tasks. In most cases there should be one per
  99     program. As, given a video URL, the downloader doesn't know how to
 100     extract all the needed information, task that InfoExtractors do, it
 101     has to pass the URL to one of them.
 102
 103     For this, YoutubeDL objects have a method that allows
 104     InfoExtractors to be registered in a given order. When it is passed
  105     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 106     finds that reports being able to handle it. The InfoExtractor extracts
 107     all the information about the video or videos the URL refers to, and
  108     YoutubeDL processes the extracted information, possibly using a File
 109     Downloader to download the video.
 110
 111     YoutubeDL objects accept a lot of parameters. In order not to saturate
 112     the object constructor with arguments, it receives a dictionary of
 113     options instead. These options are available through the params
 114     attribute for the InfoExtractors to use. The YoutubeDL also
 115     registers itself as the downloader in charge for the InfoExtractors
 116     that are added to it, so this is a "mutual registration".
 117
 118     Available options:
 119
 120     username:          Username for authentication purposes.
 121     password:          Password for authentication purposes.
  122     videopassword:     Password for accessing a video.
 123     usenetrc:          Use netrc for authentication instead.
 124     verbose:           Print additional info to stdout.
 125     quiet:             Do not print messages to stdout.
 126     no_warnings:       Do not print out anything for warnings.
 127     forceurl:          Force printing final URL.
 128     forcetitle:        Force printing title.
 129     forceid:           Force printing ID.
 130     forcethumbnail:    Force printing thumbnail URL.
 131     forcedescription:  Force printing description.
 132     forcefilename:     Force printing final filename.
 133     forceduration:     Force printing duration.
 134     forcejson:         Force printing info_dict as JSON.
 135     dump_single_json:  Force printing the info_dict of the whole playlist
 136                        (or video) as a single JSON line.
 137     simulate:          Do not download the video files.
 138     format:            Video format code. See options.py for more information.
 139     outtmpl:           Template for output names.
 140     restrictfilenames: Do not allow "&" and spaces in file names
 141     ignoreerrors:      Do not stop on download errors.
 142     force_generic_extractor: Force downloader to use the generic extractor
 143     nooverwrites:      Prevent overwriting files.
 144     playliststart:     Playlist item to start at.
 145     playlistend:       Playlist item to end at.
 146     playlist_items:    Specific indices of playlist to download.
 147     playlistreverse:   Download playlist items in reverse order.
 148     matchtitle:        Download only matching titles.
 149     rejecttitle:       Reject downloads for matching titles.
 150     logger:            Log messages to a logging.Logger instance.
 151     logtostderr:       Log messages to stderr instead of stdout.
 152     writedescription:  Write the video description to a .description file
  153     writeinfojson:     Write the video metadata to a .info.json file
 154     writeannotations:  Write the video annotations to a .annotations.xml file
 155     writethumbnail:    Write the thumbnail image to a file
 156     write_all_thumbnails:  Write all thumbnail formats to files
 157     writesubtitles:    Write the video subtitles to a file
 158     writeautomaticsub: Write the automatic subtitles to a file
 159     allsubtitles:      Downloads all the subtitles of the video
 160                        (requires writesubtitles or writeautomaticsub)
 161     listsubtitles:     Lists all available subtitles for the video
 162     subtitlesformat:   The format code for subtitles
 163     subtitleslangs:    List of languages of the subtitles to download
 164     keepvideo:         Keep the video file after post-processing
 165     daterange:         A DateRange object, download only if the upload_date is in the range.
 166     skip_download:     Skip the actual download of the video file
 167     cachedir:          Location of the cache files in the filesystem.
 168                        False to disable filesystem cache.
 169     noplaylist:        Download single video instead of a playlist if in doubt.
 170     age_limit:         An integer representing the user's age in years.
 171                        Unsuitable videos for the given age are skipped.
 172     min_views:         An integer representing the minimum view count the video
 173                        must have in order to not be skipped.
 174                        Videos without view count information are always
 175                        downloaded. None for no limit.
 176     max_views:         An integer representing the maximum view count.
 177                        Videos that are more popular than that are not
 178                        downloaded.
 179                        Videos without view count information are always
 180                        downloaded. None for no limit.
 181     download_archive:  File name of a file where all downloads are recorded.
 182                        Videos already present in the file are not downloaded
 183                        again.
 184     cookiefile:        File name where cookies should be read from and dumped to.
 185     nocheckcertificate:Do not verify SSL certificates
 186     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 187                        At the moment, this is only supported by YouTube.
 188     proxy:             URL of the proxy server to use
 189     cn_verification_proxy:  URL of the proxy to use for IP address verification
 190                        on Chinese sites. (Experimental)
 191     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 192     bidi_workaround:   Work around buggy terminals without bidirectional text
  193                        support, using fribidi
 194     debug_printtraffic:Print out sent and received HTTP traffic
 195     include_ads:       Download ads as well
 196     default_search:    Prepend this string if an input url is not valid.
 197                        'auto' for elaborate guessing
 198     encoding:          Use this encoding instead of the system-specified.
 199     extract_flat:      Do not resolve URLs, return the immediate result.
 200                        Pass in 'in_playlist' to only show this behavior for
 201                        playlist items.
 202     postprocessors:    A list of dictionaries, each with an entry
 203                        * key:  The name of the postprocessor. See
 204                                youtube_dl/postprocessor/__init__.py for a list.
 205                        as well as any further keyword arguments for the
 206                        postprocessor.
 207     progress_hooks:    A list of functions that get called on download
 208                        progress, with a dictionary with the entries
 209                        * status: One of "downloading", "error", or "finished".
 210                                  Check this first and ignore unknown values.
 211
 212                        If status is one of "downloading", or "finished", the
 213                        following properties may also be present:
 214                        * filename: The final filename (always present)
 215                        * tmpfilename: The filename we're currently writing to
 216                        * downloaded_bytes: Bytes on disk
 217                        * total_bytes: Size of the whole file, None if unknown
 218                        * total_bytes_estimate: Guess of the eventual file size,
 219                                                None if unavailable.
 220                        * elapsed: The number of seconds since download started.
 221                        * eta: The estimated time in seconds, None if unknown
 222                        * speed: The download speed in bytes/second, None if
 223                                 unknown
 224                        * fragment_index: The counter of the currently
 225                                          downloaded video fragment.
 226                        * fragment_count: The number of fragments (= individual
 227                                          files that will be merged)
 228
 229                        Progress hooks are guaranteed to be called at least once
 230                        (with status "finished") if the download is successful.
 231     merge_output_format: Extension to use when merging formats.
 232     fixup:             Automatically correct known faults of the file.
 233                        One of:
 234                        - "never": do nothing
 235                        - "warn": only emit a warning
 236                        - "detect_or_warn": check whether we can do anything
 237                                            about it, warn otherwise (default)
 238     source_address:    (Experimental) Client-side IP address to bind to.
 239     call_home:         Boolean, true iff we are allowed to contact the
 240                        youtube-dl servers for debugging.
 241     sleep_interval:    Number of seconds to sleep before each download.
 242     listformats:       Print an overview of available video formats and exit.
 243     list_thumbnails:   Print a table of all thumbnails and exit.
 244     match_filter:      A function that gets called with the info_dict of
 245                        every video.
 246                        If it returns a message, the video is ignored.
 247                        If it returns None, the video is downloaded.
 248                        match_filter_func in utils.py is one example for this.
 249     no_color:          Do not emit color codes in output.
 250
 251     The following options determine which downloader is picked:
 252     external_downloader: Executable of the external downloader to call.
 253                        None or unset for standard (built-in) downloader.
 254     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
 255
 256     The following parameters are not used by YoutubeDL itself, they are used by
 257     the downloader (see youtube_dl/downloader/common.py):
 258     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 259     noresizebuffer, retries, continuedl, noprogress, consoletitle,
 260     xattr_set_filesize, external_downloader_args.
 261
 262     The following options are used by the post processors:
 263     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
 264                        otherwise prefer avconv.
 265     """
 266
    # Class-level defaults. __init__ rebinds every one of these names on the
    # instance, so the values below are only placeholders.
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None
 273
 274     def __init__(self, params=None, auto_init=True):
 275         """Create a FileDownloader object with the given options."""
 276         if params is None:
 277             params = {}
 278         self._ies = []
 279         self._ies_instances = {}
 280         self._pps = []
 281         self._progress_hooks = []
 282         self._download_retcode = 0
 283         self._num_downloads = 0
 284         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 285         self._err_file = sys.stderr
 286         self.params = params
 287         self.cache = Cache(self)
 288
 289         if params.get('bidi_workaround', False):
 290             try:
 291                 import pty
 292                 master, slave = pty.openpty()
 293                 width = compat_get_terminal_size().columns
 294                 if width is None:
 295                     width_args = []
 296                 else:
 297                     width_args = ['-w', str(width)]
 298                 sp_kwargs = dict(
 299                     stdin=subprocess.PIPE,
 300                     stdout=slave,
 301                     stderr=self._err_file)
 302                 try:
 303                     self._output_process = subprocess.Popen(
 304                         ['bidiv'] + width_args, **sp_kwargs
 305                     )
 306                 except OSError:
 307                     self._output_process = subprocess.Popen(
 308                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 309                 self._output_channel = os.fdopen(master, 'rb')
 310             except OSError as ose:
 311                 if ose.errno == 2:
 312                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 313                 else:
 314                     raise
 315
 316         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 317                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
 318                 not params.get('restrictfilenames', False)):
 319             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 320             self.report_warning(
 321                 'Assuming --restrict-filenames since file system encoding '
 322                 'cannot encode all characters. '
 323                 'Set the LC_ALL environment variable to fix this.')
 324             self.params['restrictfilenames'] = True
 325
 326         if isinstance(params.get('outtmpl'), bytes):
 327             self.report_warning(
 328                 'Parameter outtmpl is bytes, but should be a unicode string. '
 329                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 330
 331         self._setup_opener()
 332
 333         if auto_init:
 334             self.print_debug_header()
 335             self.add_default_info_extractors()
 336
 337         for pp_def_raw in self.params.get('postprocessors', []):
 338             pp_class = get_postprocessor(pp_def_raw['key'])
 339             pp_def = dict(pp_def_raw)
 340             del pp_def['key']
 341             pp = pp_class(self, **compat_kwargs(pp_def))
 342             self.add_post_processor(pp)
 343
 344         for ph in self.params.get('progress_hooks', []):
 345             self.add_progress_hook(ph)
 346
 347     def warn_if_short_id(self, argv):
 348         # short YouTube ID starting with dash?
 349         idxs = [
 350             i for i, a in enumerate(argv)
 351             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 352         if idxs:
 353             correct_argv = (
 354                 ['youtube-dl'] +
 355                 [a for i, a in enumerate(argv) if i not in idxs] +
 356                 ['--'] + [argv[i] for i in idxs]
 357             )
 358             self.report_warning(
 359                 'Long argument string detected. '
 360                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 361                 args_to_str(correct_argv))
 362
 363     def add_info_extractor(self, ie):
 364         """Add an InfoExtractor object to the end of the list."""
 365         self._ies.append(ie)
 366         self._ies_instances[ie.ie_key()] = ie
 367         ie.set_downloader(self)
 368
 369     def get_info_extractor(self, ie_key):
 370         """
 371         Get an instance of an IE with name ie_key, it will try to get one from
 372         the _ies list, if there's no instance it will create a new one and add
 373         it to the extractor list.
 374         """
 375         ie = self._ies_instances.get(ie_key)
 376         if ie is None:
 377             ie = get_info_extractor(ie_key)()
 378             self.add_info_extractor(ie)
 379         return ie
 380
 381     def add_default_info_extractors(self):
 382         """
 383         Add the InfoExtractors returned by gen_extractors to the end of the list
 384         """
 385         for ie in gen_extractors():
 386             self.add_info_extractor(ie)
 387
 388     def add_post_processor(self, pp):
 389         """Add a PostProcessor object to the end of the chain."""
 390         self._pps.append(pp)
 391         pp.set_downloader(self)
 392
 393     def add_progress_hook(self, ph):
 394         """Add the progress hook (currently only for the file downloader)"""
 395         self._progress_hooks.append(ph)
 396
 397     def _bidi_workaround(self, message):
 398         if not hasattr(self, '_output_channel'):
 399             return message
 400
 401         assert hasattr(self, '_output_process')
 402         assert isinstance(message, compat_str)
 403         line_count = message.count('\n') + 1
 404         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 405         self._output_process.stdin.flush()
 406         res = ''.join(self._output_channel.readline().decode('utf-8')
 407                       for _ in range(line_count))
 408         return res[:-len('\n')]
 409
 410     def to_screen(self, message, skip_eol=False):
 411         """Print message to stdout if not in quiet mode."""
 412         return self.to_stdout(message, skip_eol, check_quiet=True)
 413
 414     def _write_string(self, s, out=None):
 415         write_string(s, out=out, encoding=self.params.get('encoding'))
 416
 417     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 418         """Print message to stdout if not in quiet mode."""
 419         if self.params.get('logger'):
 420             self.params['logger'].debug(message)
 421         elif not check_quiet or not self.params.get('quiet', False):
 422             message = self._bidi_workaround(message)
 423             terminator = ['\n', ''][skip_eol]
 424             output = message + terminator
 425
 426             self._write_string(output, self._screen_file)
 427
 428     def to_stderr(self, message):
 429         """Print message to stderr."""
 430         assert isinstance(message, compat_str)
 431         if self.params.get('logger'):
 432             self.params['logger'].error(message)
 433         else:
 434             message = self._bidi_workaround(message)
 435             output = message + '\n'
 436             self._write_string(output, self._err_file)
 437
 438     def to_console_title(self, message):
 439         if not self.params.get('consoletitle', False):
 440             return
 441         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 442             # c_wchar_p() might not be necessary if `message` is
 443             # already of type unicode()
 444             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 445         elif 'TERM' in os.environ:
 446             self._write_string('\033]0;%s\007' % message, self._screen_file)
 447
 448     def save_console_title(self):
 449         if not self.params.get('consoletitle', False):
 450             return
 451         if 'TERM' in os.environ:
 452             # Save the title on stack
 453             self._write_string('\033[22;0t', self._screen_file)
 454
 455     def restore_console_title(self):
 456         if not self.params.get('consoletitle', False):
 457             return
 458         if 'TERM' in os.environ:
 459             # Restore the title from stack
 460             self._write_string('\033[23;0t', self._screen_file)
 461
 462     def __enter__(self):
 463         self.save_console_title()
 464         return self
 465
 466     def __exit__(self, *args):
 467         self.restore_console_title()
 468
 469         if self.params.get('cookiefile') is not None:
 470             self.cookiejar.save()
 471
 472     def trouble(self, message=None, tb=None):
 473         """Determine action to take when a download problem appears.
 474
 475         Depending on if the downloader has been configured to ignore
 476         download errors or not, this method may throw an exception or
 477         not when errors are found, after printing the message.
 478
 479         tb, if given, is additional traceback information.
 480         """
 481         if message is not None:
 482             self.to_stderr(message)
 483         if self.params.get('verbose'):
 484             if tb is None:
 485                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 486                     tb = ''
 487                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 488                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 489                     tb += compat_str(traceback.format_exc())
 490                 else:
 491                     tb_data = traceback.format_list(traceback.extract_stack())
 492                     tb = ''.join(tb_data)
 493             self.to_stderr(tb)
 494         if not self.params.get('ignoreerrors', False):
 495             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 496                 exc_info = sys.exc_info()[1].exc_info
 497             else:
 498                 exc_info = sys.exc_info()
 499             raise DownloadError(message, exc_info)
 500         self._download_retcode = 1
 501
 502     def report_warning(self, message):
 503         '''
 504         Print the message to stderr, it will be prefixed with 'WARNING:'
 505         If stderr is a tty file the 'WARNING:' will be colored
 506         '''
 507         if self.params.get('logger') is not None:
 508             self.params['logger'].warning(message)
 509         else:
 510             if self.params.get('no_warnings'):
 511                 return
 512             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
 513                 _msg_header = '\033[0;33mWARNING:\033[0m'
 514             else:
 515                 _msg_header = 'WARNING:'
 516             warning_message = '%s %s' % (_msg_header, message)
 517             self.to_stderr(warning_message)
 518
 519     def report_error(self, message, tb=None):
 520         '''
 521         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 522         in red if stderr is a tty file.
 523         '''
 524         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
 525             _msg_header = '\033[0;31mERROR:\033[0m'
 526         else:
 527             _msg_header = 'ERROR:'
 528         error_message = '%s %s' % (_msg_header, message)
 529         self.trouble(error_message, tb)
 530
 531     def report_file_already_downloaded(self, file_name):
 532         """Report file has already been fully downloaded."""
 533         try:
 534             self.to_screen('[download] %s has already been downloaded' % file_name)
 535         except UnicodeEncodeError:
 536             self.to_screen('[download] The file has already been downloaded')
 537
 538     def prepare_filename(self, info_dict):
 539         """Generate the output filename."""
 540         try:
 541             template_dict = dict(info_dict)
 542
 543             template_dict['epoch'] = int(time.time())
 544             autonumber_size = self.params.get('autonumber_size')
 545             if autonumber_size is None:
 546                 autonumber_size = 5
 547             autonumber_templ = '%0' + str(autonumber_size) + 'd'
 548             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 549             if template_dict.get('playlist_index') is not None:
 550                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
 551             if template_dict.get('resolution') is None:
 552                 if template_dict.get('width') and template_dict.get('height'):
 553                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 554                 elif template_dict.get('height'):
 555                     template_dict['resolution'] = '%sp' % template_dict['height']
 556                 elif template_dict.get('width'):
 557                     template_dict['resolution'] = '?x%d' % template_dict['width']
 558
 559             sanitize = lambda k, v: sanitize_filename(
 560                 compat_str(v),
 561                 restricted=self.params.get('restrictfilenames'),
 562                 is_id=(k == 'id'))
 563             template_dict = dict((k, sanitize(k, v))
 564                                  for k, v in template_dict.items()
 565                                  if v is not None)
 566             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 567
 568             outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
 569             tmpl = compat_expanduser(outtmpl)
 570             filename = tmpl % template_dict
 571             # Temporary fix for #4787
 572             # 'Treat' all problem characters by passing filename through preferredencoding
 573             # to workaround encoding issues with subprocess on python2 @ Windows
 574             if sys.version_info < (3, 0) and sys.platform == 'win32':
 575                 filename = encodeFilename(filename, True).decode(preferredencoding())
 576             return filename
 577         except ValueError as err:
 578             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 579             return None
 580
 581     def _match_entry(self, info_dict, incomplete):
 582         """ Returns None iff the file should be downloaded """
 583
 584         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 585         if 'title' in info_dict:
 586             # This can happen when we're just evaluating the playlist
 587             title = info_dict['title']
 588             matchtitle = self.params.get('matchtitle', False)
 589             if matchtitle:
 590                 if not re.search(matchtitle, title, re.IGNORECASE):
 591                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 592             rejecttitle = self.params.get('rejecttitle', False)
 593             if rejecttitle:
 594                 if re.search(rejecttitle, title, re.IGNORECASE):
 595                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 596         date = info_dict.get('upload_date', None)
 597         if date is not None:
 598             dateRange = self.params.get('daterange', DateRange())
 599             if date not in dateRange:
 600                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 601         view_count = info_dict.get('view_count', None)
 602         if view_count is not None:
 603             min_views = self.params.get('min_views')
 604             if min_views is not None and view_count < min_views:
 605                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 606             max_views = self.params.get('max_views')
 607             if max_views is not None and view_count > max_views:
 608                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 609         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
 610             return 'Skipping "%s" because it is age restricted' % video_title
 611         if self.in_download_archive(info_dict):
 612             return '%s has already been recorded in archive' % video_title
 613
 614         if not incomplete:
 615             match_filter = self.params.get('match_filter')
 616             if match_filter is not None:
 617                 ret = match_filter(info_dict)
 618                 if ret is not None:
 619                     return ret
 620
 621         return None
 622
 623     @staticmethod
 624     def add_extra_info(info_dict, extra_info):
 625         '''Set the keys from extra_info in info dict if they are missing'''
 626         for key, value in extra_info.items():
 627             info_dict.setdefault(key, value)
 628
 629     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 630                      process=True, force_generic_extractor=False):
 631         '''
 632         Returns a list with a dictionary for each video we find.
 633         If 'download', also downloads the videos.
 634         extra_info is a dict containing the extra values to add to each result
 635         '''
 636
 637         if not ie_key and force_generic_extractor:
 638             ie_key = 'Generic'
 639
 640         if ie_key:
 641             ies = [self.get_info_extractor(ie_key)]
 642         else:
 643             ies = self._ies
 644
 645         for ie in ies:
 646             if not ie.suitable(url):
 647                 continue
 648
 649             if not ie.working():
 650                 self.report_warning('The program functionality for this site has been marked as broken, '
 651                                     'and will probably not work.')
 652
 653             try:
 654                 ie_result = ie.extract(url)
 655                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
 656                     break
 657                 if isinstance(ie_result, list):
 658                     # Backwards compatibility: old IE result format
 659                     ie_result = {
 660                         '_type': 'compat_list',
 661                         'entries': ie_result,
 662                     }
 663                 self.add_default_extra_info(ie_result, ie, url)
 664                 if process:
 665                     return self.process_ie_result(ie_result, download, extra_info)
 666                 else:
 667                     return ie_result
 668             except ExtractorError as de:  # An error we somewhat expected
 669                 self.report_error(compat_str(de), de.format_traceback())
 670                 break
 671             except MaxDownloadsReached:
 672                 raise
 673             except Exception as e:
 674                 if self.params.get('ignoreerrors', False):
 675                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 676                     break
 677                 else:
 678                     raise
 679         else:
 680             self.report_error('no suitable InfoExtractor for URL %s' % url)
 681
 682     def add_default_extra_info(self, ie_result, ie, url):
 683         self.add_extra_info(ie_result, {
 684             'extractor': ie.IE_NAME,
 685             'webpage_url': url,
 686             'webpage_url_basename': url_basename(url),
 687             'extractor_key': ie.ie_key(),
 688         })
 689
 690     def process_ie_result(self, ie_result, download=True, extra_info={}):
 691         """
 692         Take the result of the ie(may be modified) and resolve all unresolved
 693         references (URLs, playlist items).
 694
 695         It will also download the videos if 'download'.
 696         Returns the resolved ie_result.
 697         """
 698
 699         result_type = ie_result.get('_type', 'video')
 700
 701         if result_type in ('url', 'url_transparent'):
 702             extract_flat = self.params.get('extract_flat', False)
 703             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
 704                     extract_flat is True):
 705                 if self.params.get('forcejson', False):
 706                     self.to_stdout(json.dumps(ie_result))
 707                 return ie_result
 708
 709         if result_type == 'video':
 710             self.add_extra_info(ie_result, extra_info)
 711             return self.process_video_result(ie_result, download=download)
 712         elif result_type == 'url':
 713             # We have to add extra_info to the results because it may be
 714             # contained in a playlist
 715             return self.extract_info(ie_result['url'],
 716                                      download,
 717                                      ie_key=ie_result.get('ie_key'),
 718                                      extra_info=extra_info)
 719         elif result_type == 'url_transparent':
 720             # Use the information from the embedding page
 721             info = self.extract_info(
 722                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 723                 extra_info=extra_info, download=False, process=False)
 724
 725             force_properties = dict(
 726                 (k, v) for k, v in ie_result.items() if v is not None)
 727             for f in ('_type', 'url'):
 728                 if f in force_properties:
 729                     del force_properties[f]
 730             new_result = info.copy()
 731             new_result.update(force_properties)
 732
 733             assert new_result.get('_type') != 'url_transparent'
 734
 735             return self.process_ie_result(
 736                 new_result, download=download, extra_info=extra_info)
 737         elif result_type == 'playlist' or result_type == 'multi_video':
 738             # We process each entry in the playlist
 739             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 740             self.to_screen('[download] Downloading playlist: %s' % playlist)
 741
 742             playlist_results = []
 743
 744             playliststart = self.params.get('playliststart', 1) - 1
 745             playlistend = self.params.get('playlistend', None)
 746             # For backwards compatibility, interpret -1 as whole list
 747             if playlistend == -1:
 748                 playlistend = None
 749
 750             playlistitems_str = self.params.get('playlist_items', None)
 751             playlistitems = None
 752             if playlistitems_str is not None:
 753                 def iter_playlistitems(format):
 754                     for string_segment in format.split(','):
 755                         if '-' in string_segment:
 756                             start, end = string_segment.split('-')
 757                             for item in range(int(start), int(end) + 1):
 758                                 yield int(item)
 759                         else:
 760                             yield int(string_segment)
 761                 playlistitems = iter_playlistitems(playlistitems_str)
 762
 763             ie_entries = ie_result['entries']
 764             if isinstance(ie_entries, list):
 765                 n_all_entries = len(ie_entries)
 766                 if playlistitems:
 767                     entries = [
 768                         ie_entries[i - 1] for i in playlistitems
 769                         if -n_all_entries <= i - 1 < n_all_entries]
 770                 else:
 771                     entries = ie_entries[playliststart:playlistend]
 772                 n_entries = len(entries)
 773                 self.to_screen(
 774                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
 775                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
 776             elif isinstance(ie_entries, PagedList):
 777                 if playlistitems:
 778                     entries = []
 779                     for item in playlistitems:
 780                         entries.extend(ie_entries.getslice(
 781                             item - 1, item
 782                         ))
 783                 else:
 784                     entries = ie_entries.getslice(
 785                         playliststart, playlistend)
 786                 n_entries = len(entries)
 787                 self.to_screen(
 788                     "[%s] playlist %s: Downloading %d videos" %
 789                     (ie_result['extractor'], playlist, n_entries))
 790             else:  # iterable
 791                 if playlistitems:
 792                     entry_list = list(ie_entries)
 793                     entries = [entry_list[i - 1] for i in playlistitems]
 794                 else:
 795                     entries = list(itertools.islice(
 796                         ie_entries, playliststart, playlistend))
 797                 n_entries = len(entries)
 798                 self.to_screen(
 799                     "[%s] playlist %s: Downloading %d videos" %
 800                     (ie_result['extractor'], playlist, n_entries))
 801
 802             if self.params.get('playlistreverse', False):
 803                 entries = entries[::-1]
 804
 805             for i, entry in enumerate(entries, 1):
 806                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
 807                 extra = {
 808                     'n_entries': n_entries,
 809                     'playlist': playlist,
 810                     'playlist_id': ie_result.get('id'),
 811                     'playlist_title': ie_result.get('title'),
 812                     'playlist_index': i + playliststart,
 813                     'extractor': ie_result['extractor'],
 814                     'webpage_url': ie_result['webpage_url'],
 815                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
 816                     'extractor_key': ie_result['extractor_key'],
 817                 }
 818
 819                 reason = self._match_entry(entry, incomplete=True)
 820                 if reason is not None:
 821                     self.to_screen('[download] ' + reason)
 822                     continue
 823
 824                 entry_result = self.process_ie_result(entry,
 825                                                       download=download,
 826                                                       extra_info=extra)
 827                 playlist_results.append(entry_result)
 828             ie_result['entries'] = playlist_results
 829             return ie_result
 830         elif result_type == 'compat_list':
 831             self.report_warning(
 832                 'Extractor %s returned a compat_list result. '
 833                 'It needs to be updated.' % ie_result.get('extractor'))
 834
 835             def _fixup(r):
 836                 self.add_extra_info(
 837                     r,
 838                     {
 839                         'extractor': ie_result['extractor'],
 840                         'webpage_url': ie_result['webpage_url'],
 841                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
 842                         'extractor_key': ie_result['extractor_key'],
 843                     }
 844                 )
 845                 return r
 846             ie_result['entries'] = [
 847                 self.process_ie_result(_fixup(r), download, extra_info)
 848                 for r in ie_result['entries']
 849             ]
 850             return ie_result
 851         else:
 852             raise Exception('Invalid result type: %s' % result_type)
 853
 854     def _apply_format_filter(self, format_spec, available_formats):
 855         " Returns a tuple of the remaining format_spec and filtered formats "
 856
 857         OPERATORS = {
 858             '<': operator.lt,
 859             '<=': operator.le,
 860             '>': operator.gt,
 861             '>=': operator.ge,
 862             '=': operator.eq,
 863             '!=': operator.ne,
 864         }
 865         operator_rex = re.compile(r'''(?x)\s*\[
 866             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
 867             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
 868             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
 869             \]$
 870             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
 871         m = operator_rex.search(format_spec)
 872         if m:
 873             try:
 874                 comparison_value = int(m.group('value'))
 875             except ValueError:
 876                 comparison_value = parse_filesize(m.group('value'))
 877                 if comparison_value is None:
 878                     comparison_value = parse_filesize(m.group('value') + 'B')
 879                 if comparison_value is None:
 880                     raise ValueError(
 881                         'Invalid value %r in format specification %r' % (
 882                             m.group('value'), format_spec))
 883             op = OPERATORS[m.group('op')]
 884
 885         if not m:
 886             STR_OPERATORS = {
 887                 '=': operator.eq,
 888                 '!=': operator.ne,
 889             }
 890             str_operator_rex = re.compile(r'''(?x)\s*\[
 891                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
 892                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
 893                 \s*(?P<value>[a-zA-Z0-9_-]+)
 894                 \s*\]$
 895                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
 896             m = str_operator_rex.search(format_spec)
 897             if m:
 898                 comparison_value = m.group('value')
 899                 op = STR_OPERATORS[m.group('op')]
 900
 901         if not m:
 902             raise ValueError('Invalid format specification %r' % format_spec)
 903
 904         def _filter(f):
 905             actual_value = f.get(m.group('key'))
 906             if actual_value is None:
 907                 return m.group('none_inclusive')
 908             return op(actual_value, comparison_value)
 909         new_formats = [f for f in available_formats if _filter(f)]
 910
 911         new_format_spec = format_spec[:-len(m.group(0))]
 912         if not new_format_spec:
 913             new_format_spec = 'best'
 914
 915         return (new_format_spec, new_formats)
 916
 917     def select_format(self, format_spec, available_formats):
 918         while format_spec.endswith(']'):
 919             format_spec, available_formats = self._apply_format_filter(
 920                 format_spec, available_formats)
 921         if not available_formats:
 922             return None
 923
 924         if format_spec in ['best', 'worst', None]:
 925             format_idx = 0 if format_spec == 'worst' else -1
 926             audiovideo_formats = [
 927                 f for f in available_formats
 928                 if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
 929             if audiovideo_formats:
 930                 return audiovideo_formats[format_idx]
 931             # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
 932             elif (all(f.get('acodec') != 'none' for f in available_formats) or
 933                   all(f.get('vcodec') != 'none' for f in available_formats)):
 934                 return available_formats[format_idx]
 935         elif format_spec == 'bestaudio':
 936             audio_formats = [
 937                 f for f in available_formats
 938                 if f.get('vcodec') == 'none']
 939             if audio_formats:
 940                 return audio_formats[-1]
 941         elif format_spec == 'worstaudio':
 942             audio_formats = [
 943                 f for f in available_formats
 944                 if f.get('vcodec') == 'none']
 945             if audio_formats:
 946                 return audio_formats[0]
 947         elif format_spec == 'bestvideo':
 948             video_formats = [
 949                 f for f in available_formats
 950                 if f.get('acodec') == 'none']
 951             if video_formats:
 952                 return video_formats[-1]
 953         elif format_spec == 'worstvideo':
 954             video_formats = [
 955                 f for f in available_formats
 956                 if f.get('acodec') == 'none']
 957             if video_formats:
 958                 return video_formats[0]
 959         else:
 960             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
 961             if format_spec in extensions:
 962                 filter_f = lambda f: f['ext'] == format_spec
 963             else:
 964                 filter_f = lambda f: f['format_id'] == format_spec
 965             matches = list(filter(filter_f, available_formats))
 966             if matches:
 967                 return matches[-1]
 968         return None
 969
 970     def _calc_headers(self, info_dict):
 971         res = std_headers.copy()
 972
 973         add_headers = info_dict.get('http_headers')
 974         if add_headers:
 975             res.update(add_headers)
 976
 977         cookies = self._calc_cookies(info_dict)
 978         if cookies:
 979             res['Cookie'] = cookies
 980
 981         return res
 982
 983     def _calc_cookies(self, info_dict):
 984         pr = compat_urllib_request.Request(info_dict['url'])
 985         self.cookiejar.add_cookie_header(pr)
 986         return pr.get_header('Cookie')
 987
 988     def process_video_result(self, info_dict, download=True):
 989         assert info_dict.get('_type', 'video') == 'video'
 990
 991         if 'id' not in info_dict:
 992             raise ExtractorError('Missing "id" field in extractor result')
 993         if 'title' not in info_dict:
 994             raise ExtractorError('Missing "title" field in extractor result')
 995
 996         if 'playlist' not in info_dict:
 997             # It isn't part of a playlist
 998             info_dict['playlist'] = None
 999             info_dict['playlist_index'] = None
1000
1001         thumbnails = info_dict.get('thumbnails')
1002         if thumbnails is None:
1003             thumbnail = info_dict.get('thumbnail')
1004             if thumbnail:
1005                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1006         if thumbnails:
1007             thumbnails.sort(key=lambda t: (
1008                 t.get('preference'), t.get('width'), t.get('height'),
1009                 t.get('id'), t.get('url')))
1010             for i, t in enumerate(thumbnails):
1011                 if 'width' in t and 'height' in t:
1012                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1013                 if t.get('id') is None:
1014                     t['id'] = '%d' % i
1015
1016         if thumbnails and 'thumbnail' not in info_dict:
1017             info_dict['thumbnail'] = thumbnails[-1]['url']
1018
1019         if 'display_id' not in info_dict and 'id' in info_dict:
1020             info_dict['display_id'] = info_dict['id']
1021
1022         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1023             # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1024             # see http://bugs.python.org/issue1646728)
1025             try:
1026                 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1027                 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1028             except (ValueError, OverflowError, OSError):
1029                 pass
1030
1031         if self.params.get('listsubtitles', False):
1032             if 'automatic_captions' in info_dict:
1033                 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
1034             self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
1035             return
1036         info_dict['requested_subtitles'] = self.process_subtitles(
1037             info_dict['id'], info_dict.get('subtitles'),
1038             info_dict.get('automatic_captions'))
1039
1040         # This extractors handle format selection themselves
1041         if info_dict['extractor'] in ['Youku']:
1042             if download:
1043                 self.process_info(info_dict)
1044             return info_dict
1045
1046         # We now pick which formats have to be downloaded
1047         if info_dict.get('formats') is None:
1048             # There's only one format available
1049             formats = [info_dict]
1050         else:
1051             formats = info_dict['formats']
1052
1053         if not formats:
1054             raise ExtractorError('No video formats found!')
1055
1056         formats_dict = {}
1057
1058         # We check that all the formats have the format and format_id fields
1059         for i, format in enumerate(formats):
1060             if 'url' not in format:
1061                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1062
1063             if format.get('format_id') is None:
1064                 format['format_id'] = compat_str(i)
1065             format_id = format['format_id']
1066             if format_id not in formats_dict:
1067                 formats_dict[format_id] = []
1068             formats_dict[format_id].append(format)
1069
1070         # Make sure all formats have unique format_id
1071         for format_id, ambiguous_formats in formats_dict.items():
1072             if len(ambiguous_formats) > 1:
1073                 for i, format in enumerate(ambiguous_formats):
1074                     format['format_id'] = '%s-%d' % (format_id, i)
1075
1076         for i, format in enumerate(formats):
1077             if format.get('format') is None:
1078                 format['format'] = '{id} - {res}{note}'.format(
1079                     id=format['format_id'],
1080                     res=self.format_resolution(format),
1081                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1082                 )
1083             # Automatically determine file extension if missing
1084             if 'ext' not in format:
1085                 format['ext'] = determine_ext(format['url']).lower()
1086             # Add HTTP headers, so that external programs can use them from the
1087             # json output
1088             full_format_info = info_dict.copy()
1089             full_format_info.update(format)
1090             format['http_headers'] = self._calc_headers(full_format_info)
1091
1092         # TODO Central sorting goes here
1093
1094         if formats[0] is not info_dict:
1095             # only set the 'formats' fields if the original info_dict list them
1096             # otherwise we end up with a circular reference, the first (and unique)
1097             # element in the 'formats' field in info_dict is info_dict itself,
1098             # wich can't be exported to json
1099             info_dict['formats'] = formats
1100         if self.params.get('listformats'):
1101             self.list_formats(info_dict)
1102             return
1103         if self.params.get('list_thumbnails'):
1104             self.list_thumbnails(info_dict)
1105             return
1106
1107         req_format = self.params.get('format')
1108         if req_format is None:
1109             req_format_list = []
1110             if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
1111                     info_dict['extractor'] in ['youtube', 'ted']):
1112                 merger = FFmpegMergerPP(self)
1113                 if merger.available and merger.can_merge():
1114                     req_format_list.append('bestvideo+bestaudio')
1115             req_format_list.append('best')
1116             req_format = '/'.join(req_format_list)
1117         formats_to_download = []
1118         if req_format == 'all':
1119             formats_to_download = formats
1120         else:
1121             for rfstr in req_format.split(','):
1122                 # We can accept formats requested in the format: 34/5/best, we pick
1123                 # the first that is available, starting from left
1124                 req_formats = rfstr.split('/')
1125                 for rf in req_formats:
1126                     if re.match(r'.+?\+.+?', rf) is not None:
1127                         # Two formats have been requested like '137+139'
1128                         format_1, format_2 = rf.split('+')
1129                         formats_info = (self.select_format(format_1, formats),
1130                                         self.select_format(format_2, formats))
1131                         if all(formats_info):
1132                             # The first format must contain the video and the
1133                             # second the audio
1134                             if formats_info[0].get('vcodec') == 'none':
1135                                 self.report_error('The first format must '
1136                                                   'contain the video, try using '
1137                                                   '"-f %s+%s"' % (format_2, format_1))
1138                                 return
1139                             output_ext = (
1140                                 formats_info[0]['ext']
1141                                 if self.params.get('merge_output_format') is None
1142                                 else self.params['merge_output_format'])
1143                             selected_format = {
1144                                 'requested_formats': formats_info,
1145                                 'format': '%s+%s' % (formats_info[0].get('format'),
1146                                                      formats_info[1].get('format')),
1147                                 'format_id': '%s+%s' % (formats_info[0].get('format_id'),
1148                                                         formats_info[1].get('format_id')),
1149                                 'width': formats_info[0].get('width'),
1150                                 'height': formats_info[0].get('height'),
1151                                 'resolution': formats_info[0].get('resolution'),
1152                                 'fps': formats_info[0].get('fps'),
1153                                 'vcodec': formats_info[0].get('vcodec'),
1154                                 'vbr': formats_info[0].get('vbr'),
1155                                 'stretched_ratio': formats_info[0].get('stretched_ratio'),
1156                                 'acodec': formats_info[1].get('acodec'),
1157                                 'abr': formats_info[1].get('abr'),
1158                                 'ext': output_ext,
1159                             }
1160                         else:
1161                             selected_format = None
1162                     else:
1163                         selected_format = self.select_format(rf, formats)
1164                     if selected_format is not None:
1165                         formats_to_download.append(selected_format)
1166                         break
1167         if not formats_to_download:
1168             raise ExtractorError('requested format not available',
1169                                  expected=True)
1170
1171         if download:
1172             if len(formats_to_download) > 1:
1173                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1174             for format in formats_to_download:
1175                 new_info = dict(info_dict)
1176                 new_info.update(format)
1177                 self.process_info(new_info)
1178         # We update the info dict with the best quality format (backwards compatibility)
1179         info_dict.update(formats_to_download[-1])
1180         return info_dict
1181
1182     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1183         """Select the requested subtitles and their format"""
1184         available_subs = {}
1185         if normal_subtitles and self.params.get('writesubtitles'):
1186             available_subs.update(normal_subtitles)
1187         if automatic_captions and self.params.get('writeautomaticsub'):
1188             for lang, cap_info in automatic_captions.items():
1189                 if lang not in available_subs:
1190                     available_subs[lang] = cap_info
1191
1192         if (not self.params.get('writesubtitles') and not
1193                 self.params.get('writeautomaticsub') or not
1194                 available_subs):
1195             return None
1196
1197         if self.params.get('allsubtitles', False):
1198             requested_langs = available_subs.keys()
1199         else:
1200             if self.params.get('subtitleslangs', False):
1201                 requested_langs = self.params.get('subtitleslangs')
1202             elif 'en' in available_subs:
1203                 requested_langs = ['en']
1204             else:
1205                 requested_langs = [list(available_subs.keys())[0]]
1206
1207         formats_query = self.params.get('subtitlesformat', 'best')
1208         formats_preference = formats_query.split('/') if formats_query else []
1209         subs = {}
1210         for lang in requested_langs:
1211             formats = available_subs.get(lang)
1212             if formats is None:
1213                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1214                 continue
1215             for ext in formats_preference:
1216                 if ext == 'best':
1217                     f = formats[-1]
1218                     break
1219                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1220                 if matches:
1221                     f = matches[-1]
1222                     break
1223             else:
1224                 f = formats[-1]
1225                 self.report_warning(
1226                     'No subtitle format found matching "%s" for language %s, '
1227                     'using %s' % (formats_query, lang, f['ext']))
1228             subs[lang] = f
1229         return subs
1230
1231     def process_info(self, info_dict):
1232         """Process a single resolved IE result."""
1233
1234         assert info_dict.get('_type', 'video') == 'video'
1235
1236         max_downloads = self.params.get('max_downloads')
1237         if max_downloads is not None:
1238             if self._num_downloads >= int(max_downloads):
1239                 raise MaxDownloadsReached()
1240
1241         info_dict['fulltitle'] = info_dict['title']
1242         if len(info_dict['title']) > 200:
1243             info_dict['title'] = info_dict['title'][:197] + '...'
1244
1245         if 'format' not in info_dict:
1246             info_dict['format'] = info_dict['ext']
1247
1248         reason = self._match_entry(info_dict, incomplete=False)
1249         if reason is not None:
1250             self.to_screen('[download] ' + reason)
1251             return
1252
1253         self._num_downloads += 1
1254
1255         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1256
1257         # Forced printings
1258         if self.params.get('forcetitle', False):
1259             self.to_stdout(info_dict['fulltitle'])
1260         if self.params.get('forceid', False):
1261             self.to_stdout(info_dict['id'])
1262         if self.params.get('forceurl', False):
1263             if info_dict.get('requested_formats') is not None:
1264                 for f in info_dict['requested_formats']:
1265                     self.to_stdout(f['url'] + f.get('play_path', ''))
1266             else:
1267                 # For RTMP URLs, also include the playpath
1268                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1269         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1270             self.to_stdout(info_dict['thumbnail'])
1271         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1272             self.to_stdout(info_dict['description'])
1273         if self.params.get('forcefilename', False) and filename is not None:
1274             self.to_stdout(filename)
1275         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1276             self.to_stdout(formatSeconds(info_dict['duration']))
1277         if self.params.get('forceformat', False):
1278             self.to_stdout(info_dict['format'])
1279         if self.params.get('forcejson', False):
1280             self.to_stdout(json.dumps(info_dict))
1281
1282         # Do nothing else if in simulate mode
1283         if self.params.get('simulate', False):
1284             return
1285
1286         if filename is None:
1287             return
1288
1289         try:
1290             dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1291             if dn and not os.path.exists(dn):
1292                 os.makedirs(dn)
1293         except (OSError, IOError) as err:
1294             self.report_error('unable to create directory ' + compat_str(err))
1295             return
1296
1297         if self.params.get('writedescription', False):
1298             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1299             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1300                 self.to_screen('[info] Video description is already present')
1301             elif info_dict.get('description') is None:
1302                 self.report_warning('There\'s no description to write.')
1303             else:
1304                 try:
1305                     self.to_screen('[info] Writing video description to: ' + descfn)
1306                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1307                         descfile.write(info_dict['description'])
1308                 except (OSError, IOError):
1309                     self.report_error('Cannot write description file ' + descfn)
1310                     return
1311
1312         if self.params.get('writeannotations', False):
1313             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1314             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1315                 self.to_screen('[info] Video annotations are already present')
1316             else:
1317                 try:
1318                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1319                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1320                         annofile.write(info_dict['annotations'])
1321                 except (KeyError, TypeError):
1322                     self.report_warning('There are no annotations to write.')
1323                 except (OSError, IOError):
1324                     self.report_error('Cannot write annotations file: ' + annofn)
1325                     return
1326
1327         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1328                                        self.params.get('writeautomaticsub')])
1329
1330         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1331             # subtitles download errors are already managed as troubles in relevant IE
1332             # that way it will silently go on when used with unsupporting IE
1333             subtitles = info_dict['requested_subtitles']
1334             ie = self.get_info_extractor(info_dict['extractor_key'])
1335             for sub_lang, sub_info in subtitles.items():
1336                 sub_format = sub_info['ext']
1337                 if sub_info.get('data') is not None:
1338                     sub_data = sub_info['data']
1339                 else:
1340                     try:
1341                         sub_data = ie._download_webpage(
1342                             sub_info['url'], info_dict['id'], note=False)
1343                     except ExtractorError as err:
1344                         self.report_warning('Unable to download subtitle for "%s": %s' %
1345                                             (sub_lang, compat_str(err.cause)))
1346                         continue
1347                 try:
1348                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1349                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1350                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1351                     else:
1352                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1353                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1354                             subfile.write(sub_data)
1355                 except (OSError, IOError):
1356                     self.report_error('Cannot write subtitles file ' + sub_filename)
1357                     return
1358
1359         if self.params.get('writeinfojson', False):
1360             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1361             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1362                 self.to_screen('[info] Video description metadata is already present')
1363             else:
1364                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1365                 try:
1366                     write_json_file(self.filter_requested_info(info_dict), infofn)
1367                 except (OSError, IOError):
1368                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1369                     return
1370
1371         self._write_thumbnails(info_dict, filename)
1372
1373         if not self.params.get('skip_download', False):
1374             try:
1375                 def dl(name, info):
1376                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1377                     for ph in self._progress_hooks:
1378                         fd.add_progress_hook(ph)
1379                     if self.params.get('verbose'):
1380                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1381                     return fd.download(name, info)
1382
1383                 if info_dict.get('requested_formats') is not None:
1384                     downloaded = []
1385                     success = True
1386                     merger = FFmpegMergerPP(self)
1387                     if not merger.available:
1388                         postprocessors = []
1389                         self.report_warning('You have requested multiple '
1390                                             'formats but ffmpeg or avconv are not installed.'
1391                                             ' The formats won\'t be merged.')
1392                     else:
1393                         postprocessors = [merger]
1394
1395                     def compatible_formats(formats):
1396                         video, audio = formats
1397                         # Check extension
1398                         video_ext, audio_ext = audio.get('ext'), video.get('ext')
1399                         if video_ext and audio_ext:
1400                             COMPATIBLE_EXTS = (
1401                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
1402                                 ('webm')
1403                             )
1404                             for exts in COMPATIBLE_EXTS:
1405                                 if video_ext in exts and audio_ext in exts:
1406                                     return True
1407                         # TODO: Check acodec/vcodec
1408                         return False
1409
1410                     filename_real_ext = os.path.splitext(filename)[1][1:]
1411                     filename_wo_ext = (
1412                         os.path.splitext(filename)[0]
1413                         if filename_real_ext == info_dict['ext']
1414                         else filename)
1415                     requested_formats = info_dict['requested_formats']
1416                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1417                         info_dict['ext'] = 'mkv'
1418                         self.report_warning(
1419                             'Requested formats are incompatible for merge and will be merged into mkv.')
1420                     # Ensure filename always has a correct extension for successful merge
1421                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1422                     if os.path.exists(encodeFilename(filename)):
1423                         self.to_screen(
1424                             '[download] %s has already been downloaded and '
1425                             'merged' % filename)
1426                     else:
1427                         for f in requested_formats:
1428                             new_info = dict(info_dict)
1429                             new_info.update(f)
1430                             fname = self.prepare_filename(new_info)
1431                             fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
1432                             downloaded.append(fname)
1433                             partial_success = dl(fname, new_info)
1434                             success = success and partial_success
1435                         info_dict['__postprocessors'] = postprocessors
1436                         info_dict['__files_to_merge'] = downloaded
1437                 else:
1438                     # Just a single file
1439                     success = dl(filename, info_dict)
1440             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1441                 self.report_error('unable to download video data: %s' % str(err))
1442                 return
1443             except (OSError, IOError) as err:
1444                 raise UnavailableVideoError(err)
1445             except (ContentTooShortError, ) as err:
1446                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1447                 return
1448
1449             if success:
1450                 # Fixup content
1451                 fixup_policy = self.params.get('fixup')
1452                 if fixup_policy is None:
1453                     fixup_policy = 'detect_or_warn'
1454
1455                 stretched_ratio = info_dict.get('stretched_ratio')
1456                 if stretched_ratio is not None and stretched_ratio != 1:
1457                     if fixup_policy == 'warn':
1458                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1459                             info_dict['id'], stretched_ratio))
1460                     elif fixup_policy == 'detect_or_warn':
1461                         stretched_pp = FFmpegFixupStretchedPP(self)
1462                         if stretched_pp.available:
1463                             info_dict.setdefault('__postprocessors', [])
1464                             info_dict['__postprocessors'].append(stretched_pp)
1465                         else:
1466                             self.report_warning(
1467                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1468                                     info_dict['id'], stretched_ratio))
1469                     else:
1470                         assert fixup_policy in ('ignore', 'never')
1471
1472                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1473                     if fixup_policy == 'warn':
1474                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1475                             info_dict['id']))
1476                     elif fixup_policy == 'detect_or_warn':
1477                         fixup_pp = FFmpegFixupM4aPP(self)
1478                         if fixup_pp.available:
1479                             info_dict.setdefault('__postprocessors', [])
1480                             info_dict['__postprocessors'].append(fixup_pp)
1481                         else:
1482                             self.report_warning(
1483                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1484                                     info_dict['id']))
1485                     else:
1486                         assert fixup_policy in ('ignore', 'never')
1487
1488                 try:
1489                     self.post_process(filename, info_dict)
1490                 except (PostProcessingError) as err:
1491                     self.report_error('postprocessing: %s' % str(err))
1492                     return
1493                 self.record_download_archive(info_dict)
1494
1495     def download(self, url_list):
1496         """Download a given list of URLs."""
1497         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1498         if (len(url_list) > 1 and
1499                 '%' not in outtmpl and
1500                 self.params.get('max_downloads') != 1):
1501             raise SameFileError(outtmpl)
1502
1503         for url in url_list:
1504             try:
1505                 # It also downloads the videos
1506                 res = self.extract_info(
1507                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
1508             except UnavailableVideoError:
1509                 self.report_error('unable to download video')
1510             except MaxDownloadsReached:
1511                 self.to_screen('[info] Maximum number of downloaded files reached.')
1512                 raise
1513             else:
1514                 if self.params.get('dump_single_json', False):
1515                     self.to_stdout(json.dumps(res))
1516
1517         return self._download_retcode
1518
1519     def download_with_info_file(self, info_filename):
1520         with contextlib.closing(fileinput.FileInput(
1521                 [info_filename], mode='r',
1522                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1523             # FileInput doesn't have a read method, we can't call json.load
1524             info = self.filter_requested_info(json.loads('\n'.join(f)))
1525         try:
1526             self.process_ie_result(info, download=True)
1527         except DownloadError:
1528             webpage_url = info.get('webpage_url')
1529             if webpage_url is not None:
1530                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1531                 return self.download([webpage_url])
1532             else:
1533                 raise
1534         return self._download_retcode
1535
1536     @staticmethod
1537     def filter_requested_info(info_dict):
1538         return dict(
1539             (k, v) for k, v in info_dict.items()
1540             if k not in ['requested_formats', 'requested_subtitles'])
1541
1542     def post_process(self, filename, ie_info):
1543         """Run all the postprocessors on the given file."""
1544         info = dict(ie_info)
1545         info['filepath'] = filename
1546         pps_chain = []
1547         if ie_info.get('__postprocessors') is not None:
1548             pps_chain.extend(ie_info['__postprocessors'])
1549         pps_chain.extend(self._pps)
1550         for pp in pps_chain:
1551             files_to_delete = []
1552             try:
1553                 files_to_delete, info = pp.run(info)
1554             except PostProcessingError as e:
1555                 self.report_error(e.msg)
1556             if files_to_delete and not self.params.get('keepvideo', False):
1557                 for old_filename in files_to_delete:
1558                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1559                     try:
1560                         os.remove(encodeFilename(old_filename))
1561                     except (IOError, OSError):
1562                         self.report_warning('Unable to remove downloaded original file')
1563
1564     def _make_archive_id(self, info_dict):
1565         # Future-proof against any change in case
1566         # and backwards compatibility with prior versions
1567         extractor = info_dict.get('extractor_key')
1568         if extractor is None:
1569             if 'id' in info_dict:
1570                 extractor = info_dict.get('ie_key')  # key in a playlist
1571         if extractor is None:
1572             return None  # Incomplete video information
1573         return extractor.lower() + ' ' + info_dict['id']
1574
1575     def in_download_archive(self, info_dict):
1576         fn = self.params.get('download_archive')
1577         if fn is None:
1578             return False
1579
1580         vid_id = self._make_archive_id(info_dict)
1581         if vid_id is None:
1582             return False  # Incomplete video information
1583
1584         try:
1585             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1586                 for line in archive_file:
1587                     if line.strip() == vid_id:
1588                         return True
1589         except IOError as ioe:
1590             if ioe.errno != errno.ENOENT:
1591                 raise
1592         return False
1593
1594     def record_download_archive(self, info_dict):
1595         fn = self.params.get('download_archive')
1596         if fn is None:
1597             return
1598         vid_id = self._make_archive_id(info_dict)
1599         assert vid_id
1600         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1601             archive_file.write(vid_id + '\n')
1602
1603     @staticmethod
1604     def format_resolution(format, default='unknown'):
1605         if format.get('vcodec') == 'none':
1606             return 'audio only'
1607         if format.get('resolution') is not None:
1608             return format['resolution']
1609         if format.get('height') is not None:
1610             if format.get('width') is not None:
1611                 res = '%sx%s' % (format['width'], format['height'])
1612             else:
1613                 res = '%sp' % format['height']
1614         elif format.get('width') is not None:
1615             res = '?x%d' % format['width']
1616         else:
1617             res = default
1618         return res
1619
1620     def _format_note(self, fdict):
1621         res = ''
1622         if fdict.get('ext') in ['f4f', 'f4m']:
1623             res += '(unsupported) '
1624         if fdict.get('format_note') is not None:
1625             res += fdict['format_note'] + ' '
1626         if fdict.get('tbr') is not None:
1627             res += '%4dk ' % fdict['tbr']
1628         if fdict.get('container') is not None:
1629             if res:
1630                 res += ', '
1631             res += '%s container' % fdict['container']
1632         if (fdict.get('vcodec') is not None and
1633                 fdict.get('vcodec') != 'none'):
1634             if res:
1635                 res += ', '
1636             res += fdict['vcodec']
1637             if fdict.get('vbr') is not None:
1638                 res += '@'
1639         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1640             res += 'video@'
1641         if fdict.get('vbr') is not None:
1642             res += '%4dk' % fdict['vbr']
1643         if fdict.get('fps') is not None:
1644             res += ', %sfps' % fdict['fps']
1645         if fdict.get('acodec') is not None:
1646             if res:
1647                 res += ', '
1648             if fdict['acodec'] == 'none':
1649                 res += 'video only'
1650             else:
1651                 res += '%-5s' % fdict['acodec']
1652         elif fdict.get('abr') is not None:
1653             if res:
1654                 res += ', '
1655             res += 'audio'
1656         if fdict.get('abr') is not None:
1657             res += '@%3dk' % fdict['abr']
1658         if fdict.get('asr') is not None:
1659             res += ' (%5dHz)' % fdict['asr']
1660         if fdict.get('filesize') is not None:
1661             if res:
1662                 res += ', '
1663             res += format_bytes(fdict['filesize'])
1664         elif fdict.get('filesize_approx') is not None:
1665             if res:
1666                 res += ', '
1667             res += '~' + format_bytes(fdict['filesize_approx'])
1668         return res
1669
1670     def list_formats(self, info_dict):
1671         formats = info_dict.get('formats', [info_dict])
1672         table = [
1673             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1674             for f in formats
1675             if f.get('preference') is None or f['preference'] >= -1000]
1676         if len(formats) > 1:
1677             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1678
1679         header_line = ['format code', 'extension', 'resolution', 'note']
1680         self.to_screen(
1681             '[info] Available formats for %s:\n%s' %
1682             (info_dict['id'], render_table(header_line, table)))
1683
1684     def list_thumbnails(self, info_dict):
1685         thumbnails = info_dict.get('thumbnails')
1686         if not thumbnails:
1687             tn_url = info_dict.get('thumbnail')
1688             if tn_url:
1689                 thumbnails = [{'id': '0', 'url': tn_url}]
1690             else:
1691                 self.to_screen(
1692                     '[info] No thumbnails present for %s' % info_dict['id'])
1693                 return
1694
1695         self.to_screen(
1696             '[info] Thumbnails for %s:' % info_dict['id'])
1697         self.to_screen(render_table(
1698             ['ID', 'width', 'height', 'URL'],
1699             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1700
1701     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1702         if not subtitles:
1703             self.to_screen('%s has no %s' % (video_id, name))
1704             return
1705         self.to_screen(
1706             'Available %s for %s:' % (name, video_id))
1707         self.to_screen(render_table(
1708             ['Language', 'formats'],
1709             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1710                 for lang, formats in subtitles.items()]))
1711
1712     def urlopen(self, req):
1713         """ Start an HTTP download """
1714
1715         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1716         # always respected by websites, some tend to give out URLs with non percent-encoded
1717         # non-ASCII characters (see telemb.py, ard.py [#3412])
1718         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1719         # To work around aforementioned issue we will replace request's original URL with
1720         # percent-encoded one
1721         req_is_string = isinstance(req, compat_basestring)
1722         url = req if req_is_string else req.get_full_url()
1723         url_escaped = escape_url(url)
1724
1725         # Substitute URL if any change after escaping
1726         if url != url_escaped:
1727             if req_is_string:
1728                 req = url_escaped
1729             else:
1730                 req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
1731                 req = req_type(
1732                     url_escaped, data=req.data, headers=req.headers,
1733                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1734
1735         return self._opener.open(req, timeout=self._socket_timeout)
1736
1737     def print_debug_header(self):
1738         if not self.params.get('verbose'):
1739             return
1740
1741         if type('') is not compat_str:
1742             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1743             self.report_warning(
1744                 'Your Python is broken! Update to a newer and supported version')
1745
1746         stdout_encoding = getattr(
1747             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1748         encoding_str = (
1749             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1750                 locale.getpreferredencoding(),
1751                 sys.getfilesystemencoding(),
1752                 stdout_encoding,
1753                 self.get_encoding()))
1754         write_string(encoding_str, encoding=None)
1755
1756         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1757         try:
1758             sp = subprocess.Popen(
1759                 ['git', 'rev-parse', '--short', 'HEAD'],
1760                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1761                 cwd=os.path.dirname(os.path.abspath(__file__)))
1762             out, err = sp.communicate()
1763             out = out.decode().strip()
1764             if re.match('[0-9a-f]+', out):
1765                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1766         except Exception:
1767             try:
1768                 sys.exc_clear()
1769             except Exception:
1770                 pass
1771         self._write_string('[debug] Python version %s - %s\n' % (
1772             platform.python_version(), platform_name()))
1773
1774         exe_versions = FFmpegPostProcessor.get_versions(self)
1775         exe_versions['rtmpdump'] = rtmpdump_version()
1776         exe_str = ', '.join(
1777             '%s %s' % (exe, v)
1778             for exe, v in sorted(exe_versions.items())
1779             if v
1780         )
1781         if not exe_str:
1782             exe_str = 'none'
1783         self._write_string('[debug] exe versions: %s\n' % exe_str)
1784
1785         proxy_map = {}
1786         for handler in self._opener.handlers:
1787             if hasattr(handler, 'proxies'):
1788                 proxy_map.update(handler.proxies)
1789         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1790
1791         if self.params.get('call_home', False):
1792             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1793             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1794             latest_version = self.urlopen(
1795                 'https://yt-dl.org/latest/version').read().decode('utf-8')
1796             if version_tuple(latest_version) > version_tuple(__version__):
1797                 self.report_warning(
1798                     'You are using an outdated version (newest version: %s)! '
1799                     'See https://yt-dl.org/update if you need help updating.' %
1800                     latest_version)
1801
1802     def _setup_opener(self):
1803         timeout_val = self.params.get('socket_timeout')
1804         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1805
1806         opts_cookiefile = self.params.get('cookiefile')
1807         opts_proxy = self.params.get('proxy')
1808
1809         if opts_cookiefile is None:
1810             self.cookiejar = compat_cookiejar.CookieJar()
1811         else:
1812             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1813                 opts_cookiefile)
1814             if os.access(opts_cookiefile, os.R_OK):
1815                 self.cookiejar.load()
1816
1817         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1818             self.cookiejar)
1819         if opts_proxy is not None:
1820             if opts_proxy == '':
1821                 proxies = {}
1822             else:
1823                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1824         else:
1825             proxies = compat_urllib_request.getproxies()
1826             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1827             if 'http' in proxies and 'https' not in proxies:
1828                 proxies['https'] = proxies['http']
1829         proxy_handler = PerRequestProxyHandler(proxies)
1830
1831         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1832         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1833         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1834         opener = compat_urllib_request.build_opener(
1835             proxy_handler, https_handler, cookie_processor, ydlh)
1836
1837         # Delete the default user-agent header, which would otherwise apply in
1838         # cases where our custom HTTP handler doesn't come into play
1839         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1840         opener.addheaders = []
1841         self._opener = opener
1842
1843     def encode(self, s):
1844         if isinstance(s, bytes):
1845             return s  # Already encoded
1846
1847         try:
1848             return s.encode(self.get_encoding())
1849         except UnicodeEncodeError as err:
1850             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1851             raise
1852
1853     def get_encoding(self):
1854         encoding = self.params.get('encoding')
1855         if encoding is None:
1856             encoding = preferredencoding()
1857         return encoding
1858
1859     def _write_thumbnails(self, info_dict, filename):
1860         if self.params.get('writethumbnail', False):
1861             thumbnails = info_dict.get('thumbnails')
1862             if thumbnails:
1863                 thumbnails = [thumbnails[-1]]
1864         elif self.params.get('write_all_thumbnails', False):
1865             thumbnails = info_dict.get('thumbnails')
1866         else:
1867             return
1868
1869         if not thumbnails:
1870             # No thumbnails present, so return immediately
1871             return
1872
1873         for t in thumbnails:
1874             thumb_ext = determine_ext(t['url'], 'jpg')
1875             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
1876             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
1877             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
1878
1879             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1880                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
1881                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1882             else:
1883                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
1884                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1885                 try:
1886                     uf = self.urlopen(t['url'])
1887                     with open(thumb_filename, 'wb') as thumbf:
1888                         shutil.copyfileobj(uf, thumbf)
1889                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
1890                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
1891                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1892                     self.report_warning('Unable to download thumbnail "%s": %s' %
1893                                         (t['url'], compat_str(err)))