git.bitcoin.ninja Git - youtube-dl/blob - youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # coding: utf-8
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import copy
   9 import datetime
  10 import errno
  11 import fileinput
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import socket
  23 import sys
  24 import time
  25 import tokenize
  26 import traceback
  27 import random
  28
  29 from .compat import (
  30     compat_basestring,
  31     compat_cookiejar,
  32     compat_expanduser,
  33     compat_get_terminal_size,
  34     compat_http_client,
  35     compat_kwargs,
  36     compat_os_name,
  37     compat_str,
  38     compat_tokenize_tokenize,
  39     compat_urllib_error,
  40     compat_urllib_request,
  41     compat_urllib_request_DataHandler,
  42 )
  43 from .utils import (
  44     age_restricted,
  45     args_to_str,
  46     ContentTooShortError,
  47     date_from_str,
  48     DateRange,
  49     DEFAULT_OUTTMPL,
  50     determine_ext,
  51     determine_protocol,
  52     DownloadError,
  53     encode_compat_str,
  54     encodeFilename,
  55     error_to_compat_str,
  56     ExtractorError,
  57     format_bytes,
  58     formatSeconds,
  59     GeoRestrictedError,
  60     ISO3166Utils,
  61     locked_file,
  62     make_HTTPS_handler,
  63     MaxDownloadsReached,
  64     PagedList,
  65     parse_filesize,
  66     PerRequestProxyHandler,
  67     platform_name,
  68     PostProcessingError,
  69     preferredencoding,
  70     prepend_extension,
  71     register_socks_protocols,
  72     render_table,
  73     replace_extension,
  74     SameFileError,
  75     sanitize_filename,
  76     sanitize_path,
  77     sanitize_url,
  78     sanitized_Request,
  79     std_headers,
  80     subtitles_filename,
  81     UnavailableVideoError,
  82     url_basename,
  83     version_tuple,
  84     write_json_file,
  85     write_string,
  86     YoutubeDLCookieProcessor,
  87     YoutubeDLHandler,
  88 )
  89 from .cache import Cache
  90 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
  91 from .downloader import get_suitable_downloader
  92 from .downloader.rtmp import rtmpdump_version
  93 from .postprocessor import (
  94     FFmpegFixupM3u8PP,
  95     FFmpegFixupM4aPP,
  96     FFmpegFixupStretchedPP,
  97     FFmpegMergerPP,
  98     FFmpegPostProcessor,
  99     get_postprocessor,
 100 )
 101 from .version import __version__
 102
 103 if compat_os_name == 'nt':
 104     import ctypes
 105
 106
 107 class YoutubeDL(object):
 108     """YoutubeDL class.
 109
  110     YoutubeDL objects are the ones responsible for downloading the
 111     actual video file and writing it to disk if the user has requested
 112     it, among some other tasks. In most cases there should be one per
 113     program. As, given a video URL, the downloader doesn't know how to
 114     extract all the needed information, task that InfoExtractors do, it
 115     has to pass the URL to one of them.
 116
 117     For this, YoutubeDL objects have a method that allows
 118     InfoExtractors to be registered in a given order. When it is passed
  119     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 120     finds that reports being able to handle it. The InfoExtractor extracts
 121     all the information about the video or videos the URL refers to, and
  122     YoutubeDL processes the extracted information, possibly using a File
 123     Downloader to download the video.
 124
 125     YoutubeDL objects accept a lot of parameters. In order not to saturate
 126     the object constructor with arguments, it receives a dictionary of
 127     options instead. These options are available through the params
 128     attribute for the InfoExtractors to use. The YoutubeDL also
 129     registers itself as the downloader in charge for the InfoExtractors
 130     that are added to it, so this is a "mutual registration".
 131
 132     Available options:
 133
 134     username:          Username for authentication purposes.
 135     password:          Password for authentication purposes.
 136     videopassword:     Password for accessing a video.
 137     ap_mso:            Adobe Pass multiple-system operator identifier.
 138     ap_username:       Multiple-system operator account username.
 139     ap_password:       Multiple-system operator account password.
 140     usenetrc:          Use netrc for authentication instead.
 141     verbose:           Print additional info to stdout.
 142     quiet:             Do not print messages to stdout.
 143     no_warnings:       Do not print out anything for warnings.
 144     forceurl:          Force printing final URL.
 145     forcetitle:        Force printing title.
 146     forceid:           Force printing ID.
 147     forcethumbnail:    Force printing thumbnail URL.
 148     forcedescription:  Force printing description.
 149     forcefilename:     Force printing final filename.
 150     forceduration:     Force printing duration.
 151     forcejson:         Force printing info_dict as JSON.
 152     dump_single_json:  Force printing the info_dict of the whole playlist
 153                        (or video) as a single JSON line.
 154     simulate:          Do not download the video files.
 155     format:            Video format code. See options.py for more information.
 156     outtmpl:           Template for output names.
 157     restrictfilenames: Do not allow "&" and spaces in file names
 158     ignoreerrors:      Do not stop on download errors.
 159     force_generic_extractor: Force downloader to use the generic extractor
 160     nooverwrites:      Prevent overwriting files.
 161     playliststart:     Playlist item to start at.
 162     playlistend:       Playlist item to end at.
 163     playlist_items:    Specific indices of playlist to download.
 164     playlistreverse:   Download playlist items in reverse order.
 165     playlistrandom:    Download playlist items in random order.
 166     matchtitle:        Download only matching titles.
 167     rejecttitle:       Reject downloads for matching titles.
 168     logger:            Log messages to a logging.Logger instance.
 169     logtostderr:       Log messages to stderr instead of stdout.
 170     writedescription:  Write the video description to a .description file
 171     writeinfojson:     Write the video description to a .info.json file
 172     writeannotations:  Write the video annotations to a .annotations.xml file
 173     writethumbnail:    Write the thumbnail image to a file
 174     write_all_thumbnails:  Write all thumbnail formats to files
 175     writesubtitles:    Write the video subtitles to a file
 176     writeautomaticsub: Write the automatically generated subtitles to a file
 177     allsubtitles:      Downloads all the subtitles of the video
 178                        (requires writesubtitles or writeautomaticsub)
 179     listsubtitles:     Lists all available subtitles for the video
 180     subtitlesformat:   The format code for subtitles
 181     subtitleslangs:    List of languages of the subtitles to download
 182     keepvideo:         Keep the video file after post-processing
 183     daterange:         A DateRange object, download only if the upload_date is in the range.
 184     skip_download:     Skip the actual download of the video file
 185     cachedir:          Location of the cache files in the filesystem.
 186                        False to disable filesystem cache.
 187     noplaylist:        Download single video instead of a playlist if in doubt.
 188     age_limit:         An integer representing the user's age in years.
 189                        Unsuitable videos for the given age are skipped.
 190     min_views:         An integer representing the minimum view count the video
 191                        must have in order to not be skipped.
 192                        Videos without view count information are always
 193                        downloaded. None for no limit.
 194     max_views:         An integer representing the maximum view count.
 195                        Videos that are more popular than that are not
 196                        downloaded.
 197                        Videos without view count information are always
 198                        downloaded. None for no limit.
 199     download_archive:  File name of a file where all downloads are recorded.
 200                        Videos already present in the file are not downloaded
 201                        again.
 202     cookiefile:        File name where cookies should be read from and dumped to.
 203     nocheckcertificate:Do not verify SSL certificates
 204     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 205                        At the moment, this is only supported by YouTube.
 206     proxy:             URL of the proxy server to use
 207     geo_verification_proxy:  URL of the proxy to use for IP address verification
 208                        on geo-restricted sites. (Experimental)
 209     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 210     bidi_workaround:   Work around buggy terminals without bidirectional text
  211                        support, using fribidi
 212     debug_printtraffic:Print out sent and received HTTP traffic
 213     include_ads:       Download ads as well
 214     default_search:    Prepend this string if an input url is not valid.
 215                        'auto' for elaborate guessing
 216     encoding:          Use this encoding instead of the system-specified.
 217     extract_flat:      Do not resolve URLs, return the immediate result.
 218                        Pass in 'in_playlist' to only show this behavior for
 219                        playlist items.
 220     postprocessors:    A list of dictionaries, each with an entry
 221                        * key:  The name of the postprocessor. See
 222                                youtube_dl/postprocessor/__init__.py for a list.
 223                        as well as any further keyword arguments for the
 224                        postprocessor.
 225     progress_hooks:    A list of functions that get called on download
 226                        progress, with a dictionary with the entries
 227                        * status: One of "downloading", "error", or "finished".
 228                                  Check this first and ignore unknown values.
 229
 230                        If status is one of "downloading", or "finished", the
 231                        following properties may also be present:
 232                        * filename: The final filename (always present)
 233                        * tmpfilename: The filename we're currently writing to
 234                        * downloaded_bytes: Bytes on disk
 235                        * total_bytes: Size of the whole file, None if unknown
 236                        * total_bytes_estimate: Guess of the eventual file size,
 237                                                None if unavailable.
 238                        * elapsed: The number of seconds since download started.
 239                        * eta: The estimated time in seconds, None if unknown
 240                        * speed: The download speed in bytes/second, None if
 241                                 unknown
 242                        * fragment_index: The counter of the currently
 243                                          downloaded video fragment.
 244                        * fragment_count: The number of fragments (= individual
 245                                          files that will be merged)
 246
 247                        Progress hooks are guaranteed to be called at least once
 248                        (with status "finished") if the download is successful.
 249     merge_output_format: Extension to use when merging formats.
 250     fixup:             Automatically correct known faults of the file.
 251                        One of:
 252                        - "never": do nothing
 253                        - "warn": only emit a warning
 254                        - "detect_or_warn": check whether we can do anything
 255                                            about it, warn otherwise (default)
 256     source_address:    (Experimental) Client-side IP address to bind to.
 257     call_home:         Boolean, true iff we are allowed to contact the
 258                        youtube-dl servers for debugging.
 259     sleep_interval:    Number of seconds to sleep before each download when
 260                        used alone or a lower bound of a range for randomized
 261                        sleep before each download (minimum possible number
 262                        of seconds to sleep) when used along with
 263                        max_sleep_interval.
 264     max_sleep_interval:Upper bound of a range for randomized sleep before each
 265                        download (maximum possible number of seconds to sleep).
 266                        Must only be used along with sleep_interval.
 267                        Actual sleep time will be a random float from range
 268                        [sleep_interval; max_sleep_interval].
 269     listformats:       Print an overview of available video formats and exit.
 270     list_thumbnails:   Print a table of all thumbnails and exit.
 271     match_filter:      A function that gets called with the info_dict of
 272                        every video.
 273                        If it returns a message, the video is ignored.
 274                        If it returns None, the video is downloaded.
 275                        match_filter_func in utils.py is one example for this.
 276     no_color:          Do not emit color codes in output.
 277     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
 278                        HTTP header (experimental)
 279     geo_bypass_country:
  280                        Two-letter ISO 3166-1 alpha-2 country code that will be used for
 281                        explicit geographic restriction bypassing via faking
 282                        X-Forwarded-For HTTP header (experimental)
 283
 284     The following options determine which downloader is picked:
 285     external_downloader: Executable of the external downloader to call.
 286                        None or unset for standard (built-in) downloader.
 287     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
 288                        if True, otherwise use ffmpeg/avconv if False, otherwise
 289                        use downloader suggested by extractor if None.
 290
 291     The following parameters are not used by YoutubeDL itself, they are used by
 292     the downloader (see youtube_dl/downloader/common.py):
 293     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 294     noresizebuffer, retries, continuedl, noprogress, consoletitle,
 295     xattr_set_filesize, external_downloader_args, hls_use_mpegts.
 296
 297     The following options are used by the post processors:
 298     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
 299                        otherwise prefer avconv.
 300     postprocessor_args: A list of additional command-line arguments for the
 301                         postprocessor.
 302     """
 303
    # Class-level placeholders. __init__ rebinds every one of these names on
    # the instance, so the lists below are not shared between instances that
    # were created through the normal constructor.
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None
 310
 311     def __init__(self, params=None, auto_init=True):
 312         """Create a FileDownloader object with the given options."""
 313         if params is None:
 314             params = {}
 315         self._ies = []
 316         self._ies_instances = {}
 317         self._pps = []
 318         self._progress_hooks = []
 319         self._download_retcode = 0
 320         self._num_downloads = 0
 321         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 322         self._err_file = sys.stderr
 323         self.params = {
 324             # Default parameters
 325             'nocheckcertificate': False,
 326         }
 327         self.params.update(params)
 328         self.cache = Cache(self)
 329
 330         if self.params.get('cn_verification_proxy') is not None:
 331             self.report_warning('--cn-verification-proxy is deprecated. Use --geo-verification-proxy instead.')
 332             if self.params.get('geo_verification_proxy') is None:
 333                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 334
 335         if params.get('bidi_workaround', False):
 336             try:
 337                 import pty
 338                 master, slave = pty.openpty()
 339                 width = compat_get_terminal_size().columns
 340                 if width is None:
 341                     width_args = []
 342                 else:
 343                     width_args = ['-w', str(width)]
 344                 sp_kwargs = dict(
 345                     stdin=subprocess.PIPE,
 346                     stdout=slave,
 347                     stderr=self._err_file)
 348                 try:
 349                     self._output_process = subprocess.Popen(
 350                         ['bidiv'] + width_args, **sp_kwargs
 351                     )
 352                 except OSError:
 353                     self._output_process = subprocess.Popen(
 354                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 355                 self._output_channel = os.fdopen(master, 'rb')
 356             except OSError as ose:
 357                 if ose.errno == errno.ENOENT:
 358                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 359                 else:
 360                     raise
 361
 362         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 363                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
 364                 not params.get('restrictfilenames', False)):
 365             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 366             self.report_warning(
 367                 'Assuming --restrict-filenames since file system encoding '
 368                 'cannot encode all characters. '
 369                 'Set the LC_ALL environment variable to fix this.')
 370             self.params['restrictfilenames'] = True
 371
 372         if isinstance(params.get('outtmpl'), bytes):
 373             self.report_warning(
 374                 'Parameter outtmpl is bytes, but should be a unicode string. '
 375                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 376
 377         self._setup_opener()
 378
 379         if auto_init:
 380             self.print_debug_header()
 381             self.add_default_info_extractors()
 382
 383         for pp_def_raw in self.params.get('postprocessors', []):
 384             pp_class = get_postprocessor(pp_def_raw['key'])
 385             pp_def = dict(pp_def_raw)
 386             del pp_def['key']
 387             pp = pp_class(self, **compat_kwargs(pp_def))
 388             self.add_post_processor(pp)
 389
 390         for ph in self.params.get('progress_hooks', []):
 391             self.add_progress_hook(ph)
 392
 393         register_socks_protocols()
 394
 395     def warn_if_short_id(self, argv):
 396         # short YouTube ID starting with dash?
 397         idxs = [
 398             i for i, a in enumerate(argv)
 399             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 400         if idxs:
 401             correct_argv = (
 402                 ['youtube-dl'] +
 403                 [a for i, a in enumerate(argv) if i not in idxs] +
 404                 ['--'] + [argv[i] for i in idxs]
 405             )
 406             self.report_warning(
 407                 'Long argument string detected. '
 408                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 409                 args_to_str(correct_argv))
 410
 411     def add_info_extractor(self, ie):
 412         """Add an InfoExtractor object to the end of the list."""
 413         self._ies.append(ie)
 414         if not isinstance(ie, type):
 415             self._ies_instances[ie.ie_key()] = ie
 416             ie.set_downloader(self)
 417
 418     def get_info_extractor(self, ie_key):
 419         """
 420         Get an instance of an IE with name ie_key, it will try to get one from
 421         the _ies list, if there's no instance it will create a new one and add
 422         it to the extractor list.
 423         """
 424         ie = self._ies_instances.get(ie_key)
 425         if ie is None:
 426             ie = get_info_extractor(ie_key)()
 427             self.add_info_extractor(ie)
 428         return ie
 429
 430     def add_default_info_extractors(self):
 431         """
 432         Add the InfoExtractors returned by gen_extractors to the end of the list
 433         """
 434         for ie in gen_extractor_classes():
 435             self.add_info_extractor(ie)
 436
 437     def add_post_processor(self, pp):
 438         """Add a PostProcessor object to the end of the chain."""
 439         self._pps.append(pp)
 440         pp.set_downloader(self)
 441
 442     def add_progress_hook(self, ph):
 443         """Add the progress hook (currently only for the file downloader)"""
 444         self._progress_hooks.append(ph)
 445
 446     def _bidi_workaround(self, message):
 447         if not hasattr(self, '_output_channel'):
 448             return message
 449
 450         assert hasattr(self, '_output_process')
 451         assert isinstance(message, compat_str)
 452         line_count = message.count('\n') + 1
 453         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 454         self._output_process.stdin.flush()
 455         res = ''.join(self._output_channel.readline().decode('utf-8')
 456                       for _ in range(line_count))
 457         return res[:-len('\n')]
 458
 459     def to_screen(self, message, skip_eol=False):
 460         """Print message to stdout if not in quiet mode."""
 461         return self.to_stdout(message, skip_eol, check_quiet=True)
 462
 463     def _write_string(self, s, out=None):
 464         write_string(s, out=out, encoding=self.params.get('encoding'))
 465
 466     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 467         """Print message to stdout if not in quiet mode."""
 468         if self.params.get('logger'):
 469             self.params['logger'].debug(message)
 470         elif not check_quiet or not self.params.get('quiet', False):
 471             message = self._bidi_workaround(message)
 472             terminator = ['\n', ''][skip_eol]
 473             output = message + terminator
 474
 475             self._write_string(output, self._screen_file)
 476
 477     def to_stderr(self, message):
 478         """Print message to stderr."""
 479         assert isinstance(message, compat_str)
 480         if self.params.get('logger'):
 481             self.params['logger'].error(message)
 482         else:
 483             message = self._bidi_workaround(message)
 484             output = message + '\n'
 485             self._write_string(output, self._err_file)
 486
 487     def to_console_title(self, message):
 488         if not self.params.get('consoletitle', False):
 489             return
 490         if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 491             # c_wchar_p() might not be necessary if `message` is
 492             # already of type unicode()
 493             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 494         elif 'TERM' in os.environ:
 495             self._write_string('\033]0;%s\007' % message, self._screen_file)
 496
 497     def save_console_title(self):
 498         if not self.params.get('consoletitle', False):
 499             return
 500         if 'TERM' in os.environ:
 501             # Save the title on stack
 502             self._write_string('\033[22;0t', self._screen_file)
 503
 504     def restore_console_title(self):
 505         if not self.params.get('consoletitle', False):
 506             return
 507         if 'TERM' in os.environ:
 508             # Restore the title from stack
 509             self._write_string('\033[23;0t', self._screen_file)
 510
 511     def __enter__(self):
 512         self.save_console_title()
 513         return self
 514
 515     def __exit__(self, *args):
 516         self.restore_console_title()
 517
 518         if self.params.get('cookiefile') is not None:
 519             self.cookiejar.save()
 520
 521     def trouble(self, message=None, tb=None):
 522         """Determine action to take when a download problem appears.
 523
 524         Depending on if the downloader has been configured to ignore
 525         download errors or not, this method may throw an exception or
 526         not when errors are found, after printing the message.
 527
 528         tb, if given, is additional traceback information.
 529         """
 530         if message is not None:
 531             self.to_stderr(message)
 532         if self.params.get('verbose'):
 533             if tb is None:
 534                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 535                     tb = ''
 536                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 537                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 538                     tb += encode_compat_str(traceback.format_exc())
 539                 else:
 540                     tb_data = traceback.format_list(traceback.extract_stack())
 541                     tb = ''.join(tb_data)
 542             self.to_stderr(tb)
 543         if not self.params.get('ignoreerrors', False):
 544             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 545                 exc_info = sys.exc_info()[1].exc_info
 546             else:
 547                 exc_info = sys.exc_info()
 548             raise DownloadError(message, exc_info)
 549         self._download_retcode = 1
 550
 551     def report_warning(self, message):
 552         '''
 553         Print the message to stderr, it will be prefixed with 'WARNING:'
 554         If stderr is a tty file the 'WARNING:' will be colored
 555         '''
 556         if self.params.get('logger') is not None:
 557             self.params['logger'].warning(message)
 558         else:
 559             if self.params.get('no_warnings'):
 560                 return
 561             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 562                 _msg_header = '\033[0;33mWARNING:\033[0m'
 563             else:
 564                 _msg_header = 'WARNING:'
 565             warning_message = '%s %s' % (_msg_header, message)
 566             self.to_stderr(warning_message)
 567
 568     def report_error(self, message, tb=None):
 569         '''
 570         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 571         in red if stderr is a tty file.
 572         '''
 573         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 574             _msg_header = '\033[0;31mERROR:\033[0m'
 575         else:
 576             _msg_header = 'ERROR:'
 577         error_message = '%s %s' % (_msg_header, message)
 578         self.trouble(error_message, tb)
 579
 580     def report_file_already_downloaded(self, file_name):
 581         """Report file has already been fully downloaded."""
 582         try:
 583             self.to_screen('[download] %s has already been downloaded' % file_name)
 584         except UnicodeEncodeError:
 585             self.to_screen('[download] The file has already been downloaded')
 586
 587     def prepare_filename(self, info_dict):
 588         """Generate the output filename."""
 589         try:
 590             template_dict = dict(info_dict)
 591
 592             template_dict['epoch'] = int(time.time())
 593             autonumber_size = self.params.get('autonumber_size')
 594             if autonumber_size is None:
 595                 autonumber_size = 5
 596             autonumber_templ = '%0' + str(autonumber_size) + 'd'
 597             template_dict['autonumber'] = autonumber_templ % (self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
 598             if template_dict.get('playlist_index') is not None:
 599                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
 600             if template_dict.get('resolution') is None:
 601                 if template_dict.get('width') and template_dict.get('height'):
 602                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 603                 elif template_dict.get('height'):
 604                     template_dict['resolution'] = '%sp' % template_dict['height']
 605                 elif template_dict.get('width'):
 606                     template_dict['resolution'] = '%dx?' % template_dict['width']
 607
 608             sanitize = lambda k, v: sanitize_filename(
 609                 compat_str(v),
 610                 restricted=self.params.get('restrictfilenames'),
 611                 is_id=(k == 'id'))
 612             template_dict = dict((k, sanitize(k, v))
 613                                  for k, v in template_dict.items()
 614                                  if v is not None and not isinstance(v, (list, tuple, dict)))
 615             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 616
 617             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 618             tmpl = compat_expanduser(outtmpl)
 619             filename = tmpl % template_dict
 620             # Temporary fix for #4787
 621             # 'Treat' all problem characters by passing filename through preferredencoding
 622             # to workaround encoding issues with subprocess on python2 @ Windows
 623             if sys.version_info < (3, 0) and sys.platform == 'win32':
 624                 filename = encodeFilename(filename, True).decode(preferredencoding())
 625             return sanitize_path(filename)
 626         except ValueError as err:
 627             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 628             return None
 629
 630     def _match_entry(self, info_dict, incomplete):
 631         """ Returns None iff the file should be downloaded """
 632
 633         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 634         if 'title' in info_dict:
 635             # This can happen when we're just evaluating the playlist
 636             title = info_dict['title']
 637             matchtitle = self.params.get('matchtitle', False)
 638             if matchtitle:
 639                 if not re.search(matchtitle, title, re.IGNORECASE):
 640                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 641             rejecttitle = self.params.get('rejecttitle', False)
 642             if rejecttitle:
 643                 if re.search(rejecttitle, title, re.IGNORECASE):
 644                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 645         date = info_dict.get('upload_date')
 646         if date is not None:
 647             dateRange = self.params.get('daterange', DateRange())
 648             if date not in dateRange:
 649                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 650         view_count = info_dict.get('view_count')
 651         if view_count is not None:
 652             min_views = self.params.get('min_views')
 653             if min_views is not None and view_count < min_views:
 654                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 655             max_views = self.params.get('max_views')
 656             if max_views is not None and view_count > max_views:
 657                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 658         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
 659             return 'Skipping "%s" because it is age restricted' % video_title
 660         if self.in_download_archive(info_dict):
 661             return '%s has already been recorded in archive' % video_title
 662
 663         if not incomplete:
 664             match_filter = self.params.get('match_filter')
 665             if match_filter is not None:
 666                 ret = match_filter(info_dict)
 667                 if ret is not None:
 668                     return ret
 669
 670         return None
 671
 672     @staticmethod
 673     def add_extra_info(info_dict, extra_info):
 674         '''Set the keys from extra_info in info dict if they are missing'''
 675         for key, value in extra_info.items():
 676             info_dict.setdefault(key, value)
 677
 678     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 679                      process=True, force_generic_extractor=False):
 680         '''
 681         Returns a list with a dictionary for each video we find.
 682         If 'download', also downloads the videos.
 683         extra_info is a dict containing the extra values to add to each result
 684         '''
 685
 686         if not ie_key and force_generic_extractor:
 687             ie_key = 'Generic'
 688
 689         if ie_key:
 690             ies = [self.get_info_extractor(ie_key)]
 691         else:
 692             ies = self._ies
 693
 694         for ie in ies:
 695             if not ie.suitable(url):
 696                 continue
 697
 698             ie = self.get_info_extractor(ie.ie_key())
 699             if not ie.working():
 700                 self.report_warning('The program functionality for this site has been marked as broken, '
 701                                     'and will probably not work.')
 702
 703             try:
 704                 ie_result = ie.extract(url)
 705                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
 706                     break
 707                 if isinstance(ie_result, list):
 708                     # Backwards compatibility: old IE result format
 709                     ie_result = {
 710                         '_type': 'compat_list',
 711                         'entries': ie_result,
 712                     }
 713                 self.add_default_extra_info(ie_result, ie, url)
 714                 if process:
 715                     return self.process_ie_result(ie_result, download, extra_info)
 716                 else:
 717                     return ie_result
 718             except GeoRestrictedError as e:
 719                 msg = e.msg
 720                 if e.countries:
 721                     msg += '\nThis video is available in %s.' % ', '.join(
 722                         map(ISO3166Utils.short2full, e.countries))
 723                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
 724                 self.report_error(msg)
 725                 break
 726             except ExtractorError as e:  # An error we somewhat expected
 727                 self.report_error(compat_str(e), e.format_traceback())
 728                 break
 729             except MaxDownloadsReached:
 730                 raise
 731             except Exception as e:
 732                 if self.params.get('ignoreerrors', False):
 733                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
 734                     break
 735                 else:
 736                     raise
 737         else:
 738             self.report_error('no suitable InfoExtractor for URL %s' % url)
 739
 740     def add_default_extra_info(self, ie_result, ie, url):
 741         self.add_extra_info(ie_result, {
 742             'extractor': ie.IE_NAME,
 743             'webpage_url': url,
 744             'webpage_url_basename': url_basename(url),
 745             'extractor_key': ie.ie_key(),
 746         })
 747
 748     def process_ie_result(self, ie_result, download=True, extra_info={}):
 749         """
 750         Take the result of the ie(may be modified) and resolve all unresolved
 751         references (URLs, playlist items).
 752
 753         It will also download the videos if 'download'.
 754         Returns the resolved ie_result.
 755         """
 756         result_type = ie_result.get('_type', 'video')
 757
 758         if result_type in ('url', 'url_transparent'):
 759             ie_result['url'] = sanitize_url(ie_result['url'])
 760             extract_flat = self.params.get('extract_flat', False)
 761             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
 762                     extract_flat is True):
 763                 if self.params.get('forcejson', False):
 764                     self.to_stdout(json.dumps(ie_result))
 765                 return ie_result
 766
 767         if result_type == 'video':
 768             self.add_extra_info(ie_result, extra_info)
 769             return self.process_video_result(ie_result, download=download)
 770         elif result_type == 'url':
 771             # We have to add extra_info to the results because it may be
 772             # contained in a playlist
 773             return self.extract_info(ie_result['url'],
 774                                      download,
 775                                      ie_key=ie_result.get('ie_key'),
 776                                      extra_info=extra_info)
 777         elif result_type == 'url_transparent':
 778             # Use the information from the embedding page
 779             info = self.extract_info(
 780                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 781                 extra_info=extra_info, download=False, process=False)
 782
 783             force_properties = dict(
 784                 (k, v) for k, v in ie_result.items() if v is not None)
 785             for f in ('_type', 'url', 'ie_key'):
 786                 if f in force_properties:
 787                     del force_properties[f]
 788             new_result = info.copy()
 789             new_result.update(force_properties)
 790
 791             assert new_result.get('_type') != 'url_transparent'
 792
 793             return self.process_ie_result(
 794                 new_result, download=download, extra_info=extra_info)
 795         elif result_type == 'playlist' or result_type == 'multi_video':
 796             # We process each entry in the playlist
 797             playlist = ie_result.get('title') or ie_result.get('id')
 798             self.to_screen('[download] Downloading playlist: %s' % playlist)
 799
 800             playlist_results = []
 801
 802             playliststart = self.params.get('playliststart', 1) - 1
 803             playlistend = self.params.get('playlistend')
 804             # For backwards compatibility, interpret -1 as whole list
 805             if playlistend == -1:
 806                 playlistend = None
 807
 808             playlistitems_str = self.params.get('playlist_items')
 809             playlistitems = None
 810             if playlistitems_str is not None:
 811                 def iter_playlistitems(format):
 812                     for string_segment in format.split(','):
 813                         if '-' in string_segment:
 814                             start, end = string_segment.split('-')
 815                             for item in range(int(start), int(end) + 1):
 816                                 yield int(item)
 817                         else:
 818                             yield int(string_segment)
 819                 playlistitems = iter_playlistitems(playlistitems_str)
 820
 821             ie_entries = ie_result['entries']
 822             if isinstance(ie_entries, list):
 823                 n_all_entries = len(ie_entries)
 824                 if playlistitems:
 825                     entries = [
 826                         ie_entries[i - 1] for i in playlistitems
 827                         if -n_all_entries <= i - 1 < n_all_entries]
 828                 else:
 829                     entries = ie_entries[playliststart:playlistend]
 830                 n_entries = len(entries)
 831                 self.to_screen(
 832                     '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
 833                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
 834             elif isinstance(ie_entries, PagedList):
 835                 if playlistitems:
 836                     entries = []
 837                     for item in playlistitems:
 838                         entries.extend(ie_entries.getslice(
 839                             item - 1, item
 840                         ))
 841                 else:
 842                     entries = ie_entries.getslice(
 843                         playliststart, playlistend)
 844                 n_entries = len(entries)
 845                 self.to_screen(
 846                     '[%s] playlist %s: Downloading %d videos' %
 847                     (ie_result['extractor'], playlist, n_entries))
 848             else:  # iterable
 849                 if playlistitems:
 850                     entry_list = list(ie_entries)
 851                     entries = [entry_list[i - 1] for i in playlistitems]
 852                 else:
 853                     entries = list(itertools.islice(
 854                         ie_entries, playliststart, playlistend))
 855                 n_entries = len(entries)
 856                 self.to_screen(
 857                     '[%s] playlist %s: Downloading %d videos' %
 858                     (ie_result['extractor'], playlist, n_entries))
 859
 860             if self.params.get('playlistreverse', False):
 861                 entries = entries[::-1]
 862
 863             if self.params.get('playlistrandom', False):
 864                 random.shuffle(entries)
 865
 866             x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
 867
 868             for i, entry in enumerate(entries, 1):
 869                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
 870                 # This __x_forwarded_for_ip thing is a bit ugly but requires
 871                 # minimal changes
 872                 if x_forwarded_for:
 873                     entry['__x_forwarded_for_ip'] = x_forwarded_for
 874                 extra = {
 875                     'n_entries': n_entries,
 876                     'playlist': playlist,
 877                     'playlist_id': ie_result.get('id'),
 878                     'playlist_title': ie_result.get('title'),
 879                     'playlist_index': i + playliststart,
 880                     'extractor': ie_result['extractor'],
 881                     'webpage_url': ie_result['webpage_url'],
 882                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
 883                     'extractor_key': ie_result['extractor_key'],
 884                 }
 885
 886                 reason = self._match_entry(entry, incomplete=True)
 887                 if reason is not None:
 888                     self.to_screen('[download] ' + reason)
 889                     continue
 890
 891                 entry_result = self.process_ie_result(entry,
 892                                                       download=download,
 893                                                       extra_info=extra)
 894                 playlist_results.append(entry_result)
 895             ie_result['entries'] = playlist_results
 896             self.to_screen('[download] Finished downloading playlist: %s' % playlist)
 897             return ie_result
 898         elif result_type == 'compat_list':
 899             self.report_warning(
 900                 'Extractor %s returned a compat_list result. '
 901                 'It needs to be updated.' % ie_result.get('extractor'))
 902
 903             def _fixup(r):
 904                 self.add_extra_info(
 905                     r,
 906                     {
 907                         'extractor': ie_result['extractor'],
 908                         'webpage_url': ie_result['webpage_url'],
 909                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
 910                         'extractor_key': ie_result['extractor_key'],
 911                     }
 912                 )
 913                 return r
 914             ie_result['entries'] = [
 915                 self.process_ie_result(_fixup(r), download, extra_info)
 916                 for r in ie_result['entries']
 917             ]
 918             return ie_result
 919         else:
 920             raise Exception('Invalid result type: %s' % result_type)
 921
 922     def _build_format_filter(self, filter_spec):
 923         " Returns a function to filter the formats according to the filter_spec "
 924
 925         OPERATORS = {
 926             '<': operator.lt,
 927             '<=': operator.le,
 928             '>': operator.gt,
 929             '>=': operator.ge,
 930             '=': operator.eq,
 931             '!=': operator.ne,
 932         }
 933         operator_rex = re.compile(r'''(?x)\s*
 934             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
 935             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
 936             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
 937             $
 938             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
 939         m = operator_rex.search(filter_spec)
 940         if m:
 941             try:
 942                 comparison_value = int(m.group('value'))
 943             except ValueError:
 944                 comparison_value = parse_filesize(m.group('value'))
 945                 if comparison_value is None:
 946                     comparison_value = parse_filesize(m.group('value') + 'B')
 947                 if comparison_value is None:
 948                     raise ValueError(
 949                         'Invalid value %r in format specification %r' % (
 950                             m.group('value'), filter_spec))
 951             op = OPERATORS[m.group('op')]
 952
 953         if not m:
 954             STR_OPERATORS = {
 955                 '=': operator.eq,
 956                 '!=': operator.ne,
 957                 '^=': lambda attr, value: attr.startswith(value),
 958                 '$=': lambda attr, value: attr.endswith(value),
 959                 '*=': lambda attr, value: value in attr,
 960             }
 961             str_operator_rex = re.compile(r'''(?x)
 962                 \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
 963                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
 964                 \s*(?P<value>[a-zA-Z0-9._-]+)
 965                 \s*$
 966                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
 967             m = str_operator_rex.search(filter_spec)
 968             if m:
 969                 comparison_value = m.group('value')
 970                 op = STR_OPERATORS[m.group('op')]
 971
 972         if not m:
 973             raise ValueError('Invalid filter specification %r' % filter_spec)
 974
 975         def _filter(f):
 976             actual_value = f.get(m.group('key'))
 977             if actual_value is None:
 978                 return m.group('none_inclusive')
 979             return op(actual_value, comparison_value)
 980         return _filter
 981
    def build_format_selector(self, format_spec):
        """
        Compile the format specification string format_spec into a selector.

        The string is tokenized with the Python tokenizer, parsed into
        FormatSelector tuples and turned into a function. That function takes
        a ctx dict, of which the 'formats' and 'incomplete_formats' keys are
        read, and returns an iterable of the selected format dicts.

        Recognized operators are ',' (several selections), '/' (first
        alternative that yields something), '+' (merge of two formats),
        '(' ')' (grouping) and '[' ']' (filters, see _build_format_filter).

        Raises SyntaxError when format_spec cannot be parsed.
        """
        def syntax_error(note, start):
            # Builds (does not raise) a SyntaxError whose message shows the
            # whole spec with a caret under column start[1]
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        # Selector node types
        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        # 'selector' holds a name (SINGLE), a pair of selectors (PICKFIRST,
        # MERGE) or a list of selectors (GROUP); 'filters' holds the raw
        # filter strings found in brackets
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        def _parse_filter(tokens):
            # Consume tokens up to the closing ']' and return their strings
            # joined together; implicitly returns None if tokens run out first
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            # Pending joined name and the position of its first token
            # (last_line is never reassigned, so it is always yielded as None)
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    # Names, numbers and unused operators are glued together
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            # Flush the name that is still pending at the end of input
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Recursive descent parser returning a list of FormatSelector.
            # The inside_* flags tell a nested call which operators belong to
            # its caller: such a token is pushed back with
            # tokens.restore_last_token() and parsing stops.
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        # A filter without a preceding selector applies to 'best'
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        video_selector = current_selector
                        audio_selector = _parse_format_selection(tokens, inside_merge=True)
                        if not video_selector or not audio_selector:
                            raise syntax_error('"+" must be between two format selectors', start)
                        current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _build_selector_function(selector):
            # Turn a FormatSelector (or a list of them) into a function of ctx
            if isinstance(selector, list):
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    # Chain the results of all selectors, in order
                    for f in fs:
                        for format in f(ctx):
                            yield format
                # Lists carry no filters of their own, so return right away
                return selector_function
            elif selector.type == GROUP:
                selector_function = _build_selector_function(selector.selector)
            elif selector.type == PICKFIRST:
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    # Result of the first alternative that selects anything
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []
            elif selector.type == SINGLE:
                # This local name shadows the method parameter inside this
                # helper
                format_spec = selector.selector

                def selector_function(ctx):
                    # Positional picks below ([-1] for best, [0] for worst)
                    # rely on the order of ctx['formats']
                    formats = list(ctx['formats'])
                    if not formats:
                        return
                    if format_spec == 'all':
                        for f in formats:
                            yield f
                    elif format_spec in ['best', 'worst', None]:
                        format_idx = 0 if format_spec == 'worst' else -1
                        audiovideo_formats = [
                            f for f in formats
                            if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                        if audiovideo_formats:
                            yield audiovideo_formats[format_idx]
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) we will fallback to best/worst
                        # {video,audio}-only format
                        elif ctx['incomplete_formats']:
                            yield formats[format_idx]
                    elif format_spec == 'bestaudio':
                        # Audio-only formats are those whose vcodec is 'none'
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[-1]
                    elif format_spec == 'worstaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[0]
                    elif format_spec == 'bestvideo':
                        # Video-only formats are those whose acodec is 'none'
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[-1]
                    elif format_spec == 'worstvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[0]
                    else:
                        # Anything else is either a known extension or a
                        # format_id; the last matching format is selected
                        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                        if format_spec in extensions:
                            filter_f = lambda f: f['ext'] == format_spec
                        else:
                            filter_f = lambda f: f['format_id'] == format_spec
                        matches = list(filter(filter_f, formats))
                        if matches:
                            yield matches[-1]
            elif selector.type == MERGE:
                def _merge(formats_info):
                    # Combine a (video, audio) pair into one format dict;
                    # returns None after reporting an invalid combination
                    format_1, format_2 = [f['format_id'] for f in formats_info]
                    # The first format must contain the video and the
                    # second the audio
                    if formats_info[0].get('vcodec') == 'none':
                        self.report_error('The first format must '
                                          'contain the video, try using '
                                          '"-f %s+%s"' % (format_2, format_1))
                        return
                    # Formats must be opposite (video+audio)
                    if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
                        self.report_error(
                            'Both formats %s and %s are video-only, you must specify "-f video+audio"'
                            % (format_1, format_2))
                        return
                    # The extension of the video format is used unless
                    # 'merge_output_format' is set
                    output_ext = (
                        formats_info[0]['ext']
                        if self.params.get('merge_output_format') is None
                        else self.params['merge_output_format'])
                    return {
                        'requested_formats': formats_info,
                        'format': '%s+%s' % (formats_info[0].get('format'),
                                             formats_info[1].get('format')),
                        'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                formats_info[1].get('format_id')),
                        'width': formats_info[0].get('width'),
                        'height': formats_info[0].get('height'),
                        'resolution': formats_info[0].get('resolution'),
                        'fps': formats_info[0].get('fps'),
                        'vcodec': formats_info[0].get('vcodec'),
                        'vbr': formats_info[0].get('vbr'),
                        'stretched_ratio': formats_info[0].get('stretched_ratio'),
                        'acodec': formats_info[1].get('acodec'),
                        'abr': formats_info[1].get('abr'),
                        'ext': output_ext,
                    }
                video_selector, audio_selector = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    # Every video/audio combination is merged; each side gets
                    # its own deep copy of ctx. The value yielded is whatever
                    # _merge returned, which is None for a rejected pair.
                    for pair in itertools.product(
                            video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
                        yield _merge(pair)

            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                # Apply the bracket filters to a deep copy of ctx, then run
                # the selector on what is left
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector

        # The tokenizer reads lines of bytes
        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator(object):
            """Iterator over a token list that can step back by one token"""

            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            # Python 2 name of the iterator protocol method
            next = __next__

            def restore_last_token(self):
                # Make the most recently returned token the next one again
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
1246
1247     def _calc_headers(self, info_dict):
1248         res = std_headers.copy()
1249
1250         add_headers = info_dict.get('http_headers')
1251         if add_headers:
1252             res.update(add_headers)
1253
1254         cookies = self._calc_cookies(info_dict)
1255         if cookies:
1256             res['Cookie'] = cookies
1257
1258         if 'X-Forwarded-For' not in res:
1259             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1260             if x_forwarded_for_ip:
1261                 res['X-Forwarded-For'] = x_forwarded_for_ip
1262
1263         return res
1264
1265     def _calc_cookies(self, info_dict):
1266         pr = sanitized_Request(info_dict['url'])
1267         self.cookiejar.add_cookie_header(pr)
1268         return pr.get_header('Cookie')
1269
1270     def process_video_result(self, info_dict, download=True):
1271         assert info_dict.get('_type', 'video') == 'video'
1272
1273         if 'id' not in info_dict:
1274             raise ExtractorError('Missing "id" field in extractor result')
1275         if 'title' not in info_dict:
1276             raise ExtractorError('Missing "title" field in extractor result')
1277
1278         if not isinstance(info_dict['id'], compat_str):
1279             self.report_warning('"id" field is not a string - forcing string conversion')
1280             info_dict['id'] = compat_str(info_dict['id'])
1281
1282         if 'playlist' not in info_dict:
1283             # It isn't part of a playlist
1284             info_dict['playlist'] = None
1285             info_dict['playlist_index'] = None
1286
1287         thumbnails = info_dict.get('thumbnails')
1288         if thumbnails is None:
1289             thumbnail = info_dict.get('thumbnail')
1290             if thumbnail:
1291                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1292         if thumbnails:
1293             thumbnails.sort(key=lambda t: (
1294                 t.get('preference') if t.get('preference') is not None else -1,
1295                 t.get('width') if t.get('width') is not None else -1,
1296                 t.get('height') if t.get('height') is not None else -1,
1297                 t.get('id') if t.get('id') is not None else '', t.get('url')))
1298             for i, t in enumerate(thumbnails):
1299                 t['url'] = sanitize_url(t['url'])
1300                 if t.get('width') and t.get('height'):
1301                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1302                 if t.get('id') is None:
1303                     t['id'] = '%d' % i
1304
1305         if self.params.get('list_thumbnails'):
1306             self.list_thumbnails(info_dict)
1307             return
1308
1309         thumbnail = info_dict.get('thumbnail')
1310         if thumbnail:
1311             info_dict['thumbnail'] = sanitize_url(thumbnail)
1312         elif thumbnails:
1313             info_dict['thumbnail'] = thumbnails[-1]['url']
1314
1315         if 'display_id' not in info_dict and 'id' in info_dict:
1316             info_dict['display_id'] = info_dict['id']
1317
1318         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1319             # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1320             # see http://bugs.python.org/issue1646728)
1321             try:
1322                 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1323                 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1324             except (ValueError, OverflowError, OSError):
1325                 pass
1326
1327         # Auto generate title fields corresponding to the *_number fields when missing
1328         # in order to always have clean titles. This is very common for TV series.
1329         for field in ('chapter', 'season', 'episode'):
1330             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1331                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
1332
1333         subtitles = info_dict.get('subtitles')
1334         if subtitles:
1335             for _, subtitle in subtitles.items():
1336                 for subtitle_format in subtitle:
1337                     if subtitle_format.get('url'):
1338                         subtitle_format['url'] = sanitize_url(subtitle_format['url'])
1339                     if subtitle_format.get('ext') is None:
1340                         subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1341
1342         if self.params.get('listsubtitles', False):
1343             if 'automatic_captions' in info_dict:
1344                 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
1345             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1346             return
1347         info_dict['requested_subtitles'] = self.process_subtitles(
1348             info_dict['id'], subtitles,
1349             info_dict.get('automatic_captions'))
1350
1351         # We now pick which formats have to be downloaded
1352         if info_dict.get('formats') is None:
1353             # There's only one format available
1354             formats = [info_dict]
1355         else:
1356             formats = info_dict['formats']
1357
1358         if not formats:
1359             raise ExtractorError('No video formats found!')
1360
1361         formats_dict = {}
1362
1363         # We check that all the formats have the format and format_id fields
1364         for i, format in enumerate(formats):
1365             if 'url' not in format:
1366                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1367
1368             format['url'] = sanitize_url(format['url'])
1369
1370             if format.get('format_id') is None:
1371                 format['format_id'] = compat_str(i)
1372             else:
1373                 # Sanitize format_id from characters used in format selector expression
1374                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
1375             format_id = format['format_id']
1376             if format_id not in formats_dict:
1377                 formats_dict[format_id] = []
1378             formats_dict[format_id].append(format)
1379
1380         # Make sure all formats have unique format_id
1381         for format_id, ambiguous_formats in formats_dict.items():
1382             if len(ambiguous_formats) > 1:
1383                 for i, format in enumerate(ambiguous_formats):
1384                     format['format_id'] = '%s-%d' % (format_id, i)
1385
1386         for i, format in enumerate(formats):
1387             if format.get('format') is None:
1388                 format['format'] = '{id} - {res}{note}'.format(
1389                     id=format['format_id'],
1390                     res=self.format_resolution(format),
1391                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1392                 )
1393             # Automatically determine file extension if missing
1394             if format.get('ext') is None:
1395                 format['ext'] = determine_ext(format['url']).lower()
1396             # Automatically determine protocol if missing (useful for format
1397             # selection purposes)
1398             if format.get('protocol') is None:
1399                 format['protocol'] = determine_protocol(format)
1400             # Add HTTP headers, so that external programs can use them from the
1401             # json output
1402             full_format_info = info_dict.copy()
1403             full_format_info.update(format)
1404             format['http_headers'] = self._calc_headers(full_format_info)
1405         # Remove private housekeeping stuff
1406         if '__x_forwarded_for_ip' in info_dict:
1407             del info_dict['__x_forwarded_for_ip']
1408
1409         # TODO Central sorting goes here
1410
1411         if formats[0] is not info_dict:
1412             # only set the 'formats' fields if the original info_dict list them
1413             # otherwise we end up with a circular reference, the first (and unique)
1414             # element in the 'formats' field in info_dict is info_dict itself,
1415             # which can't be exported to json
1416             info_dict['formats'] = formats
1417         if self.params.get('listformats'):
1418             self.list_formats(info_dict)
1419             return
1420
1421         req_format = self.params.get('format')
1422         if req_format is None:
1423             req_format_list = []
1424             if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
1425                     not info_dict.get('is_live')):
1426                 merger = FFmpegMergerPP(self)
1427                 if merger.available and merger.can_merge():
1428                     req_format_list.append('bestvideo+bestaudio')
1429             req_format_list.append('best')
1430             req_format = '/'.join(req_format_list)
1431         format_selector = self.build_format_selector(req_format)
1432
1433         # While in format selection we may need to have an access to the original
1434         # format set in order to calculate some metrics or do some processing.
1435         # For now we need to be able to guess whether original formats provided
1436         # by extractor are incomplete or not (i.e. whether extractor provides only
1437         # video-only or audio-only formats) for proper formats selection for
1438         # extractors with such incomplete formats (see
1439         # https://github.com/rg3/youtube-dl/pull/5556).
1440         # Since formats may be filtered during format selection and may not match
1441         # the original formats the results may be incorrect. Thus original formats
1442         # or pre-calculated metrics should be passed to format selection routines
1443         # as well.
1444         # We will pass a context object containing all necessary additional data
1445         # instead of just formats.
1446         # This fixes incorrect format selection issue (see
1447         # https://github.com/rg3/youtube-dl/issues/10083).
1448         incomplete_formats = (
1449             # All formats are video-only or
1450             all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) or
1451             # all formats are audio-only
1452             all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
1453
1454         ctx = {
1455             'formats': formats,
1456             'incomplete_formats': incomplete_formats,
1457         }
1458
1459         formats_to_download = list(format_selector(ctx))
1460         if not formats_to_download:
1461             raise ExtractorError('requested format not available',
1462                                  expected=True)
1463
1464         if download:
1465             if len(formats_to_download) > 1:
1466                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1467             for format in formats_to_download:
1468                 new_info = dict(info_dict)
1469                 new_info.update(format)
1470                 self.process_info(new_info)
1471         # We update the info dict with the best quality format (backwards compatibility)
1472         info_dict.update(formats_to_download[-1])
1473         return info_dict
1474
1475     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1476         """Select the requested subtitles and their format"""
1477         available_subs = {}
1478         if normal_subtitles and self.params.get('writesubtitles'):
1479             available_subs.update(normal_subtitles)
1480         if automatic_captions and self.params.get('writeautomaticsub'):
1481             for lang, cap_info in automatic_captions.items():
1482                 if lang not in available_subs:
1483                     available_subs[lang] = cap_info
1484
1485         if (not self.params.get('writesubtitles') and not
1486                 self.params.get('writeautomaticsub') or not
1487                 available_subs):
1488             return None
1489
1490         if self.params.get('allsubtitles', False):
1491             requested_langs = available_subs.keys()
1492         else:
1493             if self.params.get('subtitleslangs', False):
1494                 requested_langs = self.params.get('subtitleslangs')
1495             elif 'en' in available_subs:
1496                 requested_langs = ['en']
1497             else:
1498                 requested_langs = [list(available_subs.keys())[0]]
1499
1500         formats_query = self.params.get('subtitlesformat', 'best')
1501         formats_preference = formats_query.split('/') if formats_query else []
1502         subs = {}
1503         for lang in requested_langs:
1504             formats = available_subs.get(lang)
1505             if formats is None:
1506                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1507                 continue
1508             for ext in formats_preference:
1509                 if ext == 'best':
1510                     f = formats[-1]
1511                     break
1512                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1513                 if matches:
1514                     f = matches[-1]
1515                     break
1516             else:
1517                 f = formats[-1]
1518                 self.report_warning(
1519                     'No subtitle format found matching "%s" for language %s, '
1520                     'using %s' % (formats_query, lang, f['ext']))
1521             subs[lang] = f
1522         return subs
1523
1524     def process_info(self, info_dict):
1525         """Process a single resolved IE result."""
1526
1527         assert info_dict.get('_type', 'video') == 'video'
1528
1529         max_downloads = self.params.get('max_downloads')
1530         if max_downloads is not None:
1531             if self._num_downloads >= int(max_downloads):
1532                 raise MaxDownloadsReached()
1533
1534         info_dict['fulltitle'] = info_dict['title']
1535         if len(info_dict['title']) > 200:
1536             info_dict['title'] = info_dict['title'][:197] + '...'
1537
1538         if 'format' not in info_dict:
1539             info_dict['format'] = info_dict['ext']
1540
1541         reason = self._match_entry(info_dict, incomplete=False)
1542         if reason is not None:
1543             self.to_screen('[download] ' + reason)
1544             return
1545
1546         self._num_downloads += 1
1547
1548         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1549
1550         # Forced printings
1551         if self.params.get('forcetitle', False):
1552             self.to_stdout(info_dict['fulltitle'])
1553         if self.params.get('forceid', False):
1554             self.to_stdout(info_dict['id'])
1555         if self.params.get('forceurl', False):
1556             if info_dict.get('requested_formats') is not None:
1557                 for f in info_dict['requested_formats']:
1558                     self.to_stdout(f['url'] + f.get('play_path', ''))
1559             else:
1560                 # For RTMP URLs, also include the playpath
1561                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1562         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1563             self.to_stdout(info_dict['thumbnail'])
1564         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1565             self.to_stdout(info_dict['description'])
1566         if self.params.get('forcefilename', False) and filename is not None:
1567             self.to_stdout(filename)
1568         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1569             self.to_stdout(formatSeconds(info_dict['duration']))
1570         if self.params.get('forceformat', False):
1571             self.to_stdout(info_dict['format'])
1572         if self.params.get('forcejson', False):
1573             self.to_stdout(json.dumps(info_dict))
1574
1575         # Do nothing else if in simulate mode
1576         if self.params.get('simulate', False):
1577             return
1578
1579         if filename is None:
1580             return
1581
1582         try:
1583             dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1584             if dn and not os.path.exists(dn):
1585                 os.makedirs(dn)
1586         except (OSError, IOError) as err:
1587             self.report_error('unable to create directory ' + error_to_compat_str(err))
1588             return
1589
1590         if self.params.get('writedescription', False):
1591             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1592             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1593                 self.to_screen('[info] Video description is already present')
1594             elif info_dict.get('description') is None:
1595                 self.report_warning('There\'s no description to write.')
1596             else:
1597                 try:
1598                     self.to_screen('[info] Writing video description to: ' + descfn)
1599                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1600                         descfile.write(info_dict['description'])
1601                 except (OSError, IOError):
1602                     self.report_error('Cannot write description file ' + descfn)
1603                     return
1604
1605         if self.params.get('writeannotations', False):
1606             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1607             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1608                 self.to_screen('[info] Video annotations are already present')
1609             else:
1610                 try:
1611                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1612                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1613                         annofile.write(info_dict['annotations'])
1614                 except (KeyError, TypeError):
1615                     self.report_warning('There are no annotations to write.')
1616                 except (OSError, IOError):
1617                     self.report_error('Cannot write annotations file: ' + annofn)
1618                     return
1619
1620         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1621                                        self.params.get('writeautomaticsub')])
1622
1623         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1624             # subtitles download errors are already managed as troubles in relevant IE
1625             # that way it will silently go on when used with unsupporting IE
1626             subtitles = info_dict['requested_subtitles']
1627             ie = self.get_info_extractor(info_dict['extractor_key'])
1628             for sub_lang, sub_info in subtitles.items():
1629                 sub_format = sub_info['ext']
1630                 if sub_info.get('data') is not None:
1631                     sub_data = sub_info['data']
1632                 else:
1633                     try:
1634                         sub_data = ie._download_webpage(
1635                             sub_info['url'], info_dict['id'], note=False)
1636                     except ExtractorError as err:
1637                         self.report_warning('Unable to download subtitle for "%s": %s' %
1638                                             (sub_lang, error_to_compat_str(err.cause)))
1639                         continue
1640                 try:
1641                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1642                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1643                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1644                     else:
1645                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1646                         # Use newline='' to prevent conversion of newline characters
1647                         # See https://github.com/rg3/youtube-dl/issues/10268
1648                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
1649                             subfile.write(sub_data)
1650                 except (OSError, IOError):
1651                     self.report_error('Cannot write subtitles file ' + sub_filename)
1652                     return
1653
1654         if self.params.get('writeinfojson', False):
1655             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1656             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1657                 self.to_screen('[info] Video description metadata is already present')
1658             else:
1659                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1660                 try:
1661                     write_json_file(self.filter_requested_info(info_dict), infofn)
1662                 except (OSError, IOError):
1663                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1664                     return
1665
1666         self._write_thumbnails(info_dict, filename)
1667
1668         if not self.params.get('skip_download', False):
1669             try:
1670                 def dl(name, info):
1671                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1672                     for ph in self._progress_hooks:
1673                         fd.add_progress_hook(ph)
1674                     if self.params.get('verbose'):
1675                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1676                     return fd.download(name, info)
1677
1678                 if info_dict.get('requested_formats') is not None:
1679                     downloaded = []
1680                     success = True
1681                     merger = FFmpegMergerPP(self)
1682                     if not merger.available:
1683                         postprocessors = []
1684                         self.report_warning('You have requested multiple '
1685                                             'formats but ffmpeg or avconv are not installed.'
1686                                             ' The formats won\'t be merged.')
1687                     else:
1688                         postprocessors = [merger]
1689
1690                     def compatible_formats(formats):
1691                         video, audio = formats
1692                         # Check extension
1693                         video_ext, audio_ext = audio.get('ext'), video.get('ext')
1694                         if video_ext and audio_ext:
1695                             COMPATIBLE_EXTS = (
1696                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
1697                                 ('webm')
1698                             )
1699                             for exts in COMPATIBLE_EXTS:
1700                                 if video_ext in exts and audio_ext in exts:
1701                                     return True
1702                         # TODO: Check acodec/vcodec
1703                         return False
1704
1705                     filename_real_ext = os.path.splitext(filename)[1][1:]
1706                     filename_wo_ext = (
1707                         os.path.splitext(filename)[0]
1708                         if filename_real_ext == info_dict['ext']
1709                         else filename)
1710                     requested_formats = info_dict['requested_formats']
1711                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1712                         info_dict['ext'] = 'mkv'
1713                         self.report_warning(
1714                             'Requested formats are incompatible for merge and will be merged into mkv.')
1715                     # Ensure filename always has a correct extension for successful merge
1716                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1717                     if os.path.exists(encodeFilename(filename)):
1718                         self.to_screen(
1719                             '[download] %s has already been downloaded and '
1720                             'merged' % filename)
1721                     else:
1722                         for f in requested_formats:
1723                             new_info = dict(info_dict)
1724                             new_info.update(f)
1725                             fname = self.prepare_filename(new_info)
1726                             fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
1727                             downloaded.append(fname)
1728                             partial_success = dl(fname, new_info)
1729                             success = success and partial_success
1730                         info_dict['__postprocessors'] = postprocessors
1731                         info_dict['__files_to_merge'] = downloaded
1732                 else:
1733                     # Just a single file
1734                     success = dl(filename, info_dict)
1735             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1736                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
1737                 return
1738             except (OSError, IOError) as err:
1739                 raise UnavailableVideoError(err)
1740             except (ContentTooShortError, ) as err:
1741                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1742                 return
1743
1744             if success and filename != '-':
1745                 # Fixup content
1746                 fixup_policy = self.params.get('fixup')
1747                 if fixup_policy is None:
1748                     fixup_policy = 'detect_or_warn'
1749
1750                 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
1751
1752                 stretched_ratio = info_dict.get('stretched_ratio')
1753                 if stretched_ratio is not None and stretched_ratio != 1:
1754                     if fixup_policy == 'warn':
1755                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1756                             info_dict['id'], stretched_ratio))
1757                     elif fixup_policy == 'detect_or_warn':
1758                         stretched_pp = FFmpegFixupStretchedPP(self)
1759                         if stretched_pp.available:
1760                             info_dict.setdefault('__postprocessors', [])
1761                             info_dict['__postprocessors'].append(stretched_pp)
1762                         else:
1763                             self.report_warning(
1764                                 '%s: Non-uniform pixel ratio (%s). %s'
1765                                 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
1766                     else:
1767                         assert fixup_policy in ('ignore', 'never')
1768
1769                 if (info_dict.get('requested_formats') is None and
1770                         info_dict.get('container') == 'm4a_dash'):
1771                     if fixup_policy == 'warn':
1772                         self.report_warning(
1773                             '%s: writing DASH m4a. '
1774                             'Only some players support this container.'
1775                             % info_dict['id'])
1776                     elif fixup_policy == 'detect_or_warn':
1777                         fixup_pp = FFmpegFixupM4aPP(self)
1778                         if fixup_pp.available:
1779                             info_dict.setdefault('__postprocessors', [])
1780                             info_dict['__postprocessors'].append(fixup_pp)
1781                         else:
1782                             self.report_warning(
1783                                 '%s: writing DASH m4a. '
1784                                 'Only some players support this container. %s'
1785                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
1786                     else:
1787                         assert fixup_policy in ('ignore', 'never')
1788
1789                 if (info_dict.get('protocol') == 'm3u8_native' or
1790                         info_dict.get('protocol') == 'm3u8' and
1791                         self.params.get('hls_prefer_native')):
1792                     if fixup_policy == 'warn':
1793                         self.report_warning('%s: malformated aac bitstream.' % (
1794                             info_dict['id']))
1795                     elif fixup_policy == 'detect_or_warn':
1796                         fixup_pp = FFmpegFixupM3u8PP(self)
1797                         if fixup_pp.available:
1798                             info_dict.setdefault('__postprocessors', [])
1799                             info_dict['__postprocessors'].append(fixup_pp)
1800                         else:
1801                             self.report_warning(
1802                                 '%s: malformated aac bitstream. %s'
1803                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
1804                     else:
1805                         assert fixup_policy in ('ignore', 'never')
1806
1807                 try:
1808                     self.post_process(filename, info_dict)
1809                 except (PostProcessingError) as err:
1810                     self.report_error('postprocessing: %s' % str(err))
1811                     return
1812                 self.record_download_archive(info_dict)
1813
1814     def download(self, url_list):
1815         """Download a given list of URLs."""
1816         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1817         if (len(url_list) > 1 and
1818                 '%' not in outtmpl and
1819                 self.params.get('max_downloads') != 1):
1820             raise SameFileError(outtmpl)
1821
1822         for url in url_list:
1823             try:
1824                 # It also downloads the videos
1825                 res = self.extract_info(
1826                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
1827             except UnavailableVideoError:
1828                 self.report_error('unable to download video')
1829             except MaxDownloadsReached:
1830                 self.to_screen('[info] Maximum number of downloaded files reached.')
1831                 raise
1832             else:
1833                 if self.params.get('dump_single_json', False):
1834                     self.to_stdout(json.dumps(res))
1835
1836         return self._download_retcode
1837
1838     def download_with_info_file(self, info_filename):
1839         with contextlib.closing(fileinput.FileInput(
1840                 [info_filename], mode='r',
1841                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1842             # FileInput doesn't have a read method, we can't call json.load
1843             info = self.filter_requested_info(json.loads('\n'.join(f)))
1844         try:
1845             self.process_ie_result(info, download=True)
1846         except DownloadError:
1847             webpage_url = info.get('webpage_url')
1848             if webpage_url is not None:
1849                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1850                 return self.download([webpage_url])
1851             else:
1852                 raise
1853         return self._download_retcode
1854
1855     @staticmethod
1856     def filter_requested_info(info_dict):
1857         return dict(
1858             (k, v) for k, v in info_dict.items()
1859             if k not in ['requested_formats', 'requested_subtitles'])
1860
1861     def post_process(self, filename, ie_info):
1862         """Run all the postprocessors on the given file."""
1863         info = dict(ie_info)
1864         info['filepath'] = filename
1865         pps_chain = []
1866         if ie_info.get('__postprocessors') is not None:
1867             pps_chain.extend(ie_info['__postprocessors'])
1868         pps_chain.extend(self._pps)
1869         for pp in pps_chain:
1870             files_to_delete = []
1871             try:
1872                 files_to_delete, info = pp.run(info)
1873             except PostProcessingError as e:
1874                 self.report_error(e.msg)
1875             if files_to_delete and not self.params.get('keepvideo', False):
1876                 for old_filename in files_to_delete:
1877                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1878                     try:
1879                         os.remove(encodeFilename(old_filename))
1880                     except (IOError, OSError):
1881                         self.report_warning('Unable to remove downloaded original file')
1882
1883     def _make_archive_id(self, info_dict):
1884         # Future-proof against any change in case
1885         # and backwards compatibility with prior versions
1886         extractor = info_dict.get('extractor_key')
1887         if extractor is None:
1888             if 'id' in info_dict:
1889                 extractor = info_dict.get('ie_key')  # key in a playlist
1890         if extractor is None:
1891             return None  # Incomplete video information
1892         return extractor.lower() + ' ' + info_dict['id']
1893
1894     def in_download_archive(self, info_dict):
1895         fn = self.params.get('download_archive')
1896         if fn is None:
1897             return False
1898
1899         vid_id = self._make_archive_id(info_dict)
1900         if vid_id is None:
1901             return False  # Incomplete video information
1902
1903         try:
1904             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1905                 for line in archive_file:
1906                     if line.strip() == vid_id:
1907                         return True
1908         except IOError as ioe:
1909             if ioe.errno != errno.ENOENT:
1910                 raise
1911         return False
1912
1913     def record_download_archive(self, info_dict):
1914         fn = self.params.get('download_archive')
1915         if fn is None:
1916             return
1917         vid_id = self._make_archive_id(info_dict)
1918         assert vid_id
1919         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1920             archive_file.write(vid_id + '\n')
1921
1922     @staticmethod
1923     def format_resolution(format, default='unknown'):
1924         if format.get('vcodec') == 'none':
1925             return 'audio only'
1926         if format.get('resolution') is not None:
1927             return format['resolution']
1928         if format.get('height') is not None:
1929             if format.get('width') is not None:
1930                 res = '%sx%s' % (format['width'], format['height'])
1931             else:
1932                 res = '%sp' % format['height']
1933         elif format.get('width') is not None:
1934             res = '%dx?' % format['width']
1935         else:
1936             res = default
1937         return res
1938
1939     def _format_note(self, fdict):
1940         res = ''
1941         if fdict.get('ext') in ['f4f', 'f4m']:
1942             res += '(unsupported) '
1943         if fdict.get('language'):
1944             if res:
1945                 res += ' '
1946             res += '[%s] ' % fdict['language']
1947         if fdict.get('format_note') is not None:
1948             res += fdict['format_note'] + ' '
1949         if fdict.get('tbr') is not None:
1950             res += '%4dk ' % fdict['tbr']
1951         if fdict.get('container') is not None:
1952             if res:
1953                 res += ', '
1954             res += '%s container' % fdict['container']
1955         if (fdict.get('vcodec') is not None and
1956                 fdict.get('vcodec') != 'none'):
1957             if res:
1958                 res += ', '
1959             res += fdict['vcodec']
1960             if fdict.get('vbr') is not None:
1961                 res += '@'
1962         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1963             res += 'video@'
1964         if fdict.get('vbr') is not None:
1965             res += '%4dk' % fdict['vbr']
1966         if fdict.get('fps') is not None:
1967             if res:
1968                 res += ', '
1969             res += '%sfps' % fdict['fps']
1970         if fdict.get('acodec') is not None:
1971             if res:
1972                 res += ', '
1973             if fdict['acodec'] == 'none':
1974                 res += 'video only'
1975             else:
1976                 res += '%-5s' % fdict['acodec']
1977         elif fdict.get('abr') is not None:
1978             if res:
1979                 res += ', '
1980             res += 'audio'
1981         if fdict.get('abr') is not None:
1982             res += '@%3dk' % fdict['abr']
1983         if fdict.get('asr') is not None:
1984             res += ' (%5dHz)' % fdict['asr']
1985         if fdict.get('filesize') is not None:
1986             if res:
1987                 res += ', '
1988             res += format_bytes(fdict['filesize'])
1989         elif fdict.get('filesize_approx') is not None:
1990             if res:
1991                 res += ', '
1992             res += '~' + format_bytes(fdict['filesize_approx'])
1993         return res
1994
1995     def list_formats(self, info_dict):
1996         formats = info_dict.get('formats', [info_dict])
1997         table = [
1998             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1999             for f in formats
2000             if f.get('preference') is None or f['preference'] >= -1000]
2001         if len(formats) > 1:
2002             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
2003
2004         header_line = ['format code', 'extension', 'resolution', 'note']
2005         self.to_screen(
2006             '[info] Available formats for %s:\n%s' %
2007             (info_dict['id'], render_table(header_line, table)))
2008
2009     def list_thumbnails(self, info_dict):
2010         thumbnails = info_dict.get('thumbnails')
2011         if not thumbnails:
2012             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2013             return
2014
2015         self.to_screen(
2016             '[info] Thumbnails for %s:' % info_dict['id'])
2017         self.to_screen(render_table(
2018             ['ID', 'width', 'height', 'URL'],
2019             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
2020
2021     def list_subtitles(self, video_id, subtitles, name='subtitles'):
2022         if not subtitles:
2023             self.to_screen('%s has no %s' % (video_id, name))
2024             return
2025         self.to_screen(
2026             'Available %s for %s:' % (name, video_id))
2027         self.to_screen(render_table(
2028             ['Language', 'formats'],
2029             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2030                 for lang, formats in subtitles.items()]))
2031
2032     def urlopen(self, req):
2033         """ Start an HTTP download """
2034         if isinstance(req, compat_basestring):
2035             req = sanitized_Request(req)
2036         return self._opener.open(req, timeout=self._socket_timeout)
2037
    def print_debug_header(self):
        """Write the '[debug]' header when the 'verbose' param is set.

        The header lists the encodings in use, the youtube-dl version,
        whether lazy extractor loading is enabled, the git HEAD (when it
        can be determined), the Python version and platform, the versions
        of external executables and the proxy map.  With the 'call_home'
        param it additionally fetches and prints the public IP address and
        warns when a newer youtube-dl version is available.
        """
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        # sys.stdout may have been replaced by an object without an
        # 'encoding' attribute; report the type of that object instead
        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
        if _LAZY_LOADER:
            self._write_string('[debug] Lazy loading extractors enabled' + '\n')
        # Best effort: ask git for the short HEAD hash of the checkout that
        # contains this file; any failure is silently ignored
        try:
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            # Only print output that starts like a hexadecimal hash
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        except Exception:
            # sys.exc_clear() only exists on Python 2; the inner handler
            # swallows the error raised when it is unavailable
            try:
                sys.exc_clear()
            except Exception:
                pass
        self._write_string('[debug] Python version %s - %s\n' % (
            platform.python_version(), platform_name()))

        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        # Executables with a falsy version are left out of the summary
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Merge the proxies of every opener handler that exposes them
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            # These requests are not guarded: an error raised by urlopen()
            # propagates to the caller
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
2104
2105     def _setup_opener(self):
2106         timeout_val = self.params.get('socket_timeout')
2107         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
2108
2109         opts_cookiefile = self.params.get('cookiefile')
2110         opts_proxy = self.params.get('proxy')
2111
2112         if opts_cookiefile is None:
2113             self.cookiejar = compat_cookiejar.CookieJar()
2114         else:
2115             opts_cookiefile = compat_expanduser(opts_cookiefile)
2116             self.cookiejar = compat_cookiejar.MozillaCookieJar(
2117                 opts_cookiefile)
2118             if os.access(opts_cookiefile, os.R_OK):
2119                 self.cookiejar.load()
2120
2121         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
2122         if opts_proxy is not None:
2123             if opts_proxy == '':
2124                 proxies = {}
2125             else:
2126                 proxies = {'http': opts_proxy, 'https': opts_proxy}
2127         else:
2128             proxies = compat_urllib_request.getproxies()
2129             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
2130             if 'http' in proxies and 'https' not in proxies:
2131                 proxies['https'] = proxies['http']
2132         proxy_handler = PerRequestProxyHandler(proxies)
2133
2134         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
2135         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
2136         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
2137         data_handler = compat_urllib_request_DataHandler()
2138
2139         # When passing our own FileHandler instance, build_opener won't add the
2140         # default FileHandler and allows us to disable the file protocol, which
2141         # can be used for malicious purposes (see
2142         # https://github.com/rg3/youtube-dl/issues/8227)
2143         file_handler = compat_urllib_request.FileHandler()
2144
2145         def file_open(*args, **kwargs):
2146             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
2147         file_handler.file_open = file_open
2148
2149         opener = compat_urllib_request.build_opener(
2150             proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)
2151
2152         # Delete the default user-agent header, which would otherwise apply in
2153         # cases where our custom HTTP handler doesn't come into play
2154         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
2155         opener.addheaders = []
2156         self._opener = opener
2157
2158     def encode(self, s):
2159         if isinstance(s, bytes):
2160             return s  # Already encoded
2161
2162         try:
2163             return s.encode(self.get_encoding())
2164         except UnicodeEncodeError as err:
2165             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2166             raise
2167
2168     def get_encoding(self):
2169         encoding = self.params.get('encoding')
2170         if encoding is None:
2171             encoding = preferredencoding()
2172         return encoding
2173
2174     def _write_thumbnails(self, info_dict, filename):
2175         if self.params.get('writethumbnail', False):
2176             thumbnails = info_dict.get('thumbnails')
2177             if thumbnails:
2178                 thumbnails = [thumbnails[-1]]
2179         elif self.params.get('write_all_thumbnails', False):
2180             thumbnails = info_dict.get('thumbnails')
2181         else:
2182             return
2183
2184         if not thumbnails:
2185             # No thumbnails present, so return immediately
2186             return
2187
2188         for t in thumbnails:
2189             thumb_ext = determine_ext(t['url'], 'jpg')
2190             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2191             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2192             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
2193
2194             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2195                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2196                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2197             else:
2198                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2199                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2200                 try:
2201                     uf = self.urlopen(t['url'])
2202                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2203                         shutil.copyfileobj(uf, thumbf)
2204                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2205                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2206                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2207                     self.report_warning('Unable to download thumbnail "%s": %s' %
2208                                         (t['url'], error_to_compat_str(err)))