git.bitcoin.ninja Git - youtube-dl/blob - youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import copy
   9 import datetime
  10 import errno
  11 import fileinput
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import socket
  23 import sys
  24 import time
  25 import tokenize
  26 import traceback
  27
  28 from .compat import (
  29     compat_basestring,
  30     compat_cookiejar,
  31     compat_expanduser,
  32     compat_get_terminal_size,
  33     compat_http_client,
  34     compat_kwargs,
  35     compat_os_name,
  36     compat_str,
  37     compat_tokenize_tokenize,
  38     compat_urllib_error,
  39     compat_urllib_request,
  40     compat_urllib_request_DataHandler,
  41 )
  42 from .utils import (
  43     age_restricted,
  44     args_to_str,
  45     ContentTooShortError,
  46     date_from_str,
  47     DateRange,
  48     DEFAULT_OUTTMPL,
  49     determine_ext,
  50     determine_protocol,
  51     DownloadError,
  52     encode_compat_str,
  53     encodeFilename,
  54     error_to_compat_str,
  55     ExtractorError,
  56     format_bytes,
  57     formatSeconds,
  58     locked_file,
  59     make_HTTPS_handler,
  60     MaxDownloadsReached,
  61     PagedList,
  62     parse_filesize,
  63     PerRequestProxyHandler,
  64     platform_name,
  65     PostProcessingError,
  66     preferredencoding,
  67     prepend_extension,
  68     register_socks_protocols,
  69     render_table,
  70     replace_extension,
  71     SameFileError,
  72     sanitize_filename,
  73     sanitize_path,
  74     sanitize_url,
  75     sanitized_Request,
  76     std_headers,
  77     subtitles_filename,
  78     UnavailableVideoError,
  79     url_basename,
  80     version_tuple,
  81     write_json_file,
  82     write_string,
  83     YoutubeDLCookieProcessor,
  84     YoutubeDLHandler,
  85 )
  86 from .cache import Cache
  87 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
  88 from .downloader import get_suitable_downloader
  89 from .downloader.rtmp import rtmpdump_version
  90 from .postprocessor import (
  91     FFmpegFixupM3u8PP,
  92     FFmpegFixupM4aPP,
  93     FFmpegFixupStretchedPP,
  94     FFmpegMergerPP,
  95     FFmpegPostProcessor,
  96     get_postprocessor,
  97 )
  98 from .version import __version__
  99
 100 if compat_os_name == 'nt':
 101     import ctypes
 102
 103
 104 class YoutubeDL(object):
 105     """YoutubeDL class.
 106
 107     YoutubeDL objects are the ones responsible for downloading the
 108     actual video file and writing it to disk if the user has requested
 109     it, among some other tasks. In most cases there should be one per
 110     program. As, given a video URL, the downloader doesn't know how to
 111     extract all the needed information, task that InfoExtractors do, it
 112     has to pass the URL to one of them.
 113
 114     For this, YoutubeDL objects have a method that allows
 115     InfoExtractors to be registered in a given order. When it is passed
 116     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 117     finds that reports being able to handle it. The InfoExtractor extracts
 118     all the information about the video or videos the URL refers to, and
 119     YoutubeDL processes the extracted information, possibly using a File
 120     Downloader to download the video.
 121
 122     YoutubeDL objects accept a lot of parameters. In order not to saturate
 123     the object constructor with arguments, it receives a dictionary of
 124     options instead. These options are available through the params
 125     attribute for the InfoExtractors to use. The YoutubeDL also
 126     registers itself as the downloader in charge for the InfoExtractors
 127     that are added to it, so this is a "mutual registration".
 128
 129     Available options:
 130
 131     username:          Username for authentication purposes.
 132     password:          Password for authentication purposes.
 133     videopassword:     Password for accessing a video.
 134     usenetrc:          Use netrc for authentication instead.
 135     verbose:           Print additional info to stdout.
 136     quiet:             Do not print messages to stdout.
 137     no_warnings:       Do not print out anything for warnings.
 138     forceurl:          Force printing final URL.
 139     forcetitle:        Force printing title.
 140     forceid:           Force printing ID.
 141     forcethumbnail:    Force printing thumbnail URL.
 142     forcedescription:  Force printing description.
 143     forcefilename:     Force printing final filename.
 144     forceduration:     Force printing duration.
 145     forcejson:         Force printing info_dict as JSON.
 146     dump_single_json:  Force printing the info_dict of the whole playlist
 147                        (or video) as a single JSON line.
 148     simulate:          Do not download the video files.
 149     format:            Video format code. See options.py for more information.
 150     outtmpl:           Template for output names.
 151     restrictfilenames: Do not allow "&" and spaces in file names
 152     ignoreerrors:      Do not stop on download errors.
 153     force_generic_extractor: Force downloader to use the generic extractor
 154     nooverwrites:      Prevent overwriting files.
 155     playliststart:     Playlist item to start at.
 156     playlistend:       Playlist item to end at.
 157     playlist_items:    Specific indices of playlist to download.
 158     playlistreverse:   Download playlist items in reverse order.
 159     matchtitle:        Download only matching titles.
 160     rejecttitle:       Reject downloads for matching titles.
 161     logger:            Log messages to a logging.Logger instance.
 162     logtostderr:       Log messages to stderr instead of stdout.
 163     writedescription:  Write the video description to a .description file
 164     writeinfojson:     Write the video description to a .info.json file
 165     writeannotations:  Write the video annotations to a .annotations.xml file
 166     writethumbnail:    Write the thumbnail image to a file
 167     write_all_thumbnails:  Write all thumbnail formats to files
 168     writesubtitles:    Write the video subtitles to a file
 169     writeautomaticsub: Write the automatically generated subtitles to a file
 170     allsubtitles:      Downloads all the subtitles of the video
 171                        (requires writesubtitles or writeautomaticsub)
 172     listsubtitles:     Lists all available subtitles for the video
 173     subtitlesformat:   The format code for subtitles
 174     subtitleslangs:    List of languages of the subtitles to download
 175     keepvideo:         Keep the video file after post-processing
 176     daterange:         A DateRange object, download only if the upload_date is in the range.
 177     skip_download:     Skip the actual download of the video file
 178     cachedir:          Location of the cache files in the filesystem.
 179                        False to disable filesystem cache.
 180     noplaylist:        Download single video instead of a playlist if in doubt.
 181     age_limit:         An integer representing the user's age in years.
 182                        Unsuitable videos for the given age are skipped.
 183     min_views:         An integer representing the minimum view count the video
 184                        must have in order to not be skipped.
 185                        Videos without view count information are always
 186                        downloaded. None for no limit.
 187     max_views:         An integer representing the maximum view count.
 188                        Videos that are more popular than that are not
 189                        downloaded.
 190                        Videos without view count information are always
 191                        downloaded. None for no limit.
 192     download_archive:  File name of a file where all downloads are recorded.
 193                        Videos already present in the file are not downloaded
 194                        again.
 195     cookiefile:        File name where cookies should be read from and dumped to.
 196     nocheckcertificate:Do not verify SSL certificates
 197     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 198                        At the moment, this is only supported by YouTube.
 199     proxy:             URL of the proxy server to use
 200     geo_verification_proxy:  URL of the proxy to use for IP address verification
 201                        on geo-restricted sites. (Experimental)
 202     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 203     bidi_workaround:   Work around buggy terminals without bidirectional text
 204                        support, using fribidi
 205     debug_printtraffic:Print out sent and received HTTP traffic
 206     include_ads:       Download ads as well
 207     default_search:    Prepend this string if an input url is not valid.
 208                        'auto' for elaborate guessing
 209     encoding:          Use this encoding instead of the system-specified.
 210     extract_flat:      Do not resolve URLs, return the immediate result.
 211                        Pass in 'in_playlist' to only show this behavior for
 212                        playlist items.
 213     postprocessors:    A list of dictionaries, each with an entry
 214                        * key:  The name of the postprocessor. See
 215                                youtube_dl/postprocessor/__init__.py for a list.
 216                        as well as any further keyword arguments for the
 217                        postprocessor.
 218     progress_hooks:    A list of functions that get called on download
 219                        progress, with a dictionary with the entries
 220                        * status: One of "downloading", "error", or "finished".
 221                                  Check this first and ignore unknown values.
 222
 223                        If status is one of "downloading", or "finished", the
 224                        following properties may also be present:
 225                        * filename: The final filename (always present)
 226                        * tmpfilename: The filename we're currently writing to
 227                        * downloaded_bytes: Bytes on disk
 228                        * total_bytes: Size of the whole file, None if unknown
 229                        * total_bytes_estimate: Guess of the eventual file size,
 230                                                None if unavailable.
 231                        * elapsed: The number of seconds since download started.
 232                        * eta: The estimated time in seconds, None if unknown
 233                        * speed: The download speed in bytes/second, None if
 234                                 unknown
 235                        * fragment_index: The counter of the currently
 236                                          downloaded video fragment.
 237                        * fragment_count: The number of fragments (= individual
 238                                          files that will be merged)
 239
 240                        Progress hooks are guaranteed to be called at least once
 241                        (with status "finished") if the download is successful.
 242     merge_output_format: Extension to use when merging formats.
 243     fixup:             Automatically correct known faults of the file.
 244                        One of:
 245                        - "never": do nothing
 246                        - "warn": only emit a warning
 247                        - "detect_or_warn": check whether we can do anything
 248                                            about it, warn otherwise (default)
 249     source_address:    (Experimental) Client-side IP address to bind to.
 250     call_home:         Boolean, true iff we are allowed to contact the
 251                        youtube-dl servers for debugging.
 252     sleep_interval:    Number of seconds to sleep before each download when
 253                        used alone or a lower bound of a range for randomized
 254                        sleep before each download (minimum possible number
 255                        of seconds to sleep) when used along with
 256                        max_sleep_interval.
 257     max_sleep_interval:Upper bound of a range for randomized sleep before each
 258                        download (maximum possible number of seconds to sleep).
 259                        Must only be used along with sleep_interval.
 260                        Actual sleep time will be a random float from range
 261                        [sleep_interval; max_sleep_interval].
 262     listformats:       Print an overview of available video formats and exit.
 263     list_thumbnails:   Print a table of all thumbnails and exit.
 264     match_filter:      A function that gets called with the info_dict of
 265                        every video.
 266                        If it returns a message, the video is ignored.
 267                        If it returns None, the video is downloaded.
 268                        match_filter_func in utils.py is one example for this.
 269     no_color:          Do not emit color codes in output.
 270
 271     The following options determine which downloader is picked:
 272     external_downloader: Executable of the external downloader to call.
 273                        None or unset for standard (built-in) downloader.
 274     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
 275                        if True, otherwise use ffmpeg/avconv if False, otherwise
 276                        use downloader suggested by extractor if None.
 277
 278     The following parameters are not used by YoutubeDL itself, they are used by
 279     the downloader (see youtube_dl/downloader/common.py):
 280     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 281     noresizebuffer, retries, continuedl, noprogress, consoletitle,
 282     xattr_set_filesize, external_downloader_args, hls_use_mpegts.
 283
 284     The following options are used by the post processors:
 285     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
 286                        otherwise prefer avconv.
 287     postprocessor_args: A list of additional command-line arguments for the
 288                         postprocessor.
 289     """
 290
 291     params = None
 292     _ies = []
 293     _pps = []
 294     _download_retcode = None
 295     _num_downloads = None
 296     _screen_file = None
 297
 298     def __init__(self, params=None, auto_init=True):
 299         """Create a FileDownloader object with the given options."""
 300         if params is None:
 301             params = {}
 302         self._ies = []
 303         self._ies_instances = {}
 304         self._pps = []
 305         self._progress_hooks = []
 306         self._download_retcode = 0
 307         self._num_downloads = 0
 308         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 309         self._err_file = sys.stderr
 310         self.params = {
 311             # Default parameters
 312             'nocheckcertificate': False,
 313         }
 314         self.params.update(params)
 315         self.cache = Cache(self)
 316
 317         if self.params.get('cn_verification_proxy') is not None:
 318             self.report_warning('--cn-verification-proxy is deprecated. Use --geo-verification-proxy instead.')
 319             if self.params.get('geo_verification_proxy') is None:
 320                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 321
 322         if params.get('bidi_workaround', False):
 323             try:
 324                 import pty
 325                 master, slave = pty.openpty()
 326                 width = compat_get_terminal_size().columns
 327                 if width is None:
 328                     width_args = []
 329                 else:
 330                     width_args = ['-w', str(width)]
 331                 sp_kwargs = dict(
 332                     stdin=subprocess.PIPE,
 333                     stdout=slave,
 334                     stderr=self._err_file)
 335                 try:
 336                     self._output_process = subprocess.Popen(
 337                         ['bidiv'] + width_args, **sp_kwargs
 338                     )
 339                 except OSError:
 340                     self._output_process = subprocess.Popen(
 341                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 342                 self._output_channel = os.fdopen(master, 'rb')
 343             except OSError as ose:
 344                 if ose.errno == errno.ENOENT:
 345                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 346                 else:
 347                     raise
 348
 349         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 350                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
 351                 not params.get('restrictfilenames', False)):
 352             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 353             self.report_warning(
 354                 'Assuming --restrict-filenames since file system encoding '
 355                 'cannot encode all characters. '
 356                 'Set the LC_ALL environment variable to fix this.')
 357             self.params['restrictfilenames'] = True
 358
 359         if isinstance(params.get('outtmpl'), bytes):
 360             self.report_warning(
 361                 'Parameter outtmpl is bytes, but should be a unicode string. '
 362                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 363
 364         self._setup_opener()
 365
 366         if auto_init:
 367             self.print_debug_header()
 368             self.add_default_info_extractors()
 369
 370         for pp_def_raw in self.params.get('postprocessors', []):
 371             pp_class = get_postprocessor(pp_def_raw['key'])
 372             pp_def = dict(pp_def_raw)
 373             del pp_def['key']
 374             pp = pp_class(self, **compat_kwargs(pp_def))
 375             self.add_post_processor(pp)
 376
 377         for ph in self.params.get('progress_hooks', []):
 378             self.add_progress_hook(ph)
 379
 380         register_socks_protocols()
 381
 382     def warn_if_short_id(self, argv):
 383         # short YouTube ID starting with dash?
 384         idxs = [
 385             i for i, a in enumerate(argv)
 386             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 387         if idxs:
 388             correct_argv = (
 389                 ['youtube-dl'] +
 390                 [a for i, a in enumerate(argv) if i not in idxs] +
 391                 ['--'] + [argv[i] for i in idxs]
 392             )
 393             self.report_warning(
 394                 'Long argument string detected. '
 395                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 396                 args_to_str(correct_argv))
 397
 398     def add_info_extractor(self, ie):
 399         """Add an InfoExtractor object to the end of the list."""
 400         self._ies.append(ie)
 401         if not isinstance(ie, type):
 402             self._ies_instances[ie.ie_key()] = ie
 403             ie.set_downloader(self)
 404
 405     def get_info_extractor(self, ie_key):
 406         """
 407         Get an instance of an IE with name ie_key, it will try to get one from
 408         the _ies list, if there's no instance it will create a new one and add
 409         it to the extractor list.
 410         """
 411         ie = self._ies_instances.get(ie_key)
 412         if ie is None:
 413             ie = get_info_extractor(ie_key)()
 414             self.add_info_extractor(ie)
 415         return ie
 416
 417     def add_default_info_extractors(self):
 418         """
 419         Add the InfoExtractors returned by gen_extractors to the end of the list
 420         """
 421         for ie in gen_extractor_classes():
 422             self.add_info_extractor(ie)
 423
 424     def add_post_processor(self, pp):
 425         """Add a PostProcessor object to the end of the chain."""
 426         self._pps.append(pp)
 427         pp.set_downloader(self)
 428
 429     def add_progress_hook(self, ph):
 430         """Add the progress hook (currently only for the file downloader)"""
 431         self._progress_hooks.append(ph)
 432
 433     def _bidi_workaround(self, message):
 434         if not hasattr(self, '_output_channel'):
 435             return message
 436
 437         assert hasattr(self, '_output_process')
 438         assert isinstance(message, compat_str)
 439         line_count = message.count('\n') + 1
 440         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 441         self._output_process.stdin.flush()
 442         res = ''.join(self._output_channel.readline().decode('utf-8')
 443                       for _ in range(line_count))
 444         return res[:-len('\n')]
 445
 446     def to_screen(self, message, skip_eol=False):
 447         """Print message to stdout if not in quiet mode."""
 448         return self.to_stdout(message, skip_eol, check_quiet=True)
 449
 450     def _write_string(self, s, out=None):
 451         write_string(s, out=out, encoding=self.params.get('encoding'))
 452
 453     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 454         """Print message to stdout if not in quiet mode."""
 455         if self.params.get('logger'):
 456             self.params['logger'].debug(message)
 457         elif not check_quiet or not self.params.get('quiet', False):
 458             message = self._bidi_workaround(message)
 459             terminator = ['\n', ''][skip_eol]
 460             output = message + terminator
 461
 462             self._write_string(output, self._screen_file)
 463
 464     def to_stderr(self, message):
 465         """Print message to stderr."""
 466         assert isinstance(message, compat_str)
 467         if self.params.get('logger'):
 468             self.params['logger'].error(message)
 469         else:
 470             message = self._bidi_workaround(message)
 471             output = message + '\n'
 472             self._write_string(output, self._err_file)
 473
 474     def to_console_title(self, message):
 475         if not self.params.get('consoletitle', False):
 476             return
 477         if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 478             # c_wchar_p() might not be necessary if `message` is
 479             # already of type unicode()
 480             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 481         elif 'TERM' in os.environ:
 482             self._write_string('\033]0;%s\007' % message, self._screen_file)
 483
 484     def save_console_title(self):
 485         if not self.params.get('consoletitle', False):
 486             return
 487         if 'TERM' in os.environ:
 488             # Save the title on stack
 489             self._write_string('\033[22;0t', self._screen_file)
 490
 491     def restore_console_title(self):
 492         if not self.params.get('consoletitle', False):
 493             return
 494         if 'TERM' in os.environ:
 495             # Restore the title from stack
 496             self._write_string('\033[23;0t', self._screen_file)
 497
 498     def __enter__(self):
 499         self.save_console_title()
 500         return self
 501
 502     def __exit__(self, *args):
 503         self.restore_console_title()
 504
 505         if self.params.get('cookiefile') is not None:
 506             self.cookiejar.save()
 507
 508     def trouble(self, message=None, tb=None):
 509         """Determine action to take when a download problem appears.
 510
 511         Depending on if the downloader has been configured to ignore
 512         download errors or not, this method may throw an exception or
 513         not when errors are found, after printing the message.
 514
 515         tb, if given, is additional traceback information.
 516         """
 517         if message is not None:
 518             self.to_stderr(message)
 519         if self.params.get('verbose'):
 520             if tb is None:
 521                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 522                     tb = ''
 523                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 524                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 525                     tb += encode_compat_str(traceback.format_exc())
 526                 else:
 527                     tb_data = traceback.format_list(traceback.extract_stack())
 528                     tb = ''.join(tb_data)
 529             self.to_stderr(tb)
 530         if not self.params.get('ignoreerrors', False):
 531             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 532                 exc_info = sys.exc_info()[1].exc_info
 533             else:
 534                 exc_info = sys.exc_info()
 535             raise DownloadError(message, exc_info)
 536         self._download_retcode = 1
 537
 538     def report_warning(self, message):
 539         '''
 540         Print the message to stderr, it will be prefixed with 'WARNING:'
 541         If stderr is a tty file the 'WARNING:' will be colored
 542         '''
 543         if self.params.get('logger') is not None:
 544             self.params['logger'].warning(message)
 545         else:
 546             if self.params.get('no_warnings'):
 547                 return
 548             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 549                 _msg_header = '\033[0;33mWARNING:\033[0m'
 550             else:
 551                 _msg_header = 'WARNING:'
 552             warning_message = '%s %s' % (_msg_header, message)
 553             self.to_stderr(warning_message)
 554
 555     def report_error(self, message, tb=None):
 556         '''
 557         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 558         in red if stderr is a tty file.
 559         '''
 560         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 561             _msg_header = '\033[0;31mERROR:\033[0m'
 562         else:
 563             _msg_header = 'ERROR:'
 564         error_message = '%s %s' % (_msg_header, message)
 565         self.trouble(error_message, tb)
 566
 567     def report_file_already_downloaded(self, file_name):
 568         """Report file has already been fully downloaded."""
 569         try:
 570             self.to_screen('[download] %s has already been downloaded' % file_name)
 571         except UnicodeEncodeError:
 572             self.to_screen('[download] The file has already been downloaded')
 573
 574     def prepare_filename(self, info_dict):
 575         """Generate the output filename."""
 576         try:
 577             template_dict = dict(info_dict)
 578
 579             template_dict['epoch'] = int(time.time())
 580             autonumber_size = self.params.get('autonumber_size')
 581             if autonumber_size is None:
 582                 autonumber_size = 5
 583             autonumber_templ = '%0' + str(autonumber_size) + 'd'
 584             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 585             if template_dict.get('playlist_index') is not None:
 586                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
 587             if template_dict.get('resolution') is None:
 588                 if template_dict.get('width') and template_dict.get('height'):
 589                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 590                 elif template_dict.get('height'):
 591                     template_dict['resolution'] = '%sp' % template_dict['height']
 592                 elif template_dict.get('width'):
 593                     template_dict['resolution'] = '%dx?' % template_dict['width']
 594
 595             sanitize = lambda k, v: sanitize_filename(
 596                 compat_str(v),
 597                 restricted=self.params.get('restrictfilenames'),
 598                 is_id=(k == 'id'))
 599             template_dict = dict((k, sanitize(k, v))
 600                                  for k, v in template_dict.items()
 601                                  if v is not None and not isinstance(v, (list, tuple, dict)))
 602             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 603
 604             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 605             tmpl = compat_expanduser(outtmpl)
 606             filename = tmpl % template_dict
 607             # Temporary fix for #4787
 608             # 'Treat' all problem characters by passing filename through preferredencoding
 609             # to workaround encoding issues with subprocess on python2 @ Windows
 610             if sys.version_info < (3, 0) and sys.platform == 'win32':
 611                 filename = encodeFilename(filename, True).decode(preferredencoding())
 612             return sanitize_path(filename)
 613         except ValueError as err:
 614             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 615             return None
 616
 617     def _match_entry(self, info_dict, incomplete):
 618         """ Returns None iff the file should be downloaded """
 619
 620         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 621         if 'title' in info_dict:
 622             # This can happen when we're just evaluating the playlist
 623             title = info_dict['title']
 624             matchtitle = self.params.get('matchtitle', False)
 625             if matchtitle:
 626                 if not re.search(matchtitle, title, re.IGNORECASE):
 627                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 628             rejecttitle = self.params.get('rejecttitle', False)
 629             if rejecttitle:
 630                 if re.search(rejecttitle, title, re.IGNORECASE):
 631                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 632         date = info_dict.get('upload_date')
 633         if date is not None:
 634             dateRange = self.params.get('daterange', DateRange())
 635             if date not in dateRange:
 636                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 637         view_count = info_dict.get('view_count')
 638         if view_count is not None:
 639             min_views = self.params.get('min_views')
 640             if min_views is not None and view_count < min_views:
 641                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 642             max_views = self.params.get('max_views')
 643             if max_views is not None and view_count > max_views:
 644                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 645         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
 646             return 'Skipping "%s" because it is age restricted' % video_title
 647         if self.in_download_archive(info_dict):
 648             return '%s has already been recorded in archive' % video_title
 649
 650         if not incomplete:
 651             match_filter = self.params.get('match_filter')
 652             if match_filter is not None:
 653                 ret = match_filter(info_dict)
 654                 if ret is not None:
 655                     return ret
 656
 657         return None
 658
 659     @staticmethod
 660     def add_extra_info(info_dict, extra_info):
 661         '''Set the keys from extra_info in info dict if they are missing'''
 662         for key, value in extra_info.items():
 663             info_dict.setdefault(key, value)
 664
 665     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 666                      process=True, force_generic_extractor=False):
 667         '''
 668         Returns a list with a dictionary for each video we find.
 669         If 'download', also downloads the videos.
 670         extra_info is a dict containing the extra values to add to each result
 671         '''
 672
 673         if not ie_key and force_generic_extractor:
 674             ie_key = 'Generic'
 675
 676         if ie_key:
 677             ies = [self.get_info_extractor(ie_key)]
 678         else:
 679             ies = self._ies
 680
 681         for ie in ies:
 682             if not ie.suitable(url):
 683                 continue
 684
 685             ie = self.get_info_extractor(ie.ie_key())
 686             if not ie.working():
 687                 self.report_warning('The program functionality for this site has been marked as broken, '
 688                                     'and will probably not work.')
 689
 690             try:
 691                 ie_result = ie.extract(url)
 692                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
 693                     break
 694                 if isinstance(ie_result, list):
 695                     # Backwards compatibility: old IE result format
 696                     ie_result = {
 697                         '_type': 'compat_list',
 698                         'entries': ie_result,
 699                     }
 700                 self.add_default_extra_info(ie_result, ie, url)
 701                 if process:
 702                     return self.process_ie_result(ie_result, download, extra_info)
 703                 else:
 704                     return ie_result
 705             except ExtractorError as e:  # An error we somewhat expected
 706                 self.report_error(compat_str(e), e.format_traceback())
 707                 break
 708             except MaxDownloadsReached:
 709                 raise
 710             except Exception as e:
 711                 if self.params.get('ignoreerrors', False):
 712                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
 713                     break
 714                 else:
 715                     raise
 716         else:
 717             self.report_error('no suitable InfoExtractor for URL %s' % url)
 718
 719     def add_default_extra_info(self, ie_result, ie, url):
 720         self.add_extra_info(ie_result, {
 721             'extractor': ie.IE_NAME,
 722             'webpage_url': url,
 723             'webpage_url_basename': url_basename(url),
 724             'extractor_key': ie.ie_key(),
 725         })
 726
 727     def process_ie_result(self, ie_result, download=True, extra_info={}):
 728         """
 729         Take the result of the ie(may be modified) and resolve all unresolved
 730         references (URLs, playlist items).
 731
 732         It will also download the videos if 'download'.
 733         Returns the resolved ie_result.
 734         """
 735         result_type = ie_result.get('_type', 'video')
 736
 737         if result_type in ('url', 'url_transparent'):
 738             ie_result['url'] = sanitize_url(ie_result['url'])
 739             extract_flat = self.params.get('extract_flat', False)
 740             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
 741                     extract_flat is True):
 742                 if self.params.get('forcejson', False):
 743                     self.to_stdout(json.dumps(ie_result))
 744                 return ie_result
 745
 746         if result_type == 'video':
 747             self.add_extra_info(ie_result, extra_info)
 748             return self.process_video_result(ie_result, download=download)
 749         elif result_type == 'url':
 750             # We have to add extra_info to the results because it may be
 751             # contained in a playlist
 752             return self.extract_info(ie_result['url'],
 753                                      download,
 754                                      ie_key=ie_result.get('ie_key'),
 755                                      extra_info=extra_info)
 756         elif result_type == 'url_transparent':
 757             # Use the information from the embedding page
 758             info = self.extract_info(
 759                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 760                 extra_info=extra_info, download=False, process=False)
 761
 762             force_properties = dict(
 763                 (k, v) for k, v in ie_result.items() if v is not None)
 764             for f in ('_type', 'url', 'ie_key'):
 765                 if f in force_properties:
 766                     del force_properties[f]
 767             new_result = info.copy()
 768             new_result.update(force_properties)
 769
 770             assert new_result.get('_type') != 'url_transparent'
 771
 772             return self.process_ie_result(
 773                 new_result, download=download, extra_info=extra_info)
 774         elif result_type == 'playlist' or result_type == 'multi_video':
 775             # We process each entry in the playlist
 776             playlist = ie_result.get('title') or ie_result.get('id')
 777             self.to_screen('[download] Downloading playlist: %s' % playlist)
 778
 779             playlist_results = []
 780
 781             playliststart = self.params.get('playliststart', 1) - 1
 782             playlistend = self.params.get('playlistend')
 783             # For backwards compatibility, interpret -1 as whole list
 784             if playlistend == -1:
 785                 playlistend = None
 786
 787             playlistitems_str = self.params.get('playlist_items')
 788             playlistitems = None
 789             if playlistitems_str is not None:
 790                 def iter_playlistitems(format):
 791                     for string_segment in format.split(','):
 792                         if '-' in string_segment:
 793                             start, end = string_segment.split('-')
 794                             for item in range(int(start), int(end) + 1):
 795                                 yield int(item)
 796                         else:
 797                             yield int(string_segment)
 798                 playlistitems = iter_playlistitems(playlistitems_str)
 799
 800             ie_entries = ie_result['entries']
 801             if isinstance(ie_entries, list):
 802                 n_all_entries = len(ie_entries)
 803                 if playlistitems:
 804                     entries = [
 805                         ie_entries[i - 1] for i in playlistitems
 806                         if -n_all_entries <= i - 1 < n_all_entries]
 807                 else:
 808                     entries = ie_entries[playliststart:playlistend]
 809                 n_entries = len(entries)
 810                 self.to_screen(
 811                     '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
 812                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
 813             elif isinstance(ie_entries, PagedList):
 814                 if playlistitems:
 815                     entries = []
 816                     for item in playlistitems:
 817                         entries.extend(ie_entries.getslice(
 818                             item - 1, item
 819                         ))
 820                 else:
 821                     entries = ie_entries.getslice(
 822                         playliststart, playlistend)
 823                 n_entries = len(entries)
 824                 self.to_screen(
 825                     '[%s] playlist %s: Downloading %d videos' %
 826                     (ie_result['extractor'], playlist, n_entries))
 827             else:  # iterable
 828                 if playlistitems:
 829                     entry_list = list(ie_entries)
 830                     entries = [entry_list[i - 1] for i in playlistitems]
 831                 else:
 832                     entries = list(itertools.islice(
 833                         ie_entries, playliststart, playlistend))
 834                 n_entries = len(entries)
 835                 self.to_screen(
 836                     '[%s] playlist %s: Downloading %d videos' %
 837                     (ie_result['extractor'], playlist, n_entries))
 838
 839             if self.params.get('playlistreverse', False):
 840                 entries = entries[::-1]
 841
 842             for i, entry in enumerate(entries, 1):
 843                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
 844                 extra = {
 845                     'n_entries': n_entries,
 846                     'playlist': playlist,
 847                     'playlist_id': ie_result.get('id'),
 848                     'playlist_title': ie_result.get('title'),
 849                     'playlist_index': i + playliststart,
 850                     'extractor': ie_result['extractor'],
 851                     'webpage_url': ie_result['webpage_url'],
 852                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
 853                     'extractor_key': ie_result['extractor_key'],
 854                 }
 855
 856                 reason = self._match_entry(entry, incomplete=True)
 857                 if reason is not None:
 858                     self.to_screen('[download] ' + reason)
 859                     continue
 860
 861                 entry_result = self.process_ie_result(entry,
 862                                                       download=download,
 863                                                       extra_info=extra)
 864                 playlist_results.append(entry_result)
 865             ie_result['entries'] = playlist_results
 866             self.to_screen('[download] Finished downloading playlist: %s' % playlist)
 867             return ie_result
 868         elif result_type == 'compat_list':
 869             self.report_warning(
 870                 'Extractor %s returned a compat_list result. '
 871                 'It needs to be updated.' % ie_result.get('extractor'))
 872
 873             def _fixup(r):
 874                 self.add_extra_info(
 875                     r,
 876                     {
 877                         'extractor': ie_result['extractor'],
 878                         'webpage_url': ie_result['webpage_url'],
 879                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
 880                         'extractor_key': ie_result['extractor_key'],
 881                     }
 882                 )
 883                 return r
 884             ie_result['entries'] = [
 885                 self.process_ie_result(_fixup(r), download, extra_info)
 886                 for r in ie_result['entries']
 887             ]
 888             return ie_result
 889         else:
 890             raise Exception('Invalid result type: %s' % result_type)
 891
 892     def _build_format_filter(self, filter_spec):
 893         " Returns a function to filter the formats according to the filter_spec "
 894
 895         OPERATORS = {
 896             '<': operator.lt,
 897             '<=': operator.le,
 898             '>': operator.gt,
 899             '>=': operator.ge,
 900             '=': operator.eq,
 901             '!=': operator.ne,
 902         }
 903         operator_rex = re.compile(r'''(?x)\s*
 904             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
 905             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
 906             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
 907             $
 908             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
 909         m = operator_rex.search(filter_spec)
 910         if m:
 911             try:
 912                 comparison_value = int(m.group('value'))
 913             except ValueError:
 914                 comparison_value = parse_filesize(m.group('value'))
 915                 if comparison_value is None:
 916                     comparison_value = parse_filesize(m.group('value') + 'B')
 917                 if comparison_value is None:
 918                     raise ValueError(
 919                         'Invalid value %r in format specification %r' % (
 920                             m.group('value'), filter_spec))
 921             op = OPERATORS[m.group('op')]
 922
 923         if not m:
 924             STR_OPERATORS = {
 925                 '=': operator.eq,
 926                 '!=': operator.ne,
 927                 '^=': lambda attr, value: attr.startswith(value),
 928                 '$=': lambda attr, value: attr.endswith(value),
 929                 '*=': lambda attr, value: value in attr,
 930             }
 931             str_operator_rex = re.compile(r'''(?x)
 932                 \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
 933                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
 934                 \s*(?P<value>[a-zA-Z0-9._-]+)
 935                 \s*$
 936                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
 937             m = str_operator_rex.search(filter_spec)
 938             if m:
 939                 comparison_value = m.group('value')
 940                 op = STR_OPERATORS[m.group('op')]
 941
 942         if not m:
 943             raise ValueError('Invalid filter specification %r' % filter_spec)
 944
 945         def _filter(f):
 946             actual_value = f.get(m.group('key'))
 947             if actual_value is None:
 948                 return m.group('none_inclusive')
 949             return op(actual_value, comparison_value)
 950         return _filter
 951
    def build_format_selector(self, format_spec):
        """
        Compile a format specification string into a selector function.

        format_spec is tokenized with the Python tokenizer and parsed into a
        tree of FormatSelector tuples. The grammar handled below:
        * a name or number is a single selector ('best', 'worst',
          'bestaudio', 'worstaudio', 'bestvideo', 'worstvideo', 'all', a
          known extension or a format_id)
        * 'a,b' selects the results of both a and b
        * 'a/b' selects a, or b when a yields nothing
        * 'a+b' merges a video format with an audio format
        * '(...)' groups selectors
        * '[...]' attaches a filter (see _build_format_filter) to the
          preceding selector, or to 'best' when there is none

        Returns a function that takes a ctx dict (its 'formats' and
        'incomplete_formats' keys are read) and returns an iterable of the
        selected formats.
        Raises SyntaxError when format_spec cannot be parsed.
        """
        def syntax_error(note, start):
            """Build (not raise) a SyntaxError pointing at column start[1] of format_spec."""
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        # Node types of the parsed selector tree
        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        # 'selector' holds a string for SINGLE, a pair for PICKFIRST and
        # MERGE, and a list of selectors for GROUP; 'filters' is a list of
        # filter specification strings
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        def _parse_filter(tokens):
            """
            Consume tokens up to the closing ']' and return their joined text.

            Returns None implicitly when the tokens run out before a ']'.
            """
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            """Generator that normalizes the raw token stream for the parser."""
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            # Pending joined name; last_line is never updated and stays None
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    # Flush the pending name before the filter starts
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    # Names, numbers and unused operators are glued together;
                    # tokens of any other type are dropped
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            """
            Recursively parse tokens into a list of FormatSelector tuples.

            tokens must provide restore_last_token() so that a token ending
            a nested context can be handed back to the enclosing call. The
            inside_* flags tell which construct this call is parsing the
            right-hand side or content of.
            """
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        # The right-hand side of '+' ends here; the operator
                        # belongs to the enclosing call
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        # The alternatives of '/' end at the next ','
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        # second_choice is a list of selectors
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        # A filter without a preceding selector applies to 'best'
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        video_selector = current_selector
                        audio_selector = _parse_format_selection(tokens, inside_merge=True)
                        if not video_selector or not audio_selector:
                            raise syntax_error('"+" must be between two format selectors', start)
                        current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _build_selector_function(selector):
            """
            Turn a parsed selector (or list of selectors) into a function.

            The returned function takes the ctx dict and returns an iterable
            of formats. For a single FormatSelector the function first
            applies the selector's filters to a deep copy of ctx.
            """
            if isinstance(selector, list):
                fs = [_build_selector_function(s) for s in selector]

                # A list selects everything its members select, in order;
                # it is returned directly since a list carries no filters
                def selector_function(ctx):
                    for f in fs:
                        for format in f(ctx):
                            yield format
                return selector_function
            elif selector.type == GROUP:
                selector_function = _build_selector_function(selector.selector)
            elif selector.type == PICKFIRST:
                fs = [_build_selector_function(s) for s in selector.selector]

                # Return the results of the first alternative yielding any
                def selector_function(ctx):
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []
            elif selector.type == SINGLE:
                format_spec = selector.selector

                # Generator yielding the formats matching format_spec.
                # NOTE(review): 'best' picks index -1 and 'worst' index 0, so
                # ctx['formats'] is presumably sorted from worst to best -
                # confirm against the caller
                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    if not formats:
                        return
                    if format_spec == 'all':
                        for f in formats:
                            yield f
                    elif format_spec in ['best', 'worst', None]:
                        format_idx = 0 if format_spec == 'worst' else -1
                        # Formats with neither vcodec nor acodec set to 'none'
                        audiovideo_formats = [
                            f for f in formats
                            if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                        if audiovideo_formats:
                            yield audiovideo_formats[format_idx]
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) we will fallback to best/worst
                        # {video,audio}-only format
                        elif ctx['incomplete_formats']:
                            yield formats[format_idx]
                    elif format_spec == 'bestaudio':
                        # Audio-only formats are those whose vcodec is 'none'
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[-1]
                    elif format_spec == 'worstaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[0]
                    elif format_spec == 'bestvideo':
                        # Video-only formats are those whose acodec is 'none'
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[-1]
                    elif format_spec == 'worstvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[0]
                    else:
                        # A known extension selects by 'ext', anything else
                        # by 'format_id'; the last match is yielded
                        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                        if format_spec in extensions:
                            filter_f = lambda f: f['ext'] == format_spec
                        else:
                            filter_f = lambda f: f['format_id'] == format_spec
                        matches = list(filter(filter_f, formats))
                        if matches:
                            yield matches[-1]
            elif selector.type == MERGE:
                def _merge(formats_info):
                    """
                    Combine a (video, audio) pair of formats into one format dict.

                    Reports an error and returns None when the first format
                    has no video or when both formats are video-only.
                    """
                    format_1, format_2 = [f['format_id'] for f in formats_info]
                    # The first format must contain the video and the
                    # second the audio
                    if formats_info[0].get('vcodec') == 'none':
                        self.report_error('The first format must '
                                          'contain the video, try using '
                                          '"-f %s+%s"' % (format_2, format_1))
                        return
                    # Formats must be opposite (video+audio)
                    if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
                        self.report_error(
                            'Both formats %s and %s are video-only, you must specify "-f video+audio"'
                            % (format_1, format_2))
                        return
                    # The video format's extension is used unless
                    # merge_output_format overrides it
                    output_ext = (
                        formats_info[0]['ext']
                        if self.params.get('merge_output_format') is None
                        else self.params['merge_output_format'])
                    # Video fields come from the first format, audio fields
                    # from the second
                    return {
                        'requested_formats': formats_info,
                        'format': '%s+%s' % (formats_info[0].get('format'),
                                             formats_info[1].get('format')),
                        'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                formats_info[1].get('format_id')),
                        'width': formats_info[0].get('width'),
                        'height': formats_info[0].get('height'),
                        'resolution': formats_info[0].get('resolution'),
                        'fps': formats_info[0].get('fps'),
                        'vcodec': formats_info[0].get('vcodec'),
                        'vbr': formats_info[0].get('vbr'),
                        'stretched_ratio': formats_info[0].get('stretched_ratio'),
                        'acodec': formats_info[1].get('acodec'),
                        'abr': formats_info[1].get('abr'),
                        'ext': output_ext,
                    }
                video_selector, audio_selector = map(_build_selector_function, selector.selector)

                # Yield one merged format per (video, audio) combination;
                # each side works on its own deep copy of ctx
                def selector_function(ctx):
                    for pair in itertools.product(
                            video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
                        yield _merge(pair)

            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                """Apply the selector's filters to a copy of ctx, then select."""
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector

        # The tokenizer reads lines of bytes, hence the BytesIO wrapper
        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator(object):
            """Iterator over a token list that can step back by one token."""

            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            # Python 2 iterator protocol
            next = __next__

            def restore_last_token(self):
                # Make the next iteration step return the last token again
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
1216
1217     def _calc_headers(self, info_dict):
1218         res = std_headers.copy()
1219
1220         add_headers = info_dict.get('http_headers')
1221         if add_headers:
1222             res.update(add_headers)
1223
1224         cookies = self._calc_cookies(info_dict)
1225         if cookies:
1226             res['Cookie'] = cookies
1227
1228         return res
1229
1230     def _calc_cookies(self, info_dict):
1231         pr = sanitized_Request(info_dict['url'])
1232         self.cookiejar.add_cookie_header(pr)
1233         return pr.get_header('Cookie')
1234
1235     def process_video_result(self, info_dict, download=True):
1236         assert info_dict.get('_type', 'video') == 'video'
1237
1238         if 'id' not in info_dict:
1239             raise ExtractorError('Missing "id" field in extractor result')
1240         if 'title' not in info_dict:
1241             raise ExtractorError('Missing "title" field in extractor result')
1242
1243         if not isinstance(info_dict['id'], compat_str):
1244             self.report_warning('"id" field is not a string - forcing string conversion')
1245             info_dict['id'] = compat_str(info_dict['id'])
1246
1247         if 'playlist' not in info_dict:
1248             # It isn't part of a playlist
1249             info_dict['playlist'] = None
1250             info_dict['playlist_index'] = None
1251
1252         thumbnails = info_dict.get('thumbnails')
1253         if thumbnails is None:
1254             thumbnail = info_dict.get('thumbnail')
1255             if thumbnail:
1256                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1257         if thumbnails:
1258             thumbnails.sort(key=lambda t: (
1259                 t.get('preference') if t.get('preference') is not None else -1,
1260                 t.get('width') if t.get('width') is not None else -1,
1261                 t.get('height') if t.get('height') is not None else -1,
1262                 t.get('id') if t.get('id') is not None else '', t.get('url')))
1263             for i, t in enumerate(thumbnails):
1264                 t['url'] = sanitize_url(t['url'])
1265                 if t.get('width') and t.get('height'):
1266                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1267                 if t.get('id') is None:
1268                     t['id'] = '%d' % i
1269
1270         if self.params.get('list_thumbnails'):
1271             self.list_thumbnails(info_dict)
1272             return
1273
1274         thumbnail = info_dict.get('thumbnail')
1275         if thumbnail:
1276             info_dict['thumbnail'] = sanitize_url(thumbnail)
1277         elif thumbnails:
1278             info_dict['thumbnail'] = thumbnails[-1]['url']
1279
1280         if 'display_id' not in info_dict and 'id' in info_dict:
1281             info_dict['display_id'] = info_dict['id']
1282
1283         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1284             # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1285             # see http://bugs.python.org/issue1646728)
1286             try:
1287                 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1288                 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1289             except (ValueError, OverflowError, OSError):
1290                 pass
1291
1292         # Auto generate title fields corresponding to the *_number fields when missing
1293         # in order to always have clean titles. This is very common for TV series.
1294         for field in ('chapter', 'season', 'episode'):
1295             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1296                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
1297
1298         subtitles = info_dict.get('subtitles')
1299         if subtitles:
1300             for _, subtitle in subtitles.items():
1301                 for subtitle_format in subtitle:
1302                     if subtitle_format.get('url'):
1303                         subtitle_format['url'] = sanitize_url(subtitle_format['url'])
1304                     if subtitle_format.get('ext') is None:
1305                         subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1306
1307         if self.params.get('listsubtitles', False):
1308             if 'automatic_captions' in info_dict:
1309                 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
1310             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1311             return
1312         info_dict['requested_subtitles'] = self.process_subtitles(
1313             info_dict['id'], subtitles,
1314             info_dict.get('automatic_captions'))
1315
1316         # We now pick which formats have to be downloaded
1317         if info_dict.get('formats') is None:
1318             # There's only one format available
1319             formats = [info_dict]
1320         else:
1321             formats = info_dict['formats']
1322
1323         if not formats:
1324             raise ExtractorError('No video formats found!')
1325
1326         formats_dict = {}
1327
1328         # We check that all the formats have the format and format_id fields
1329         for i, format in enumerate(formats):
1330             if 'url' not in format:
1331                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1332
1333             format['url'] = sanitize_url(format['url'])
1334
1335             if format.get('format_id') is None:
1336                 format['format_id'] = compat_str(i)
1337             else:
1338                 # Sanitize format_id from characters used in format selector expression
1339                 format['format_id'] = re.sub('[\s,/+\[\]()]', '_', format['format_id'])
1340             format_id = format['format_id']
1341             if format_id not in formats_dict:
1342                 formats_dict[format_id] = []
1343             formats_dict[format_id].append(format)
1344
1345         # Make sure all formats have unique format_id
1346         for format_id, ambiguous_formats in formats_dict.items():
1347             if len(ambiguous_formats) > 1:
1348                 for i, format in enumerate(ambiguous_formats):
1349                     format['format_id'] = '%s-%d' % (format_id, i)
1350
1351         for i, format in enumerate(formats):
1352             if format.get('format') is None:
1353                 format['format'] = '{id} - {res}{note}'.format(
1354                     id=format['format_id'],
1355                     res=self.format_resolution(format),
1356                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1357                 )
1358             # Automatically determine file extension if missing
1359             if format.get('ext') is None:
1360                 format['ext'] = determine_ext(format['url']).lower()
1361             # Automatically determine protocol if missing (useful for format
1362             # selection purposes)
1363             if 'protocol' not in format:
1364                 format['protocol'] = determine_protocol(format)
1365             # Add HTTP headers, so that external programs can use them from the
1366             # json output
1367             full_format_info = info_dict.copy()
1368             full_format_info.update(format)
1369             format['http_headers'] = self._calc_headers(full_format_info)
1370
1371         # TODO Central sorting goes here
1372
1373         if formats[0] is not info_dict:
1374             # only set the 'formats' fields if the original info_dict list them
1375             # otherwise we end up with a circular reference, the first (and unique)
1376             # element in the 'formats' field in info_dict is info_dict itself,
1377             # which can't be exported to json
1378             info_dict['formats'] = formats
1379         if self.params.get('listformats'):
1380             self.list_formats(info_dict)
1381             return
1382
1383         req_format = self.params.get('format')
1384         if req_format is None:
1385             req_format_list = []
1386             if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
1387                     not info_dict.get('is_live')):
1388                 merger = FFmpegMergerPP(self)
1389                 if merger.available and merger.can_merge():
1390                     req_format_list.append('bestvideo+bestaudio')
1391             req_format_list.append('best')
1392             req_format = '/'.join(req_format_list)
1393         format_selector = self.build_format_selector(req_format)
1394
1395         # While in format selection we may need to have an access to the original
1396         # format set in order to calculate some metrics or do some processing.
1397         # For now we need to be able to guess whether original formats provided
1398         # by extractor are incomplete or not (i.e. whether extractor provides only
1399         # video-only or audio-only formats) for proper formats selection for
1400         # extractors with such incomplete formats (see
1401         # https://github.com/rg3/youtube-dl/pull/5556).
1402         # Since formats may be filtered during format selection and may not match
1403         # the original formats the results may be incorrect. Thus original formats
1404         # or pre-calculated metrics should be passed to format selection routines
1405         # as well.
1406         # We will pass a context object containing all necessary additional data
1407         # instead of just formats.
1408         # This fixes incorrect format selection issue (see
1409         # https://github.com/rg3/youtube-dl/issues/10083).
1410         incomplete_formats = (
1411             # All formats are video-only or
1412             all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) or
1413             # all formats are audio-only
1414             all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
1415
1416         ctx = {
1417             'formats': formats,
1418             'incomplete_formats': incomplete_formats,
1419         }
1420
1421         formats_to_download = list(format_selector(ctx))
1422         if not formats_to_download:
1423             raise ExtractorError('requested format not available',
1424                                  expected=True)
1425
1426         if download:
1427             if len(formats_to_download) > 1:
1428                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1429             for format in formats_to_download:
1430                 new_info = dict(info_dict)
1431                 new_info.update(format)
1432                 self.process_info(new_info)
1433         # We update the info dict with the best quality format (backwards compatibility)
1434         info_dict.update(formats_to_download[-1])
1435         return info_dict
1436
1437     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1438         """Select the requested subtitles and their format"""
1439         available_subs = {}
1440         if normal_subtitles and self.params.get('writesubtitles'):
1441             available_subs.update(normal_subtitles)
1442         if automatic_captions and self.params.get('writeautomaticsub'):
1443             for lang, cap_info in automatic_captions.items():
1444                 if lang not in available_subs:
1445                     available_subs[lang] = cap_info
1446
1447         if (not self.params.get('writesubtitles') and not
1448                 self.params.get('writeautomaticsub') or not
1449                 available_subs):
1450             return None
1451
1452         if self.params.get('allsubtitles', False):
1453             requested_langs = available_subs.keys()
1454         else:
1455             if self.params.get('subtitleslangs', False):
1456                 requested_langs = self.params.get('subtitleslangs')
1457             elif 'en' in available_subs:
1458                 requested_langs = ['en']
1459             else:
1460                 requested_langs = [list(available_subs.keys())[0]]
1461
1462         formats_query = self.params.get('subtitlesformat', 'best')
1463         formats_preference = formats_query.split('/') if formats_query else []
1464         subs = {}
1465         for lang in requested_langs:
1466             formats = available_subs.get(lang)
1467             if formats is None:
1468                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1469                 continue
1470             for ext in formats_preference:
1471                 if ext == 'best':
1472                     f = formats[-1]
1473                     break
1474                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1475                 if matches:
1476                     f = matches[-1]
1477                     break
1478             else:
1479                 f = formats[-1]
1480                 self.report_warning(
1481                     'No subtitle format found matching "%s" for language %s, '
1482                     'using %s' % (formats_query, lang, f['ext']))
1483             subs[lang] = f
1484         return subs
1485
1486     def process_info(self, info_dict):
1487         """Process a single resolved IE result."""
1488
1489         assert info_dict.get('_type', 'video') == 'video'
1490
1491         max_downloads = self.params.get('max_downloads')
1492         if max_downloads is not None:
1493             if self._num_downloads >= int(max_downloads):
1494                 raise MaxDownloadsReached()
1495
1496         info_dict['fulltitle'] = info_dict['title']
1497         if len(info_dict['title']) > 200:
1498             info_dict['title'] = info_dict['title'][:197] + '...'
1499
1500         if 'format' not in info_dict:
1501             info_dict['format'] = info_dict['ext']
1502
1503         reason = self._match_entry(info_dict, incomplete=False)
1504         if reason is not None:
1505             self.to_screen('[download] ' + reason)
1506             return
1507
1508         self._num_downloads += 1
1509
1510         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1511
1512         # Forced printings
1513         if self.params.get('forcetitle', False):
1514             self.to_stdout(info_dict['fulltitle'])
1515         if self.params.get('forceid', False):
1516             self.to_stdout(info_dict['id'])
1517         if self.params.get('forceurl', False):
1518             if info_dict.get('requested_formats') is not None:
1519                 for f in info_dict['requested_formats']:
1520                     self.to_stdout(f['url'] + f.get('play_path', ''))
1521             else:
1522                 # For RTMP URLs, also include the playpath
1523                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1524         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1525             self.to_stdout(info_dict['thumbnail'])
1526         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1527             self.to_stdout(info_dict['description'])
1528         if self.params.get('forcefilename', False) and filename is not None:
1529             self.to_stdout(filename)
1530         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1531             self.to_stdout(formatSeconds(info_dict['duration']))
1532         if self.params.get('forceformat', False):
1533             self.to_stdout(info_dict['format'])
1534         if self.params.get('forcejson', False):
1535             self.to_stdout(json.dumps(info_dict))
1536
1537         # Do nothing else if in simulate mode
1538         if self.params.get('simulate', False):
1539             return
1540
1541         if filename is None:
1542             return
1543
1544         try:
1545             dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1546             if dn and not os.path.exists(dn):
1547                 os.makedirs(dn)
1548         except (OSError, IOError) as err:
1549             self.report_error('unable to create directory ' + error_to_compat_str(err))
1550             return
1551
1552         if self.params.get('writedescription', False):
1553             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1554             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1555                 self.to_screen('[info] Video description is already present')
1556             elif info_dict.get('description') is None:
1557                 self.report_warning('There\'s no description to write.')
1558             else:
1559                 try:
1560                     self.to_screen('[info] Writing video description to: ' + descfn)
1561                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1562                         descfile.write(info_dict['description'])
1563                 except (OSError, IOError):
1564                     self.report_error('Cannot write description file ' + descfn)
1565                     return
1566
1567         if self.params.get('writeannotations', False):
1568             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1569             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1570                 self.to_screen('[info] Video annotations are already present')
1571             else:
1572                 try:
1573                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1574                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1575                         annofile.write(info_dict['annotations'])
1576                 except (KeyError, TypeError):
1577                     self.report_warning('There are no annotations to write.')
1578                 except (OSError, IOError):
1579                     self.report_error('Cannot write annotations file: ' + annofn)
1580                     return
1581
1582         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1583                                        self.params.get('writeautomaticsub')])
1584
1585         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1586             # subtitles download errors are already managed as troubles in relevant IE
1587             # that way it will silently go on when used with unsupporting IE
1588             subtitles = info_dict['requested_subtitles']
1589             ie = self.get_info_extractor(info_dict['extractor_key'])
1590             for sub_lang, sub_info in subtitles.items():
1591                 sub_format = sub_info['ext']
1592                 if sub_info.get('data') is not None:
1593                     sub_data = sub_info['data']
1594                 else:
1595                     try:
1596                         sub_data = ie._download_webpage(
1597                             sub_info['url'], info_dict['id'], note=False)
1598                     except ExtractorError as err:
1599                         self.report_warning('Unable to download subtitle for "%s": %s' %
1600                                             (sub_lang, error_to_compat_str(err.cause)))
1601                         continue
1602                 try:
1603                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1604                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1605                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1606                     else:
1607                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1608                         # Use newline='' to prevent conversion of newline characters
1609                         # See https://github.com/rg3/youtube-dl/issues/10268
1610                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
1611                             subfile.write(sub_data)
1612                 except (OSError, IOError):
1613                     self.report_error('Cannot write subtitles file ' + sub_filename)
1614                     return
1615
1616         if self.params.get('writeinfojson', False):
1617             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1618             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1619                 self.to_screen('[info] Video description metadata is already present')
1620             else:
1621                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1622                 try:
1623                     write_json_file(self.filter_requested_info(info_dict), infofn)
1624                 except (OSError, IOError):
1625                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1626                     return
1627
1628         self._write_thumbnails(info_dict, filename)
1629
1630         if not self.params.get('skip_download', False):
1631             try:
1632                 def dl(name, info):
1633                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1634                     for ph in self._progress_hooks:
1635                         fd.add_progress_hook(ph)
1636                     if self.params.get('verbose'):
1637                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1638                     return fd.download(name, info)
1639
1640                 if info_dict.get('requested_formats') is not None:
1641                     downloaded = []
1642                     success = True
1643                     merger = FFmpegMergerPP(self)
1644                     if not merger.available:
1645                         postprocessors = []
1646                         self.report_warning('You have requested multiple '
1647                                             'formats but ffmpeg or avconv are not installed.'
1648                                             ' The formats won\'t be merged.')
1649                     else:
1650                         postprocessors = [merger]
1651
1652                     def compatible_formats(formats):
1653                         video, audio = formats
1654                         # Check extension
1655                         video_ext, audio_ext = audio.get('ext'), video.get('ext')
1656                         if video_ext and audio_ext:
1657                             COMPATIBLE_EXTS = (
1658                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
1659                                 ('webm')
1660                             )
1661                             for exts in COMPATIBLE_EXTS:
1662                                 if video_ext in exts and audio_ext in exts:
1663                                     return True
1664                         # TODO: Check acodec/vcodec
1665                         return False
1666
1667                     filename_real_ext = os.path.splitext(filename)[1][1:]
1668                     filename_wo_ext = (
1669                         os.path.splitext(filename)[0]
1670                         if filename_real_ext == info_dict['ext']
1671                         else filename)
1672                     requested_formats = info_dict['requested_formats']
1673                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1674                         info_dict['ext'] = 'mkv'
1675                         self.report_warning(
1676                             'Requested formats are incompatible for merge and will be merged into mkv.')
1677                     # Ensure filename always has a correct extension for successful merge
1678                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1679                     if os.path.exists(encodeFilename(filename)):
1680                         self.to_screen(
1681                             '[download] %s has already been downloaded and '
1682                             'merged' % filename)
1683                     else:
1684                         for f in requested_formats:
1685                             new_info = dict(info_dict)
1686                             new_info.update(f)
1687                             fname = self.prepare_filename(new_info)
1688                             fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
1689                             downloaded.append(fname)
1690                             partial_success = dl(fname, new_info)
1691                             success = success and partial_success
1692                         info_dict['__postprocessors'] = postprocessors
1693                         info_dict['__files_to_merge'] = downloaded
1694                 else:
1695                     # Just a single file
1696                     success = dl(filename, info_dict)
1697             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1698                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
1699                 return
1700             except (OSError, IOError) as err:
1701                 raise UnavailableVideoError(err)
1702             except (ContentTooShortError, ) as err:
1703                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1704                 return
1705
1706             if success and filename != '-':
1707                 # Fixup content
1708                 fixup_policy = self.params.get('fixup')
1709                 if fixup_policy is None:
1710                     fixup_policy = 'detect_or_warn'
1711
1712                 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
1713
1714                 stretched_ratio = info_dict.get('stretched_ratio')
1715                 if stretched_ratio is not None and stretched_ratio != 1:
1716                     if fixup_policy == 'warn':
1717                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1718                             info_dict['id'], stretched_ratio))
1719                     elif fixup_policy == 'detect_or_warn':
1720                         stretched_pp = FFmpegFixupStretchedPP(self)
1721                         if stretched_pp.available:
1722                             info_dict.setdefault('__postprocessors', [])
1723                             info_dict['__postprocessors'].append(stretched_pp)
1724                         else:
1725                             self.report_warning(
1726                                 '%s: Non-uniform pixel ratio (%s). %s'
1727                                 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
1728                     else:
1729                         assert fixup_policy in ('ignore', 'never')
1730
1731                 if (info_dict.get('requested_formats') is None and
1732                         info_dict.get('container') == 'm4a_dash'):
1733                     if fixup_policy == 'warn':
1734                         self.report_warning(
1735                             '%s: writing DASH m4a. '
1736                             'Only some players support this container.'
1737                             % info_dict['id'])
1738                     elif fixup_policy == 'detect_or_warn':
1739                         fixup_pp = FFmpegFixupM4aPP(self)
1740                         if fixup_pp.available:
1741                             info_dict.setdefault('__postprocessors', [])
1742                             info_dict['__postprocessors'].append(fixup_pp)
1743                         else:
1744                             self.report_warning(
1745                                 '%s: writing DASH m4a. '
1746                                 'Only some players support this container. %s'
1747                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
1748                     else:
1749                         assert fixup_policy in ('ignore', 'never')
1750
1751                 if (info_dict.get('protocol') == 'm3u8_native' or
1752                         info_dict.get('protocol') == 'm3u8' and
1753                         self.params.get('hls_prefer_native')):
1754                     if fixup_policy == 'warn':
1755                         self.report_warning('%s: malformated aac bitstream.' % (
1756                             info_dict['id']))
1757                     elif fixup_policy == 'detect_or_warn':
1758                         fixup_pp = FFmpegFixupM3u8PP(self)
1759                         if fixup_pp.available:
1760                             info_dict.setdefault('__postprocessors', [])
1761                             info_dict['__postprocessors'].append(fixup_pp)
1762                         else:
1763                             self.report_warning(
1764                                 '%s: malformated aac bitstream. %s'
1765                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
1766                     else:
1767                         assert fixup_policy in ('ignore', 'never')
1768
1769                 try:
1770                     self.post_process(filename, info_dict)
1771                 except (PostProcessingError) as err:
1772                     self.report_error('postprocessing: %s' % str(err))
1773                     return
1774                 self.record_download_archive(info_dict)
1775
1776     def download(self, url_list):
1777         """Download a given list of URLs."""
1778         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1779         if (len(url_list) > 1 and
1780                 '%' not in outtmpl and
1781                 self.params.get('max_downloads') != 1):
1782             raise SameFileError(outtmpl)
1783
1784         for url in url_list:
1785             try:
1786                 # It also downloads the videos
1787                 res = self.extract_info(
1788                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
1789             except UnavailableVideoError:
1790                 self.report_error('unable to download video')
1791             except MaxDownloadsReached:
1792                 self.to_screen('[info] Maximum number of downloaded files reached.')
1793                 raise
1794             else:
1795                 if self.params.get('dump_single_json', False):
1796                     self.to_stdout(json.dumps(res))
1797
1798         return self._download_retcode
1799
1800     def download_with_info_file(self, info_filename):
1801         with contextlib.closing(fileinput.FileInput(
1802                 [info_filename], mode='r',
1803                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1804             # FileInput doesn't have a read method, we can't call json.load
1805             info = self.filter_requested_info(json.loads('\n'.join(f)))
1806         try:
1807             self.process_ie_result(info, download=True)
1808         except DownloadError:
1809             webpage_url = info.get('webpage_url')
1810             if webpage_url is not None:
1811                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1812                 return self.download([webpage_url])
1813             else:
1814                 raise
1815         return self._download_retcode
1816
1817     @staticmethod
1818     def filter_requested_info(info_dict):
1819         return dict(
1820             (k, v) for k, v in info_dict.items()
1821             if k not in ['requested_formats', 'requested_subtitles'])
1822
1823     def post_process(self, filename, ie_info):
1824         """Run all the postprocessors on the given file."""
1825         info = dict(ie_info)
1826         info['filepath'] = filename
1827         pps_chain = []
1828         if ie_info.get('__postprocessors') is not None:
1829             pps_chain.extend(ie_info['__postprocessors'])
1830         pps_chain.extend(self._pps)
1831         for pp in pps_chain:
1832             files_to_delete = []
1833             try:
1834                 files_to_delete, info = pp.run(info)
1835             except PostProcessingError as e:
1836                 self.report_error(e.msg)
1837             if files_to_delete and not self.params.get('keepvideo', False):
1838                 for old_filename in files_to_delete:
1839                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1840                     try:
1841                         os.remove(encodeFilename(old_filename))
1842                     except (IOError, OSError):
1843                         self.report_warning('Unable to remove downloaded original file')
1844
1845     def _make_archive_id(self, info_dict):
1846         # Future-proof against any change in case
1847         # and backwards compatibility with prior versions
1848         extractor = info_dict.get('extractor_key')
1849         if extractor is None:
1850             if 'id' in info_dict:
1851                 extractor = info_dict.get('ie_key')  # key in a playlist
1852         if extractor is None:
1853             return None  # Incomplete video information
1854         return extractor.lower() + ' ' + info_dict['id']
1855
1856     def in_download_archive(self, info_dict):
1857         fn = self.params.get('download_archive')
1858         if fn is None:
1859             return False
1860
1861         vid_id = self._make_archive_id(info_dict)
1862         if vid_id is None:
1863             return False  # Incomplete video information
1864
1865         try:
1866             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1867                 for line in archive_file:
1868                     if line.strip() == vid_id:
1869                         return True
1870         except IOError as ioe:
1871             if ioe.errno != errno.ENOENT:
1872                 raise
1873         return False
1874
1875     def record_download_archive(self, info_dict):
1876         fn = self.params.get('download_archive')
1877         if fn is None:
1878             return
1879         vid_id = self._make_archive_id(info_dict)
1880         assert vid_id
1881         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1882             archive_file.write(vid_id + '\n')
1883
1884     @staticmethod
1885     def format_resolution(format, default='unknown'):
1886         if format.get('vcodec') == 'none':
1887             return 'audio only'
1888         if format.get('resolution') is not None:
1889             return format['resolution']
1890         if format.get('height') is not None:
1891             if format.get('width') is not None:
1892                 res = '%sx%s' % (format['width'], format['height'])
1893             else:
1894                 res = '%sp' % format['height']
1895         elif format.get('width') is not None:
1896             res = '%dx?' % format['width']
1897         else:
1898             res = default
1899         return res
1900
1901     def _format_note(self, fdict):
1902         res = ''
1903         if fdict.get('ext') in ['f4f', 'f4m']:
1904             res += '(unsupported) '
1905         if fdict.get('language'):
1906             if res:
1907                 res += ' '
1908             res += '[%s] ' % fdict['language']
1909         if fdict.get('format_note') is not None:
1910             res += fdict['format_note'] + ' '
1911         if fdict.get('tbr') is not None:
1912             res += '%4dk ' % fdict['tbr']
1913         if fdict.get('container') is not None:
1914             if res:
1915                 res += ', '
1916             res += '%s container' % fdict['container']
1917         if (fdict.get('vcodec') is not None and
1918                 fdict.get('vcodec') != 'none'):
1919             if res:
1920                 res += ', '
1921             res += fdict['vcodec']
1922             if fdict.get('vbr') is not None:
1923                 res += '@'
1924         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1925             res += 'video@'
1926         if fdict.get('vbr') is not None:
1927             res += '%4dk' % fdict['vbr']
1928         if fdict.get('fps') is not None:
1929             if res:
1930                 res += ', '
1931             res += '%sfps' % fdict['fps']
1932         if fdict.get('acodec') is not None:
1933             if res:
1934                 res += ', '
1935             if fdict['acodec'] == 'none':
1936                 res += 'video only'
1937             else:
1938                 res += '%-5s' % fdict['acodec']
1939         elif fdict.get('abr') is not None:
1940             if res:
1941                 res += ', '
1942             res += 'audio'
1943         if fdict.get('abr') is not None:
1944             res += '@%3dk' % fdict['abr']
1945         if fdict.get('asr') is not None:
1946             res += ' (%5dHz)' % fdict['asr']
1947         if fdict.get('filesize') is not None:
1948             if res:
1949                 res += ', '
1950             res += format_bytes(fdict['filesize'])
1951         elif fdict.get('filesize_approx') is not None:
1952             if res:
1953                 res += ', '
1954             res += '~' + format_bytes(fdict['filesize_approx'])
1955         return res
1956
1957     def list_formats(self, info_dict):
1958         formats = info_dict.get('formats', [info_dict])
1959         table = [
1960             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1961             for f in formats
1962             if f.get('preference') is None or f['preference'] >= -1000]
1963         if len(formats) > 1:
1964             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1965
1966         header_line = ['format code', 'extension', 'resolution', 'note']
1967         self.to_screen(
1968             '[info] Available formats for %s:\n%s' %
1969             (info_dict['id'], render_table(header_line, table)))
1970
1971     def list_thumbnails(self, info_dict):
1972         thumbnails = info_dict.get('thumbnails')
1973         if not thumbnails:
1974             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
1975             return
1976
1977         self.to_screen(
1978             '[info] Thumbnails for %s:' % info_dict['id'])
1979         self.to_screen(render_table(
1980             ['ID', 'width', 'height', 'URL'],
1981             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1982
1983     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1984         if not subtitles:
1985             self.to_screen('%s has no %s' % (video_id, name))
1986             return
1987         self.to_screen(
1988             'Available %s for %s:' % (name, video_id))
1989         self.to_screen(render_table(
1990             ['Language', 'formats'],
1991             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1992                 for lang, formats in subtitles.items()]))
1993
1994     def urlopen(self, req):
1995         """ Start an HTTP download """
1996         if isinstance(req, compat_basestring):
1997             req = sanitized_Request(req)
1998         return self._opener.open(req, timeout=self._socket_timeout)
1999
2000     def print_debug_header(self):
2001         if not self.params.get('verbose'):
2002             return
2003
2004         if type('') is not compat_str:
2005             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
2006             self.report_warning(
2007                 'Your Python is broken! Update to a newer and supported version')
2008
2009         stdout_encoding = getattr(
2010             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
2011         encoding_str = (
2012             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
2013                 locale.getpreferredencoding(),
2014                 sys.getfilesystemencoding(),
2015                 stdout_encoding,
2016                 self.get_encoding()))
2017         write_string(encoding_str, encoding=None)
2018
2019         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
2020         if _LAZY_LOADER:
2021             self._write_string('[debug] Lazy loading extractors enabled' + '\n')
2022         try:
2023             sp = subprocess.Popen(
2024                 ['git', 'rev-parse', '--short', 'HEAD'],
2025                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
2026                 cwd=os.path.dirname(os.path.abspath(__file__)))
2027             out, err = sp.communicate()
2028             out = out.decode().strip()
2029             if re.match('[0-9a-f]+', out):
2030                 self._write_string('[debug] Git HEAD: ' + out + '\n')
2031         except Exception:
2032             try:
2033                 sys.exc_clear()
2034             except Exception:
2035                 pass
2036         self._write_string('[debug] Python version %s - %s\n' % (
2037             platform.python_version(), platform_name()))
2038
2039         exe_versions = FFmpegPostProcessor.get_versions(self)
2040         exe_versions['rtmpdump'] = rtmpdump_version()
2041         exe_str = ', '.join(
2042             '%s %s' % (exe, v)
2043             for exe, v in sorted(exe_versions.items())
2044             if v
2045         )
2046         if not exe_str:
2047             exe_str = 'none'
2048         self._write_string('[debug] exe versions: %s\n' % exe_str)
2049
2050         proxy_map = {}
2051         for handler in self._opener.handlers:
2052             if hasattr(handler, 'proxies'):
2053                 proxy_map.update(handler.proxies)
2054         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
2055
2056         if self.params.get('call_home', False):
2057             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
2058             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
2059             latest_version = self.urlopen(
2060                 'https://yt-dl.org/latest/version').read().decode('utf-8')
2061             if version_tuple(latest_version) > version_tuple(__version__):
2062                 self.report_warning(
2063                     'You are using an outdated version (newest version: %s)! '
2064                     'See https://yt-dl.org/update if you need help updating.' %
2065                     latest_version)
2066
2067     def _setup_opener(self):
2068         timeout_val = self.params.get('socket_timeout')
2069         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
2070
2071         opts_cookiefile = self.params.get('cookiefile')
2072         opts_proxy = self.params.get('proxy')
2073
2074         if opts_cookiefile is None:
2075             self.cookiejar = compat_cookiejar.CookieJar()
2076         else:
2077             opts_cookiefile = compat_expanduser(opts_cookiefile)
2078             self.cookiejar = compat_cookiejar.MozillaCookieJar(
2079                 opts_cookiefile)
2080             if os.access(opts_cookiefile, os.R_OK):
2081                 self.cookiejar.load()
2082
2083         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
2084         if opts_proxy is not None:
2085             if opts_proxy == '':
2086                 proxies = {}
2087             else:
2088                 proxies = {'http': opts_proxy, 'https': opts_proxy}
2089         else:
2090             proxies = compat_urllib_request.getproxies()
2091             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
2092             if 'http' in proxies and 'https' not in proxies:
2093                 proxies['https'] = proxies['http']
2094         proxy_handler = PerRequestProxyHandler(proxies)
2095
2096         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
2097         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
2098         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
2099         data_handler = compat_urllib_request_DataHandler()
2100
2101         # When passing our own FileHandler instance, build_opener won't add the
2102         # default FileHandler and allows us to disable the file protocol, which
2103         # can be used for malicious purposes (see
2104         # https://github.com/rg3/youtube-dl/issues/8227)
2105         file_handler = compat_urllib_request.FileHandler()
2106
2107         def file_open(*args, **kwargs):
2108             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
2109         file_handler.file_open = file_open
2110
2111         opener = compat_urllib_request.build_opener(
2112             proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)
2113
2114         # Delete the default user-agent header, which would otherwise apply in
2115         # cases where our custom HTTP handler doesn't come into play
2116         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
2117         opener.addheaders = []
2118         self._opener = opener
2119
2120     def encode(self, s):
2121         if isinstance(s, bytes):
2122             return s  # Already encoded
2123
2124         try:
2125             return s.encode(self.get_encoding())
2126         except UnicodeEncodeError as err:
2127             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2128             raise
2129
2130     def get_encoding(self):
2131         encoding = self.params.get('encoding')
2132         if encoding is None:
2133             encoding = preferredencoding()
2134         return encoding
2135
2136     def _write_thumbnails(self, info_dict, filename):
2137         if self.params.get('writethumbnail', False):
2138             thumbnails = info_dict.get('thumbnails')
2139             if thumbnails:
2140                 thumbnails = [thumbnails[-1]]
2141         elif self.params.get('write_all_thumbnails', False):
2142             thumbnails = info_dict.get('thumbnails')
2143         else:
2144             return
2145
2146         if not thumbnails:
2147             # No thumbnails present, so return immediately
2148             return
2149
2150         for t in thumbnails:
2151             thumb_ext = determine_ext(t['url'], 'jpg')
2152             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2153             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2154             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
2155
2156             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2157                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2158                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2159             else:
2160                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2161                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2162                 try:
2163                     uf = self.urlopen(t['url'])
2164                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2165                         shutil.copyfileobj(uf, thumbf)
2166                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2167                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2168                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2169                     self.report_warning('Unable to download thumbnail "%s": %s' %
2170                                         (t['url'], error_to_compat_str(err)))