Unify coding cookie
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # coding: utf-8
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import copy
9 import datetime
10 import errno
11 import fileinput
12 import io
13 import itertools
14 import json
15 import locale
16 import operator
17 import os
18 import platform
19 import re
20 import shutil
21 import subprocess
22 import socket
23 import sys
24 import time
25 import tokenize
26 import traceback
27
28 from .compat import (
29     compat_basestring,
30     compat_cookiejar,
31     compat_expanduser,
32     compat_get_terminal_size,
33     compat_http_client,
34     compat_kwargs,
35     compat_os_name,
36     compat_str,
37     compat_tokenize_tokenize,
38     compat_urllib_error,
39     compat_urllib_request,
40     compat_urllib_request_DataHandler,
41 )
42 from .utils import (
43     age_restricted,
44     args_to_str,
45     ContentTooShortError,
46     date_from_str,
47     DateRange,
48     DEFAULT_OUTTMPL,
49     determine_ext,
50     determine_protocol,
51     DownloadError,
52     encode_compat_str,
53     encodeFilename,
54     error_to_compat_str,
55     ExtractorError,
56     format_bytes,
57     formatSeconds,
58     locked_file,
59     make_HTTPS_handler,
60     MaxDownloadsReached,
61     PagedList,
62     parse_filesize,
63     PerRequestProxyHandler,
64     platform_name,
65     PostProcessingError,
66     preferredencoding,
67     prepend_extension,
68     register_socks_protocols,
69     render_table,
70     replace_extension,
71     SameFileError,
72     sanitize_filename,
73     sanitize_path,
74     sanitize_url,
75     sanitized_Request,
76     std_headers,
77     subtitles_filename,
78     UnavailableVideoError,
79     url_basename,
80     version_tuple,
81     write_json_file,
82     write_string,
83     YoutubeDLCookieProcessor,
84     YoutubeDLHandler,
85 )
86 from .cache import Cache
87 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
88 from .downloader import get_suitable_downloader
89 from .downloader.rtmp import rtmpdump_version
90 from .postprocessor import (
91     FFmpegFixupM3u8PP,
92     FFmpegFixupM4aPP,
93     FFmpegFixupStretchedPP,
94     FFmpegMergerPP,
95     FFmpegPostProcessor,
96     get_postprocessor,
97 )
98 from .version import __version__
99
if compat_os_name == 'nt':
    # ctypes is only needed on Windows, where it is used to set the
    # console window title (see to_console_title below).
    import ctypes
103
104 class YoutubeDL(object):
105     """YoutubeDL class.
106
107     YoutubeDL objects are the ones responsible for downloading the
108     actual video file and writing it to disk if the user has requested
109     it, among some other tasks. In most cases there should be one per
110     program. As, given a video URL, the downloader doesn't know how to
111     extract all the needed information, task that InfoExtractors do, it
112     has to pass the URL to one of them.
113
114     For this, YoutubeDL objects have a method that allows
115     InfoExtractors to be registered in a given order. When it is passed
116     a URL, the YoutubeDL object hands it to the first InfoExtractor it
117     finds that reports being able to handle it. The InfoExtractor extracts
118     all the information about the video or videos the URL refers to, and
119     YoutubeDL processes the extracted information, possibly using a File
120     Downloader to download the video.
121
122     YoutubeDL objects accept a lot of parameters. In order not to saturate
123     the object constructor with arguments, it receives a dictionary of
124     options instead. These options are available through the params
125     attribute for the InfoExtractors to use. The YoutubeDL also
126     registers itself as the downloader in charge for the InfoExtractors
127     that are added to it, so this is a "mutual registration".
128
129     Available options:
130
131     username:          Username for authentication purposes.
132     password:          Password for authentication purposes.
133     videopassword:     Password for accessing a video.
134     ap_mso:            Adobe Pass multiple-system operator identifier.
135     ap_username:       Multiple-system operator account username.
136     ap_password:       Multiple-system operator account password.
137     usenetrc:          Use netrc for authentication instead.
138     verbose:           Print additional info to stdout.
139     quiet:             Do not print messages to stdout.
140     no_warnings:       Do not print out anything for warnings.
141     forceurl:          Force printing final URL.
142     forcetitle:        Force printing title.
143     forceid:           Force printing ID.
144     forcethumbnail:    Force printing thumbnail URL.
145     forcedescription:  Force printing description.
146     forcefilename:     Force printing final filename.
147     forceduration:     Force printing duration.
148     forcejson:         Force printing info_dict as JSON.
149     dump_single_json:  Force printing the info_dict of the whole playlist
150                        (or video) as a single JSON line.
151     simulate:          Do not download the video files.
152     format:            Video format code. See options.py for more information.
153     outtmpl:           Template for output names.
154     restrictfilenames: Do not allow "&" and spaces in file names
155     ignoreerrors:      Do not stop on download errors.
156     force_generic_extractor: Force downloader to use the generic extractor
157     nooverwrites:      Prevent overwriting files.
158     playliststart:     Playlist item to start at.
159     playlistend:       Playlist item to end at.
160     playlist_items:    Specific indices of playlist to download.
161     playlistreverse:   Download playlist items in reverse order.
162     matchtitle:        Download only matching titles.
163     rejecttitle:       Reject downloads for matching titles.
164     logger:            Log messages to a logging.Logger instance.
165     logtostderr:       Log messages to stderr instead of stdout.
166     writedescription:  Write the video description to a .description file
167     writeinfojson:     Write the video description to a .info.json file
168     writeannotations:  Write the video annotations to a .annotations.xml file
169     writethumbnail:    Write the thumbnail image to a file
170     write_all_thumbnails:  Write all thumbnail formats to files
171     writesubtitles:    Write the video subtitles to a file
172     writeautomaticsub: Write the automatically generated subtitles to a file
173     allsubtitles:      Downloads all the subtitles of the video
174                        (requires writesubtitles or writeautomaticsub)
175     listsubtitles:     Lists all available subtitles for the video
176     subtitlesformat:   The format code for subtitles
177     subtitleslangs:    List of languages of the subtitles to download
178     keepvideo:         Keep the video file after post-processing
179     daterange:         A DateRange object, download only if the upload_date is in the range.
180     skip_download:     Skip the actual download of the video file
181     cachedir:          Location of the cache files in the filesystem.
182                        False to disable filesystem cache.
183     noplaylist:        Download single video instead of a playlist if in doubt.
184     age_limit:         An integer representing the user's age in years.
185                        Unsuitable videos for the given age are skipped.
186     min_views:         An integer representing the minimum view count the video
187                        must have in order to not be skipped.
188                        Videos without view count information are always
189                        downloaded. None for no limit.
190     max_views:         An integer representing the maximum view count.
191                        Videos that are more popular than that are not
192                        downloaded.
193                        Videos without view count information are always
194                        downloaded. None for no limit.
195     download_archive:  File name of a file where all downloads are recorded.
196                        Videos already present in the file are not downloaded
197                        again.
198     cookiefile:        File name where cookies should be read from and dumped to.
199     nocheckcertificate:Do not verify SSL certificates
200     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
201                        At the moment, this is only supported by YouTube.
202     proxy:             URL of the proxy server to use
203     geo_verification_proxy:  URL of the proxy to use for IP address verification
204                        on geo-restricted sites. (Experimental)
205     socket_timeout:    Time to wait for unresponsive hosts, in seconds
206     bidi_workaround:   Work around buggy terminals without bidirectional text
207                        support, using fribidi
208     debug_printtraffic:Print out sent and received HTTP traffic
209     include_ads:       Download ads as well
210     default_search:    Prepend this string if an input url is not valid.
211                        'auto' for elaborate guessing
212     encoding:          Use this encoding instead of the system-specified.
213     extract_flat:      Do not resolve URLs, return the immediate result.
214                        Pass in 'in_playlist' to only show this behavior for
215                        playlist items.
216     postprocessors:    A list of dictionaries, each with an entry
217                        * key:  The name of the postprocessor. See
218                                youtube_dl/postprocessor/__init__.py for a list.
219                        as well as any further keyword arguments for the
220                        postprocessor.
221     progress_hooks:    A list of functions that get called on download
222                        progress, with a dictionary with the entries
223                        * status: One of "downloading", "error", or "finished".
224                                  Check this first and ignore unknown values.
225
226                        If status is one of "downloading", or "finished", the
227                        following properties may also be present:
228                        * filename: The final filename (always present)
229                        * tmpfilename: The filename we're currently writing to
230                        * downloaded_bytes: Bytes on disk
231                        * total_bytes: Size of the whole file, None if unknown
232                        * total_bytes_estimate: Guess of the eventual file size,
233                                                None if unavailable.
234                        * elapsed: The number of seconds since download started.
235                        * eta: The estimated time in seconds, None if unknown
236                        * speed: The download speed in bytes/second, None if
237                                 unknown
238                        * fragment_index: The counter of the currently
239                                          downloaded video fragment.
240                        * fragment_count: The number of fragments (= individual
241                                          files that will be merged)
242
243                        Progress hooks are guaranteed to be called at least once
244                        (with status "finished") if the download is successful.
245     merge_output_format: Extension to use when merging formats.
246     fixup:             Automatically correct known faults of the file.
247                        One of:
248                        - "never": do nothing
249                        - "warn": only emit a warning
250                        - "detect_or_warn": check whether we can do anything
251                                            about it, warn otherwise (default)
252     source_address:    (Experimental) Client-side IP address to bind to.
253     call_home:         Boolean, true iff we are allowed to contact the
254                        youtube-dl servers for debugging.
255     sleep_interval:    Number of seconds to sleep before each download when
256                        used alone or a lower bound of a range for randomized
257                        sleep before each download (minimum possible number
258                        of seconds to sleep) when used along with
259                        max_sleep_interval.
260     max_sleep_interval:Upper bound of a range for randomized sleep before each
261                        download (maximum possible number of seconds to sleep).
262                        Must only be used along with sleep_interval.
263                        Actual sleep time will be a random float from range
264                        [sleep_interval; max_sleep_interval].
265     listformats:       Print an overview of available video formats and exit.
266     list_thumbnails:   Print a table of all thumbnails and exit.
267     match_filter:      A function that gets called with the info_dict of
268                        every video.
269                        If it returns a message, the video is ignored.
270                        If it returns None, the video is downloaded.
271                        match_filter_func in utils.py is one example for this.
272     no_color:          Do not emit color codes in output.
273
274     The following options determine which downloader is picked:
275     external_downloader: Executable of the external downloader to call.
276                        None or unset for standard (built-in) downloader.
277     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
278                        if True, otherwise use ffmpeg/avconv if False, otherwise
279                        use downloader suggested by extractor if None.
280
281     The following parameters are not used by YoutubeDL itself, they are used by
282     the downloader (see youtube_dl/downloader/common.py):
283     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
284     noresizebuffer, retries, continuedl, noprogress, consoletitle,
285     xattr_set_filesize, external_downloader_args, hls_use_mpegts.
286
287     The following options are used by the post processors:
288     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
289                        otherwise prefer avconv.
290     postprocessor_args: A list of additional command-line arguments for the
291                         postprocessor.
292     """
293
    # Class-level defaults; the real values are assigned per instance in __init__.
    params = None  # the option dictionary described in the class docstring
    _ies = []  # registered InfoExtractors (classes or instances)
    _pps = []  # registered PostProcessors
    _download_retcode = None  # process exit code to report (0 = success)
    _num_downloads = None  # count of files downloaded so far (feeds %(autonumber)s)
    _screen_file = None  # stream used for screen output (stdout or stderr)
300
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.

        params:    Dictionary of options (see the class docstring);
                   missing entries fall back to their defaults.
        auto_init: When True, print the debug header and register the
                   default info extractors immediately.
        """
        if params is None:
            params = {}
        # Registered extractors/post-processors and per-run counters
        self._ies = []
        self._ies_instances = {}
        self._pps = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # With 'logtostderr' all screen output is routed to stderr so that
        # stdout stays clean (e.g. for piping the downloaded data).
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = {
            # Default parameters
            'nocheckcertificate': False,
        }
        self.params.update(params)
        self.cache = Cache(self)

        # --cn-verification-proxy was superseded by --geo-verification-proxy;
        # keep honoring the old option but warn about it.
        if self.params.get('cn_verification_proxy') is not None:
            self.report_warning('--cn-verification-proxy is deprecated. Use --geo-verification-proxy instead.')
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        if params.get('bidi_workaround', False):
            try:
                import pty
                # Route screen output through an external bidi filter
                # (bidiv, falling back to fribidi) attached to a pty.
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    # bidiv is not installed; try fribidi instead
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.version_info >= (3,) and sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
                not params.get('restrictfilenames', False)):
            # On Python 3, the Unicode filesystem API will throw errors (#1474)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if isinstance(params.get('outtmpl'), bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')

        self._setup_opener()

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        # Instantiate the post-processors requested via 'postprocessors':
        # each entry is a dict whose 'key' names the PP class and whose
        # remaining items become keyword arguments for its constructor.
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            del pp_def['key']
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()
384
385     def warn_if_short_id(self, argv):
386         # short YouTube ID starting with dash?
387         idxs = [
388             i for i, a in enumerate(argv)
389             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
390         if idxs:
391             correct_argv = (
392                 ['youtube-dl'] +
393                 [a for i, a in enumerate(argv) if i not in idxs] +
394                 ['--'] + [argv[i] for i in idxs]
395             )
396             self.report_warning(
397                 'Long argument string detected. '
398                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
399                 args_to_str(correct_argv))
400
401     def add_info_extractor(self, ie):
402         """Add an InfoExtractor object to the end of the list."""
403         self._ies.append(ie)
404         if not isinstance(ie, type):
405             self._ies_instances[ie.ie_key()] = ie
406             ie.set_downloader(self)
407
408     def get_info_extractor(self, ie_key):
409         """
410         Get an instance of an IE with name ie_key, it will try to get one from
411         the _ies list, if there's no instance it will create a new one and add
412         it to the extractor list.
413         """
414         ie = self._ies_instances.get(ie_key)
415         if ie is None:
416             ie = get_info_extractor(ie_key)()
417             self.add_info_extractor(ie)
418         return ie
419
420     def add_default_info_extractors(self):
421         """
422         Add the InfoExtractors returned by gen_extractors to the end of the list
423         """
424         for ie in gen_extractor_classes():
425             self.add_info_extractor(ie)
426
427     def add_post_processor(self, pp):
428         """Add a PostProcessor object to the end of the chain."""
429         self._pps.append(pp)
430         pp.set_downloader(self)
431
432     def add_progress_hook(self, ph):
433         """Add the progress hook (currently only for the file downloader)"""
434         self._progress_hooks.append(ph)
435
436     def _bidi_workaround(self, message):
437         if not hasattr(self, '_output_channel'):
438             return message
439
440         assert hasattr(self, '_output_process')
441         assert isinstance(message, compat_str)
442         line_count = message.count('\n') + 1
443         self._output_process.stdin.write((message + '\n').encode('utf-8'))
444         self._output_process.stdin.flush()
445         res = ''.join(self._output_channel.readline().decode('utf-8')
446                       for _ in range(line_count))
447         return res[:-len('\n')]
448
449     def to_screen(self, message, skip_eol=False):
450         """Print message to stdout if not in quiet mode."""
451         return self.to_stdout(message, skip_eol, check_quiet=True)
452
453     def _write_string(self, s, out=None):
454         write_string(s, out=out, encoding=self.params.get('encoding'))
455
456     def to_stdout(self, message, skip_eol=False, check_quiet=False):
457         """Print message to stdout if not in quiet mode."""
458         if self.params.get('logger'):
459             self.params['logger'].debug(message)
460         elif not check_quiet or not self.params.get('quiet', False):
461             message = self._bidi_workaround(message)
462             terminator = ['\n', ''][skip_eol]
463             output = message + terminator
464
465             self._write_string(output, self._screen_file)
466
467     def to_stderr(self, message):
468         """Print message to stderr."""
469         assert isinstance(message, compat_str)
470         if self.params.get('logger'):
471             self.params['logger'].error(message)
472         else:
473             message = self._bidi_workaround(message)
474             output = message + '\n'
475             self._write_string(output, self._err_file)
476
477     def to_console_title(self, message):
478         if not self.params.get('consoletitle', False):
479             return
480         if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
481             # c_wchar_p() might not be necessary if `message` is
482             # already of type unicode()
483             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
484         elif 'TERM' in os.environ:
485             self._write_string('\033]0;%s\007' % message, self._screen_file)
486
487     def save_console_title(self):
488         if not self.params.get('consoletitle', False):
489             return
490         if 'TERM' in os.environ:
491             # Save the title on stack
492             self._write_string('\033[22;0t', self._screen_file)
493
494     def restore_console_title(self):
495         if not self.params.get('consoletitle', False):
496             return
497         if 'TERM' in os.environ:
498             # Restore the title from stack
499             self._write_string('\033[23;0t', self._screen_file)
500
501     def __enter__(self):
502         self.save_console_title()
503         return self
504
505     def __exit__(self, *args):
506         self.restore_console_title()
507
508         if self.params.get('cookiefile') is not None:
509             self.cookiejar.save()
510
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # A wrapped exception (e.g. from an extractor) may carry the
                    # original cause in an exc_info attribute; print that first.
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: show the current call stack instead
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Prefer the wrapped original exc_info (if present) so the
            # DownloadError points at the root cause.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1
540
541     def report_warning(self, message):
542         '''
543         Print the message to stderr, it will be prefixed with 'WARNING:'
544         If stderr is a tty file the 'WARNING:' will be colored
545         '''
546         if self.params.get('logger') is not None:
547             self.params['logger'].warning(message)
548         else:
549             if self.params.get('no_warnings'):
550                 return
551             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
552                 _msg_header = '\033[0;33mWARNING:\033[0m'
553             else:
554                 _msg_header = 'WARNING:'
555             warning_message = '%s %s' % (_msg_header, message)
556             self.to_stderr(warning_message)
557
558     def report_error(self, message, tb=None):
559         '''
560         Do the same as trouble, but prefixes the message with 'ERROR:', colored
561         in red if stderr is a tty file.
562         '''
563         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
564             _msg_header = '\033[0;31mERROR:\033[0m'
565         else:
566             _msg_header = 'ERROR:'
567         error_message = '%s %s' % (_msg_header, message)
568         self.trouble(error_message, tb)
569
570     def report_file_already_downloaded(self, file_name):
571         """Report file has already been fully downloaded."""
572         try:
573             self.to_screen('[download] %s has already been downloaded' % file_name)
574         except UnicodeEncodeError:
575             self.to_screen('[download] The file has already been downloaded')
576
    def prepare_filename(self, info_dict):
        """Generate the output filename by expanding the 'outtmpl' template.

        Returns the sanitized path, or None if the template is invalid
        (a ValueError is raised while expanding it).
        """
        try:
            template_dict = dict(info_dict)

            template_dict['epoch'] = int(time.time())
            autonumber_size = self.params.get('autonumber_size')
            if autonumber_size is None:
                autonumber_size = 5
            # e.g. '%05d': zero-padded counter of downloads in this run
            autonumber_templ = '%0' + str(autonumber_size) + 'd'
            template_dict['autonumber'] = autonumber_templ % self._num_downloads
            if template_dict.get('playlist_index') is not None:
                # Pad the playlist index to the width of the playlist length
                template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
            if template_dict.get('resolution') is None:
                # Synthesize a resolution string from whatever dimensions we have
                if template_dict.get('width') and template_dict.get('height'):
                    template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
                elif template_dict.get('height'):
                    template_dict['resolution'] = '%sp' % template_dict['height']
                elif template_dict.get('width'):
                    template_dict['resolution'] = '%dx?' % template_dict['width']

            # Make every value filename-safe; the video id gets special
            # treatment inside sanitize_filename (is_id=True).
            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id'))
            template_dict = dict((k, sanitize(k, v))
                                 for k, v in template_dict.items()
                                 if v is not None and not isinstance(v, (list, tuple, dict)))
            # Unknown template fields expand to 'NA' instead of raising KeyError
            template_dict = collections.defaultdict(lambda: 'NA', template_dict)

            outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
            tmpl = compat_expanduser(outtmpl)
            filename = tmpl % template_dict
            # Temporary fix for #4787
            # 'Treat' all problem characters by passing filename through preferredencoding
            # to workaround encoding issues with subprocess on python2 @ Windows
            if sys.version_info < (3, 0) and sys.platform == 'win32':
                filename = encodeFilename(filename, True).decode(preferredencoding())
            return sanitize_path(filename)
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None
619
620     def _match_entry(self, info_dict, incomplete):
621         """ Returns None iff the file should be downloaded """
622
623         video_title = info_dict.get('title', info_dict.get('id', 'video'))
624         if 'title' in info_dict:
625             # This can happen when we're just evaluating the playlist
626             title = info_dict['title']
627             matchtitle = self.params.get('matchtitle', False)
628             if matchtitle:
629                 if not re.search(matchtitle, title, re.IGNORECASE):
630                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
631             rejecttitle = self.params.get('rejecttitle', False)
632             if rejecttitle:
633                 if re.search(rejecttitle, title, re.IGNORECASE):
634                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
635         date = info_dict.get('upload_date')
636         if date is not None:
637             dateRange = self.params.get('daterange', DateRange())
638             if date not in dateRange:
639                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
640         view_count = info_dict.get('view_count')
641         if view_count is not None:
642             min_views = self.params.get('min_views')
643             if min_views is not None and view_count < min_views:
644                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
645             max_views = self.params.get('max_views')
646             if max_views is not None and view_count > max_views:
647                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
648         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
649             return 'Skipping "%s" because it is age restricted' % video_title
650         if self.in_download_archive(info_dict):
651             return '%s has already been recorded in archive' % video_title
652
653         if not incomplete:
654             match_filter = self.params.get('match_filter')
655             if match_filter is not None:
656                 ret = match_filter(info_dict)
657                 if ret is not None:
658                     return ret
659
660         return None
661
662     @staticmethod
663     def add_extra_info(info_dict, extra_info):
664         '''Set the keys from extra_info in info dict if they are missing'''
665         for key, value in extra_info.items():
666             info_dict.setdefault(key, value)
667
668     def extract_info(self, url, download=True, ie_key=None, extra_info={},
669                      process=True, force_generic_extractor=False):
670         '''
671         Returns a list with a dictionary for each video we find.
672         If 'download', also downloads the videos.
673         extra_info is a dict containing the extra values to add to each result
674         '''
675
676         if not ie_key and force_generic_extractor:
677             ie_key = 'Generic'
678
679         if ie_key:
680             ies = [self.get_info_extractor(ie_key)]
681         else:
682             ies = self._ies
683
684         for ie in ies:
685             if not ie.suitable(url):
686                 continue
687
688             ie = self.get_info_extractor(ie.ie_key())
689             if not ie.working():
690                 self.report_warning('The program functionality for this site has been marked as broken, '
691                                     'and will probably not work.')
692
693             try:
694                 ie_result = ie.extract(url)
695                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
696                     break
697                 if isinstance(ie_result, list):
698                     # Backwards compatibility: old IE result format
699                     ie_result = {
700                         '_type': 'compat_list',
701                         'entries': ie_result,
702                     }
703                 self.add_default_extra_info(ie_result, ie, url)
704                 if process:
705                     return self.process_ie_result(ie_result, download, extra_info)
706                 else:
707                     return ie_result
708             except ExtractorError as e:  # An error we somewhat expected
709                 self.report_error(compat_str(e), e.format_traceback())
710                 break
711             except MaxDownloadsReached:
712                 raise
713             except Exception as e:
714                 if self.params.get('ignoreerrors', False):
715                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
716                     break
717                 else:
718                     raise
719         else:
720             self.report_error('no suitable InfoExtractor for URL %s' % url)
721
722     def add_default_extra_info(self, ie_result, ie, url):
723         self.add_extra_info(ie_result, {
724             'extractor': ie.IE_NAME,
725             'webpage_url': url,
726             'webpage_url_basename': url_basename(url),
727             'extractor_key': ie.ie_key(),
728         })
729
730     def process_ie_result(self, ie_result, download=True, extra_info={}):
731         """
732         Take the result of the ie(may be modified) and resolve all unresolved
733         references (URLs, playlist items).
734
735         It will also download the videos if 'download'.
736         Returns the resolved ie_result.
737         """
738         result_type = ie_result.get('_type', 'video')
739
740         if result_type in ('url', 'url_transparent'):
741             ie_result['url'] = sanitize_url(ie_result['url'])
742             extract_flat = self.params.get('extract_flat', False)
743             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
744                     extract_flat is True):
745                 if self.params.get('forcejson', False):
746                     self.to_stdout(json.dumps(ie_result))
747                 return ie_result
748
749         if result_type == 'video':
750             self.add_extra_info(ie_result, extra_info)
751             return self.process_video_result(ie_result, download=download)
752         elif result_type == 'url':
753             # We have to add extra_info to the results because it may be
754             # contained in a playlist
755             return self.extract_info(ie_result['url'],
756                                      download,
757                                      ie_key=ie_result.get('ie_key'),
758                                      extra_info=extra_info)
759         elif result_type == 'url_transparent':
760             # Use the information from the embedding page
761             info = self.extract_info(
762                 ie_result['url'], ie_key=ie_result.get('ie_key'),
763                 extra_info=extra_info, download=False, process=False)
764
765             force_properties = dict(
766                 (k, v) for k, v in ie_result.items() if v is not None)
767             for f in ('_type', 'url', 'ie_key'):
768                 if f in force_properties:
769                     del force_properties[f]
770             new_result = info.copy()
771             new_result.update(force_properties)
772
773             assert new_result.get('_type') != 'url_transparent'
774
775             return self.process_ie_result(
776                 new_result, download=download, extra_info=extra_info)
777         elif result_type == 'playlist' or result_type == 'multi_video':
778             # We process each entry in the playlist
779             playlist = ie_result.get('title') or ie_result.get('id')
780             self.to_screen('[download] Downloading playlist: %s' % playlist)
781
782             playlist_results = []
783
784             playliststart = self.params.get('playliststart', 1) - 1
785             playlistend = self.params.get('playlistend')
786             # For backwards compatibility, interpret -1 as whole list
787             if playlistend == -1:
788                 playlistend = None
789
790             playlistitems_str = self.params.get('playlist_items')
791             playlistitems = None
792             if playlistitems_str is not None:
793                 def iter_playlistitems(format):
794                     for string_segment in format.split(','):
795                         if '-' in string_segment:
796                             start, end = string_segment.split('-')
797                             for item in range(int(start), int(end) + 1):
798                                 yield int(item)
799                         else:
800                             yield int(string_segment)
801                 playlistitems = iter_playlistitems(playlistitems_str)
802
803             ie_entries = ie_result['entries']
804             if isinstance(ie_entries, list):
805                 n_all_entries = len(ie_entries)
806                 if playlistitems:
807                     entries = [
808                         ie_entries[i - 1] for i in playlistitems
809                         if -n_all_entries <= i - 1 < n_all_entries]
810                 else:
811                     entries = ie_entries[playliststart:playlistend]
812                 n_entries = len(entries)
813                 self.to_screen(
814                     '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
815                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
816             elif isinstance(ie_entries, PagedList):
817                 if playlistitems:
818                     entries = []
819                     for item in playlistitems:
820                         entries.extend(ie_entries.getslice(
821                             item - 1, item
822                         ))
823                 else:
824                     entries = ie_entries.getslice(
825                         playliststart, playlistend)
826                 n_entries = len(entries)
827                 self.to_screen(
828                     '[%s] playlist %s: Downloading %d videos' %
829                     (ie_result['extractor'], playlist, n_entries))
830             else:  # iterable
831                 if playlistitems:
832                     entry_list = list(ie_entries)
833                     entries = [entry_list[i - 1] for i in playlistitems]
834                 else:
835                     entries = list(itertools.islice(
836                         ie_entries, playliststart, playlistend))
837                 n_entries = len(entries)
838                 self.to_screen(
839                     '[%s] playlist %s: Downloading %d videos' %
840                     (ie_result['extractor'], playlist, n_entries))
841
842             if self.params.get('playlistreverse', False):
843                 entries = entries[::-1]
844
845             for i, entry in enumerate(entries, 1):
846                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
847                 extra = {
848                     'n_entries': n_entries,
849                     'playlist': playlist,
850                     'playlist_id': ie_result.get('id'),
851                     'playlist_title': ie_result.get('title'),
852                     'playlist_index': i + playliststart,
853                     'extractor': ie_result['extractor'],
854                     'webpage_url': ie_result['webpage_url'],
855                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
856                     'extractor_key': ie_result['extractor_key'],
857                 }
858
859                 reason = self._match_entry(entry, incomplete=True)
860                 if reason is not None:
861                     self.to_screen('[download] ' + reason)
862                     continue
863
864                 entry_result = self.process_ie_result(entry,
865                                                       download=download,
866                                                       extra_info=extra)
867                 playlist_results.append(entry_result)
868             ie_result['entries'] = playlist_results
869             self.to_screen('[download] Finished downloading playlist: %s' % playlist)
870             return ie_result
871         elif result_type == 'compat_list':
872             self.report_warning(
873                 'Extractor %s returned a compat_list result. '
874                 'It needs to be updated.' % ie_result.get('extractor'))
875
876             def _fixup(r):
877                 self.add_extra_info(
878                     r,
879                     {
880                         'extractor': ie_result['extractor'],
881                         'webpage_url': ie_result['webpage_url'],
882                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
883                         'extractor_key': ie_result['extractor_key'],
884                     }
885                 )
886                 return r
887             ie_result['entries'] = [
888                 self.process_ie_result(_fixup(r), download, extra_info)
889                 for r in ie_result['entries']
890             ]
891             return ie_result
892         else:
893             raise Exception('Invalid result type: %s' % result_type)
894
895     def _build_format_filter(self, filter_spec):
896         " Returns a function to filter the formats according to the filter_spec "
897
898         OPERATORS = {
899             '<': operator.lt,
900             '<=': operator.le,
901             '>': operator.gt,
902             '>=': operator.ge,
903             '=': operator.eq,
904             '!=': operator.ne,
905         }
906         operator_rex = re.compile(r'''(?x)\s*
907             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
908             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
909             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
910             $
911             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
912         m = operator_rex.search(filter_spec)
913         if m:
914             try:
915                 comparison_value = int(m.group('value'))
916             except ValueError:
917                 comparison_value = parse_filesize(m.group('value'))
918                 if comparison_value is None:
919                     comparison_value = parse_filesize(m.group('value') + 'B')
920                 if comparison_value is None:
921                     raise ValueError(
922                         'Invalid value %r in format specification %r' % (
923                             m.group('value'), filter_spec))
924             op = OPERATORS[m.group('op')]
925
926         if not m:
927             STR_OPERATORS = {
928                 '=': operator.eq,
929                 '!=': operator.ne,
930                 '^=': lambda attr, value: attr.startswith(value),
931                 '$=': lambda attr, value: attr.endswith(value),
932                 '*=': lambda attr, value: value in attr,
933             }
934             str_operator_rex = re.compile(r'''(?x)
935                 \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
936                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
937                 \s*(?P<value>[a-zA-Z0-9._-]+)
938                 \s*$
939                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
940             m = str_operator_rex.search(filter_spec)
941             if m:
942                 comparison_value = m.group('value')
943                 op = STR_OPERATORS[m.group('op')]
944
945         if not m:
946             raise ValueError('Invalid filter specification %r' % filter_spec)
947
948         def _filter(f):
949             actual_value = f.get(m.group('key'))
950             if actual_value is None:
951                 return m.group('none_inclusive')
952             return op(actual_value, comparison_value)
953         return _filter
954
    def build_format_selector(self, format_spec):
        """Compile a --format specification string into a selector function.

        The returned callable takes a ctx dict (keys: 'formats',
        'incomplete_formats') and yields the format dicts to download.
        Raises SyntaxError (via syntax_error) on a malformed spec.
        """
        def syntax_error(note, start):
            # Build a SyntaxError pointing a caret at column start[1]
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        # Node types of the parsed selector tree
        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        def _parse_filter(tokens):
            """Consume tokens up to the closing ']' and return the raw filter text."""
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    # Fuse adjacent names/numbers/unknown operators into one
                    # NAME token (e.g. 'mp4-baseline-16x9')
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            """Recursive-descent parser: turn the token stream into a list of
            FormatSelector nodes. The inside_* flags mark which construct we
            are nested in, so its terminator tokens end this level."""
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        # 'a/b': pick the first alternative that produces formats
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        # '[...]': attach a filter; bare '[...]' implies 'best'
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        # 'video+audio': merge two formats into one download
                        video_selector = current_selector
                        audio_selector = _parse_format_selection(tokens, inside_merge=True)
                        if not video_selector or not audio_selector:
                            raise syntax_error('"+" must be between two format selectors', start)
                        current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _build_selector_function(selector):
            """Recursively compile a FormatSelector node (or a list of them)
            into a function ctx -> iterable of formats."""
            if isinstance(selector, list):
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    for f in fs:
                        for format in f(ctx):
                            yield format
                return selector_function
            elif selector.type == GROUP:
                selector_function = _build_selector_function(selector.selector)
            elif selector.type == PICKFIRST:
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    # Return the first alternative that yields any formats
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []
            elif selector.type == SINGLE:
                format_spec = selector.selector

                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    if not formats:
                        return
                    if format_spec == 'all':
                        for f in formats:
                            yield f
                    elif format_spec in ['best', 'worst', None]:
                        # Formats are assumed sorted worst-to-best, so 'best'
                        # is the last element and 'worst' the first
                        format_idx = 0 if format_spec == 'worst' else -1
                        audiovideo_formats = [
                            f for f in formats
                            if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                        if audiovideo_formats:
                            yield audiovideo_formats[format_idx]
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) we will fallback to best/worst
                        # {video,audio}-only format
                        elif ctx['incomplete_formats']:
                            yield formats[format_idx]
                    elif format_spec == 'bestaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[-1]
                    elif format_spec == 'worstaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[0]
                    elif format_spec == 'bestvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[-1]
                    elif format_spec == 'worstvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[0]
                    else:
                        # Otherwise the spec is either a bare extension or an
                        # explicit format_id; pick the best (last) match
                        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                        if format_spec in extensions:
                            filter_f = lambda f: f['ext'] == format_spec
                        else:
                            filter_f = lambda f: f['format_id'] == format_spec
                        matches = list(filter(filter_f, formats))
                        if matches:
                            yield matches[-1]
            elif selector.type == MERGE:
                def _merge(formats_info):
                    format_1, format_2 = [f['format_id'] for f in formats_info]
                    # The first format must contain the video and the
                    # second the audio
                    if formats_info[0].get('vcodec') == 'none':
                        self.report_error('The first format must '
                                          'contain the video, try using '
                                          '"-f %s+%s"' % (format_2, format_1))
                        return
                    # Formats must be opposite (video+audio)
                    if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
                        self.report_error(
                            'Both formats %s and %s are video-only, you must specify "-f video+audio"'
                            % (format_1, format_2))
                        return
                    output_ext = (
                        formats_info[0]['ext']
                        if self.params.get('merge_output_format') is None
                        else self.params['merge_output_format'])
                    # Synthesized format: video properties from the first,
                    # audio properties from the second
                    return {
                        'requested_formats': formats_info,
                        'format': '%s+%s' % (formats_info[0].get('format'),
                                             formats_info[1].get('format')),
                        'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                formats_info[1].get('format_id')),
                        'width': formats_info[0].get('width'),
                        'height': formats_info[0].get('height'),
                        'resolution': formats_info[0].get('resolution'),
                        'fps': formats_info[0].get('fps'),
                        'vcodec': formats_info[0].get('vcodec'),
                        'vbr': formats_info[0].get('vbr'),
                        'stretched_ratio': formats_info[0].get('stretched_ratio'),
                        'acodec': formats_info[1].get('acodec'),
                        'abr': formats_info[1].get('abr'),
                        'ext': output_ext,
                    }
                video_selector, audio_selector = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    # Deep-copy ctx so each sub-selector filters independently
                    for pair in itertools.product(
                            video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
                        yield _merge(pair)

            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                # Apply the node's attached [filters] before selecting
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector

        # Tokenize the spec with Python's own tokenizer, then parse
        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator(object):
            """Token iterator with one-token push-back (restore_last_token),
            needed by the recursive parser above."""

            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            # Python 2 iterator protocol
            next = __next__

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
1219
1220     def _calc_headers(self, info_dict):
1221         res = std_headers.copy()
1222
1223         add_headers = info_dict.get('http_headers')
1224         if add_headers:
1225             res.update(add_headers)
1226
1227         cookies = self._calc_cookies(info_dict)
1228         if cookies:
1229             res['Cookie'] = cookies
1230
1231         return res
1232
1233     def _calc_cookies(self, info_dict):
1234         pr = sanitized_Request(info_dict['url'])
1235         self.cookiejar.add_cookie_header(pr)
1236         return pr.get_header('Cookie')
1237
1238     def process_video_result(self, info_dict, download=True):
1239         assert info_dict.get('_type', 'video') == 'video'
1240
1241         if 'id' not in info_dict:
1242             raise ExtractorError('Missing "id" field in extractor result')
1243         if 'title' not in info_dict:
1244             raise ExtractorError('Missing "title" field in extractor result')
1245
1246         if not isinstance(info_dict['id'], compat_str):
1247             self.report_warning('"id" field is not a string - forcing string conversion')
1248             info_dict['id'] = compat_str(info_dict['id'])
1249
1250         if 'playlist' not in info_dict:
1251             # It isn't part of a playlist
1252             info_dict['playlist'] = None
1253             info_dict['playlist_index'] = None
1254
1255         thumbnails = info_dict.get('thumbnails')
1256         if thumbnails is None:
1257             thumbnail = info_dict.get('thumbnail')
1258             if thumbnail:
1259                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1260         if thumbnails:
1261             thumbnails.sort(key=lambda t: (
1262                 t.get('preference') if t.get('preference') is not None else -1,
1263                 t.get('width') if t.get('width') is not None else -1,
1264                 t.get('height') if t.get('height') is not None else -1,
1265                 t.get('id') if t.get('id') is not None else '', t.get('url')))
1266             for i, t in enumerate(thumbnails):
1267                 t['url'] = sanitize_url(t['url'])
1268                 if t.get('width') and t.get('height'):
1269                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1270                 if t.get('id') is None:
1271                     t['id'] = '%d' % i
1272
1273         if self.params.get('list_thumbnails'):
1274             self.list_thumbnails(info_dict)
1275             return
1276
1277         thumbnail = info_dict.get('thumbnail')
1278         if thumbnail:
1279             info_dict['thumbnail'] = sanitize_url(thumbnail)
1280         elif thumbnails:
1281             info_dict['thumbnail'] = thumbnails[-1]['url']
1282
1283         if 'display_id' not in info_dict and 'id' in info_dict:
1284             info_dict['display_id'] = info_dict['id']
1285
1286         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1287             # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1288             # see http://bugs.python.org/issue1646728)
1289             try:
1290                 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1291                 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1292             except (ValueError, OverflowError, OSError):
1293                 pass
1294
1295         # Auto generate title fields corresponding to the *_number fields when missing
1296         # in order to always have clean titles. This is very common for TV series.
1297         for field in ('chapter', 'season', 'episode'):
1298             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1299                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
1300
1301         subtitles = info_dict.get('subtitles')
1302         if subtitles:
1303             for _, subtitle in subtitles.items():
1304                 for subtitle_format in subtitle:
1305                     if subtitle_format.get('url'):
1306                         subtitle_format['url'] = sanitize_url(subtitle_format['url'])
1307                     if subtitle_format.get('ext') is None:
1308                         subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1309
1310         if self.params.get('listsubtitles', False):
1311             if 'automatic_captions' in info_dict:
1312                 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
1313             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1314             return
1315         info_dict['requested_subtitles'] = self.process_subtitles(
1316             info_dict['id'], subtitles,
1317             info_dict.get('automatic_captions'))
1318
1319         # We now pick which formats have to be downloaded
1320         if info_dict.get('formats') is None:
1321             # There's only one format available
1322             formats = [info_dict]
1323         else:
1324             formats = info_dict['formats']
1325
1326         if not formats:
1327             raise ExtractorError('No video formats found!')
1328
1329         formats_dict = {}
1330
1331         # We check that all the formats have the format and format_id fields
1332         for i, format in enumerate(formats):
1333             if 'url' not in format:
1334                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1335
1336             format['url'] = sanitize_url(format['url'])
1337
1338             if format.get('format_id') is None:
1339                 format['format_id'] = compat_str(i)
1340             else:
1341                 # Sanitize format_id from characters used in format selector expression
1342                 format['format_id'] = re.sub('[\s,/+\[\]()]', '_', format['format_id'])
1343             format_id = format['format_id']
1344             if format_id not in formats_dict:
1345                 formats_dict[format_id] = []
1346             formats_dict[format_id].append(format)
1347
1348         # Make sure all formats have unique format_id
1349         for format_id, ambiguous_formats in formats_dict.items():
1350             if len(ambiguous_formats) > 1:
1351                 for i, format in enumerate(ambiguous_formats):
1352                     format['format_id'] = '%s-%d' % (format_id, i)
1353
1354         for i, format in enumerate(formats):
1355             if format.get('format') is None:
1356                 format['format'] = '{id} - {res}{note}'.format(
1357                     id=format['format_id'],
1358                     res=self.format_resolution(format),
1359                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1360                 )
1361             # Automatically determine file extension if missing
1362             if format.get('ext') is None:
1363                 format['ext'] = determine_ext(format['url']).lower()
1364             # Automatically determine protocol if missing (useful for format
1365             # selection purposes)
1366             if 'protocol' not in format:
1367                 format['protocol'] = determine_protocol(format)
1368             # Add HTTP headers, so that external programs can use them from the
1369             # json output
1370             full_format_info = info_dict.copy()
1371             full_format_info.update(format)
1372             format['http_headers'] = self._calc_headers(full_format_info)
1373
1374         # TODO Central sorting goes here
1375
1376         if formats[0] is not info_dict:
1377             # only set the 'formats' fields if the original info_dict list them
1378             # otherwise we end up with a circular reference, the first (and unique)
1379             # element in the 'formats' field in info_dict is info_dict itself,
1380             # which can't be exported to json
1381             info_dict['formats'] = formats
1382         if self.params.get('listformats'):
1383             self.list_formats(info_dict)
1384             return
1385
1386         req_format = self.params.get('format')
1387         if req_format is None:
1388             req_format_list = []
1389             if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
1390                     not info_dict.get('is_live')):
1391                 merger = FFmpegMergerPP(self)
1392                 if merger.available and merger.can_merge():
1393                     req_format_list.append('bestvideo+bestaudio')
1394             req_format_list.append('best')
1395             req_format = '/'.join(req_format_list)
1396         format_selector = self.build_format_selector(req_format)
1397
1398         # While in format selection we may need to have an access to the original
1399         # format set in order to calculate some metrics or do some processing.
1400         # For now we need to be able to guess whether original formats provided
1401         # by extractor are incomplete or not (i.e. whether extractor provides only
1402         # video-only or audio-only formats) for proper formats selection for
1403         # extractors with such incomplete formats (see
1404         # https://github.com/rg3/youtube-dl/pull/5556).
1405         # Since formats may be filtered during format selection and may not match
1406         # the original formats the results may be incorrect. Thus original formats
1407         # or pre-calculated metrics should be passed to format selection routines
1408         # as well.
1409         # We will pass a context object containing all necessary additional data
1410         # instead of just formats.
1411         # This fixes incorrect format selection issue (see
1412         # https://github.com/rg3/youtube-dl/issues/10083).
1413         incomplete_formats = (
1414             # All formats are video-only or
1415             all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) or
1416             # all formats are audio-only
1417             all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
1418
1419         ctx = {
1420             'formats': formats,
1421             'incomplete_formats': incomplete_formats,
1422         }
1423
1424         formats_to_download = list(format_selector(ctx))
1425         if not formats_to_download:
1426             raise ExtractorError('requested format not available',
1427                                  expected=True)
1428
1429         if download:
1430             if len(formats_to_download) > 1:
1431                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1432             for format in formats_to_download:
1433                 new_info = dict(info_dict)
1434                 new_info.update(format)
1435                 self.process_info(new_info)
1436         # We update the info dict with the best quality format (backwards compatibility)
1437         info_dict.update(formats_to_download[-1])
1438         return info_dict
1439
1440     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1441         """Select the requested subtitles and their format"""
1442         available_subs = {}
1443         if normal_subtitles and self.params.get('writesubtitles'):
1444             available_subs.update(normal_subtitles)
1445         if automatic_captions and self.params.get('writeautomaticsub'):
1446             for lang, cap_info in automatic_captions.items():
1447                 if lang not in available_subs:
1448                     available_subs[lang] = cap_info
1449
1450         if (not self.params.get('writesubtitles') and not
1451                 self.params.get('writeautomaticsub') or not
1452                 available_subs):
1453             return None
1454
1455         if self.params.get('allsubtitles', False):
1456             requested_langs = available_subs.keys()
1457         else:
1458             if self.params.get('subtitleslangs', False):
1459                 requested_langs = self.params.get('subtitleslangs')
1460             elif 'en' in available_subs:
1461                 requested_langs = ['en']
1462             else:
1463                 requested_langs = [list(available_subs.keys())[0]]
1464
1465         formats_query = self.params.get('subtitlesformat', 'best')
1466         formats_preference = formats_query.split('/') if formats_query else []
1467         subs = {}
1468         for lang in requested_langs:
1469             formats = available_subs.get(lang)
1470             if formats is None:
1471                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1472                 continue
1473             for ext in formats_preference:
1474                 if ext == 'best':
1475                     f = formats[-1]
1476                     break
1477                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1478                 if matches:
1479                     f = matches[-1]
1480                     break
1481             else:
1482                 f = formats[-1]
1483                 self.report_warning(
1484                     'No subtitle format found matching "%s" for language %s, '
1485                     'using %s' % (formats_query, lang, f['ext']))
1486             subs[lang] = f
1487         return subs
1488
1489     def process_info(self, info_dict):
1490         """Process a single resolved IE result."""
1491
1492         assert info_dict.get('_type', 'video') == 'video'
1493
1494         max_downloads = self.params.get('max_downloads')
1495         if max_downloads is not None:
1496             if self._num_downloads >= int(max_downloads):
1497                 raise MaxDownloadsReached()
1498
1499         info_dict['fulltitle'] = info_dict['title']
1500         if len(info_dict['title']) > 200:
1501             info_dict['title'] = info_dict['title'][:197] + '...'
1502
1503         if 'format' not in info_dict:
1504             info_dict['format'] = info_dict['ext']
1505
1506         reason = self._match_entry(info_dict, incomplete=False)
1507         if reason is not None:
1508             self.to_screen('[download] ' + reason)
1509             return
1510
1511         self._num_downloads += 1
1512
1513         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1514
1515         # Forced printings
1516         if self.params.get('forcetitle', False):
1517             self.to_stdout(info_dict['fulltitle'])
1518         if self.params.get('forceid', False):
1519             self.to_stdout(info_dict['id'])
1520         if self.params.get('forceurl', False):
1521             if info_dict.get('requested_formats') is not None:
1522                 for f in info_dict['requested_formats']:
1523                     self.to_stdout(f['url'] + f.get('play_path', ''))
1524             else:
1525                 # For RTMP URLs, also include the playpath
1526                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1527         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1528             self.to_stdout(info_dict['thumbnail'])
1529         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1530             self.to_stdout(info_dict['description'])
1531         if self.params.get('forcefilename', False) and filename is not None:
1532             self.to_stdout(filename)
1533         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1534             self.to_stdout(formatSeconds(info_dict['duration']))
1535         if self.params.get('forceformat', False):
1536             self.to_stdout(info_dict['format'])
1537         if self.params.get('forcejson', False):
1538             self.to_stdout(json.dumps(info_dict))
1539
1540         # Do nothing else if in simulate mode
1541         if self.params.get('simulate', False):
1542             return
1543
1544         if filename is None:
1545             return
1546
1547         try:
1548             dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1549             if dn and not os.path.exists(dn):
1550                 os.makedirs(dn)
1551         except (OSError, IOError) as err:
1552             self.report_error('unable to create directory ' + error_to_compat_str(err))
1553             return
1554
1555         if self.params.get('writedescription', False):
1556             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1557             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1558                 self.to_screen('[info] Video description is already present')
1559             elif info_dict.get('description') is None:
1560                 self.report_warning('There\'s no description to write.')
1561             else:
1562                 try:
1563                     self.to_screen('[info] Writing video description to: ' + descfn)
1564                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1565                         descfile.write(info_dict['description'])
1566                 except (OSError, IOError):
1567                     self.report_error('Cannot write description file ' + descfn)
1568                     return
1569
1570         if self.params.get('writeannotations', False):
1571             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1572             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1573                 self.to_screen('[info] Video annotations are already present')
1574             else:
1575                 try:
1576                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1577                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1578                         annofile.write(info_dict['annotations'])
1579                 except (KeyError, TypeError):
1580                     self.report_warning('There are no annotations to write.')
1581                 except (OSError, IOError):
1582                     self.report_error('Cannot write annotations file: ' + annofn)
1583                     return
1584
1585         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1586                                        self.params.get('writeautomaticsub')])
1587
1588         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1589             # subtitles download errors are already managed as troubles in relevant IE
1590             # that way it will silently go on when used with unsupporting IE
1591             subtitles = info_dict['requested_subtitles']
1592             ie = self.get_info_extractor(info_dict['extractor_key'])
1593             for sub_lang, sub_info in subtitles.items():
1594                 sub_format = sub_info['ext']
1595                 if sub_info.get('data') is not None:
1596                     sub_data = sub_info['data']
1597                 else:
1598                     try:
1599                         sub_data = ie._download_webpage(
1600                             sub_info['url'], info_dict['id'], note=False)
1601                     except ExtractorError as err:
1602                         self.report_warning('Unable to download subtitle for "%s": %s' %
1603                                             (sub_lang, error_to_compat_str(err.cause)))
1604                         continue
1605                 try:
1606                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1607                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1608                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1609                     else:
1610                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1611                         # Use newline='' to prevent conversion of newline characters
1612                         # See https://github.com/rg3/youtube-dl/issues/10268
1613                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
1614                             subfile.write(sub_data)
1615                 except (OSError, IOError):
1616                     self.report_error('Cannot write subtitles file ' + sub_filename)
1617                     return
1618
1619         if self.params.get('writeinfojson', False):
1620             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1621             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1622                 self.to_screen('[info] Video description metadata is already present')
1623             else:
1624                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1625                 try:
1626                     write_json_file(self.filter_requested_info(info_dict), infofn)
1627                 except (OSError, IOError):
1628                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1629                     return
1630
1631         self._write_thumbnails(info_dict, filename)
1632
1633         if not self.params.get('skip_download', False):
1634             try:
1635                 def dl(name, info):
1636                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1637                     for ph in self._progress_hooks:
1638                         fd.add_progress_hook(ph)
1639                     if self.params.get('verbose'):
1640                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1641                     return fd.download(name, info)
1642
1643                 if info_dict.get('requested_formats') is not None:
1644                     downloaded = []
1645                     success = True
1646                     merger = FFmpegMergerPP(self)
1647                     if not merger.available:
1648                         postprocessors = []
1649                         self.report_warning('You have requested multiple '
1650                                             'formats but ffmpeg or avconv are not installed.'
1651                                             ' The formats won\'t be merged.')
1652                     else:
1653                         postprocessors = [merger]
1654
1655                     def compatible_formats(formats):
1656                         video, audio = formats
1657                         # Check extension
1658                         video_ext, audio_ext = audio.get('ext'), video.get('ext')
1659                         if video_ext and audio_ext:
1660                             COMPATIBLE_EXTS = (
1661                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
1662                                 ('webm')
1663                             )
1664                             for exts in COMPATIBLE_EXTS:
1665                                 if video_ext in exts and audio_ext in exts:
1666                                     return True
1667                         # TODO: Check acodec/vcodec
1668                         return False
1669
1670                     filename_real_ext = os.path.splitext(filename)[1][1:]
1671                     filename_wo_ext = (
1672                         os.path.splitext(filename)[0]
1673                         if filename_real_ext == info_dict['ext']
1674                         else filename)
1675                     requested_formats = info_dict['requested_formats']
1676                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1677                         info_dict['ext'] = 'mkv'
1678                         self.report_warning(
1679                             'Requested formats are incompatible for merge and will be merged into mkv.')
1680                     # Ensure filename always has a correct extension for successful merge
1681                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1682                     if os.path.exists(encodeFilename(filename)):
1683                         self.to_screen(
1684                             '[download] %s has already been downloaded and '
1685                             'merged' % filename)
1686                     else:
1687                         for f in requested_formats:
1688                             new_info = dict(info_dict)
1689                             new_info.update(f)
1690                             fname = self.prepare_filename(new_info)
1691                             fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
1692                             downloaded.append(fname)
1693                             partial_success = dl(fname, new_info)
1694                             success = success and partial_success
1695                         info_dict['__postprocessors'] = postprocessors
1696                         info_dict['__files_to_merge'] = downloaded
1697                 else:
1698                     # Just a single file
1699                     success = dl(filename, info_dict)
1700             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1701                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
1702                 return
1703             except (OSError, IOError) as err:
1704                 raise UnavailableVideoError(err)
1705             except (ContentTooShortError, ) as err:
1706                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1707                 return
1708
1709             if success and filename != '-':
1710                 # Fixup content
1711                 fixup_policy = self.params.get('fixup')
1712                 if fixup_policy is None:
1713                     fixup_policy = 'detect_or_warn'
1714
1715                 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
1716
1717                 stretched_ratio = info_dict.get('stretched_ratio')
1718                 if stretched_ratio is not None and stretched_ratio != 1:
1719                     if fixup_policy == 'warn':
1720                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1721                             info_dict['id'], stretched_ratio))
1722                     elif fixup_policy == 'detect_or_warn':
1723                         stretched_pp = FFmpegFixupStretchedPP(self)
1724                         if stretched_pp.available:
1725                             info_dict.setdefault('__postprocessors', [])
1726                             info_dict['__postprocessors'].append(stretched_pp)
1727                         else:
1728                             self.report_warning(
1729                                 '%s: Non-uniform pixel ratio (%s). %s'
1730                                 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
1731                     else:
1732                         assert fixup_policy in ('ignore', 'never')
1733
1734                 if (info_dict.get('requested_formats') is None and
1735                         info_dict.get('container') == 'm4a_dash'):
1736                     if fixup_policy == 'warn':
1737                         self.report_warning(
1738                             '%s: writing DASH m4a. '
1739                             'Only some players support this container.'
1740                             % info_dict['id'])
1741                     elif fixup_policy == 'detect_or_warn':
1742                         fixup_pp = FFmpegFixupM4aPP(self)
1743                         if fixup_pp.available:
1744                             info_dict.setdefault('__postprocessors', [])
1745                             info_dict['__postprocessors'].append(fixup_pp)
1746                         else:
1747                             self.report_warning(
1748                                 '%s: writing DASH m4a. '
1749                                 'Only some players support this container. %s'
1750                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
1751                     else:
1752                         assert fixup_policy in ('ignore', 'never')
1753
1754                 if (info_dict.get('protocol') == 'm3u8_native' or
1755                         info_dict.get('protocol') == 'm3u8' and
1756                         self.params.get('hls_prefer_native')):
1757                     if fixup_policy == 'warn':
1758                         self.report_warning('%s: malformated aac bitstream.' % (
1759                             info_dict['id']))
1760                     elif fixup_policy == 'detect_or_warn':
1761                         fixup_pp = FFmpegFixupM3u8PP(self)
1762                         if fixup_pp.available:
1763                             info_dict.setdefault('__postprocessors', [])
1764                             info_dict['__postprocessors'].append(fixup_pp)
1765                         else:
1766                             self.report_warning(
1767                                 '%s: malformated aac bitstream. %s'
1768                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
1769                     else:
1770                         assert fixup_policy in ('ignore', 'never')
1771
1772                 try:
1773                     self.post_process(filename, info_dict)
1774                 except (PostProcessingError) as err:
1775                     self.report_error('postprocessing: %s' % str(err))
1776                     return
1777                 self.record_download_archive(info_dict)
1778
1779     def download(self, url_list):
1780         """Download a given list of URLs."""
1781         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1782         if (len(url_list) > 1 and
1783                 '%' not in outtmpl and
1784                 self.params.get('max_downloads') != 1):
1785             raise SameFileError(outtmpl)
1786
1787         for url in url_list:
1788             try:
1789                 # It also downloads the videos
1790                 res = self.extract_info(
1791                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
1792             except UnavailableVideoError:
1793                 self.report_error('unable to download video')
1794             except MaxDownloadsReached:
1795                 self.to_screen('[info] Maximum number of downloaded files reached.')
1796                 raise
1797             else:
1798                 if self.params.get('dump_single_json', False):
1799                     self.to_stdout(json.dumps(res))
1800
1801         return self._download_retcode
1802
1803     def download_with_info_file(self, info_filename):
1804         with contextlib.closing(fileinput.FileInput(
1805                 [info_filename], mode='r',
1806                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1807             # FileInput doesn't have a read method, we can't call json.load
1808             info = self.filter_requested_info(json.loads('\n'.join(f)))
1809         try:
1810             self.process_ie_result(info, download=True)
1811         except DownloadError:
1812             webpage_url = info.get('webpage_url')
1813             if webpage_url is not None:
1814                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1815                 return self.download([webpage_url])
1816             else:
1817                 raise
1818         return self._download_retcode
1819
1820     @staticmethod
1821     def filter_requested_info(info_dict):
1822         return dict(
1823             (k, v) for k, v in info_dict.items()
1824             if k not in ['requested_formats', 'requested_subtitles'])
1825
1826     def post_process(self, filename, ie_info):
1827         """Run all the postprocessors on the given file."""
1828         info = dict(ie_info)
1829         info['filepath'] = filename
1830         pps_chain = []
1831         if ie_info.get('__postprocessors') is not None:
1832             pps_chain.extend(ie_info['__postprocessors'])
1833         pps_chain.extend(self._pps)
1834         for pp in pps_chain:
1835             files_to_delete = []
1836             try:
1837                 files_to_delete, info = pp.run(info)
1838             except PostProcessingError as e:
1839                 self.report_error(e.msg)
1840             if files_to_delete and not self.params.get('keepvideo', False):
1841                 for old_filename in files_to_delete:
1842                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1843                     try:
1844                         os.remove(encodeFilename(old_filename))
1845                     except (IOError, OSError):
1846                         self.report_warning('Unable to remove downloaded original file')
1847
1848     def _make_archive_id(self, info_dict):
1849         # Future-proof against any change in case
1850         # and backwards compatibility with prior versions
1851         extractor = info_dict.get('extractor_key')
1852         if extractor is None:
1853             if 'id' in info_dict:
1854                 extractor = info_dict.get('ie_key')  # key in a playlist
1855         if extractor is None:
1856             return None  # Incomplete video information
1857         return extractor.lower() + ' ' + info_dict['id']
1858
1859     def in_download_archive(self, info_dict):
1860         fn = self.params.get('download_archive')
1861         if fn is None:
1862             return False
1863
1864         vid_id = self._make_archive_id(info_dict)
1865         if vid_id is None:
1866             return False  # Incomplete video information
1867
1868         try:
1869             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1870                 for line in archive_file:
1871                     if line.strip() == vid_id:
1872                         return True
1873         except IOError as ioe:
1874             if ioe.errno != errno.ENOENT:
1875                 raise
1876         return False
1877
1878     def record_download_archive(self, info_dict):
1879         fn = self.params.get('download_archive')
1880         if fn is None:
1881             return
1882         vid_id = self._make_archive_id(info_dict)
1883         assert vid_id
1884         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1885             archive_file.write(vid_id + '\n')
1886
1887     @staticmethod
1888     def format_resolution(format, default='unknown'):
1889         if format.get('vcodec') == 'none':
1890             return 'audio only'
1891         if format.get('resolution') is not None:
1892             return format['resolution']
1893         if format.get('height') is not None:
1894             if format.get('width') is not None:
1895                 res = '%sx%s' % (format['width'], format['height'])
1896             else:
1897                 res = '%sp' % format['height']
1898         elif format.get('width') is not None:
1899             res = '%dx?' % format['width']
1900         else:
1901             res = default
1902         return res
1903
    def _format_note(self, fdict):
        """Assemble the free-form 'note' column for one format dict.

        Joins language, bitrates, container, codecs, fps, sample rate and
        filesize details into a single string for --list-formats output.
        """
        res = ''
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('language'):
            if res:
                res += ' '
            res += '[%s] ' % fdict['language']
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            if res:
                res += ', '
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            if res:
                res += ', '
            res += fdict['vcodec']
            if fdict.get('vbr') is not None:
                # Glue the video bitrate right after the codec name
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            # No video codec known, but separate video/audio bitrates exist
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            if res:
                res += ', '
            res += '%sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            if fdict['acodec'] == 'none':
                res += 'video only'
            else:
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            # Audio bitrate follows the codec name (or the bare 'audio' marker)
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            if res:
                res += ', '
            # '~' marks the filesize as an estimate
            res += '~' + format_bytes(fdict['filesize_approx'])
        return res
1959
1960     def list_formats(self, info_dict):
1961         formats = info_dict.get('formats', [info_dict])
1962         table = [
1963             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1964             for f in formats
1965             if f.get('preference') is None or f['preference'] >= -1000]
1966         if len(formats) > 1:
1967             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1968
1969         header_line = ['format code', 'extension', 'resolution', 'note']
1970         self.to_screen(
1971             '[info] Available formats for %s:\n%s' %
1972             (info_dict['id'], render_table(header_line, table)))
1973
1974     def list_thumbnails(self, info_dict):
1975         thumbnails = info_dict.get('thumbnails')
1976         if not thumbnails:
1977             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
1978             return
1979
1980         self.to_screen(
1981             '[info] Thumbnails for %s:' % info_dict['id'])
1982         self.to_screen(render_table(
1983             ['ID', 'width', 'height', 'URL'],
1984             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1985
1986     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1987         if not subtitles:
1988             self.to_screen('%s has no %s' % (video_id, name))
1989             return
1990         self.to_screen(
1991             'Available %s for %s:' % (name, video_id))
1992         self.to_screen(render_table(
1993             ['Language', 'formats'],
1994             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1995                 for lang, formats in subtitles.items()]))
1996
1997     def urlopen(self, req):
1998         """ Start an HTTP download """
1999         if isinstance(req, compat_basestring):
2000             req = sanitized_Request(req)
2001         return self._opener.open(req, timeout=self._socket_timeout)
2002
    def print_debug_header(self):
        """Write version/environment diagnostics when --verbose is active.

        Prints encodings, youtube-dl version, git HEAD (when run from a
        checkout), Python/platform info, external program versions and the
        proxy map; with --call-home it also reports the public IP and warns
        about outdated versions.
        """
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        # sys.stdout may lack an 'encoding' attribute (e.g. replaced streams)
        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
        if _LAZY_LOADER:
            self._write_string('[debug] Lazy loading extractors enabled' + '\n')
        try:
            # Best effort: report the git commit when running from a checkout
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        except Exception:
            try:
                # Python 2 only; clears the exception state after the failed probe
                sys.exc_clear()
            except Exception:
                pass
        self._write_string('[debug] Python version %s - %s\n' % (
            platform.python_version(), platform_name()))

        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Collect the effective proxy configuration from all opener handlers
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
2069
    def _setup_opener(self):
        """Build the urllib opener used for all HTTP(S) requests.

        Configures the socket timeout, the cookie jar (optionally backed by
        the 'cookiefile' option), proxy handling and the custom HTTPS/data
        handlers; disables the file:// scheme for security. The result is
        stored in self._opener.
        """
        timeout_val = self.params.get('socket_timeout')
        # Default timeout is 600 seconds when no 'socket_timeout' is given
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # No cookie file: keep cookies in memory only
            self.cookiejar = compat_cookiejar.CookieJar()
        else:
            opts_cookiefile = compat_expanduser(opts_cookiefile)
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
                opts_cookiefile)
            # Only load when readable; an unreadable/missing file is skipped
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load()

        cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
        if opts_proxy is not None:
            if opts_proxy == '':
                # An explicit empty 'proxy' option disables all proxying
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            # Fall back to environment-configured proxies
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        data_handler = compat_urllib_request_DataHandler()

        # When passing our own FileHandler instance, build_opener won't add the
        # default FileHandler and allows us to disable the file protocol, which
        # can be used for malicious purposes (see
        # https://github.com/rg3/youtube-dl/issues/8227)
        file_handler = compat_urllib_request.FileHandler()

        def file_open(*args, **kwargs):
            raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
        file_handler.file_open = file_open

        opener = compat_urllib_request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)

        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
2122
2123     def encode(self, s):
2124         if isinstance(s, bytes):
2125             return s  # Already encoded
2126
2127         try:
2128             return s.encode(self.get_encoding())
2129         except UnicodeEncodeError as err:
2130             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2131             raise
2132
2133     def get_encoding(self):
2134         encoding = self.params.get('encoding')
2135         if encoding is None:
2136             encoding = preferredencoding()
2137         return encoding
2138
    def _write_thumbnails(self, info_dict, filename):
        """Download thumbnail(s) next to the video file.

        With 'writethumbnail' only the last thumbnail in the list is
        fetched (presumably the best one — ordering set by the extractor);
        with 'write_all_thumbnails' every one is. Download failures are
        reported as warnings, not raised.
        """
        if self.params.get('writethumbnail', False):
            thumbnails = info_dict.get('thumbnails')
            if thumbnails:
                # Keep only the last thumbnail in the list
                thumbnails = [thumbnails[-1]]
        elif self.params.get('write_all_thumbnails', False):
            thumbnails = info_dict.get('thumbnails')
        else:
            return

        if not thumbnails:
            # No thumbnails present, so return immediately
            return

        for t in thumbnails:
            thumb_ext = determine_ext(t['url'], 'jpg')
            # Disambiguate filenames/messages only when writing several thumbnails
            suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
            thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
            t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext

            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                self.to_screen('[%s] %s: Thumbnail %sis already present' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
            else:
                self.to_screen('[%s] %s: Downloading thumbnail %s...' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
                try:
                    uf = self.urlopen(t['url'])
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (t['url'], error_to_compat_str(err)))