git.bitcoin.ninja Git - youtube-dl/blob - youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import copy
   9 import datetime
  10 import errno
  11 import fileinput
  12 import io
  13 import itertools
  14 import json
  15 import locale
  16 import operator
  17 import os
  18 import platform
  19 import re
  20 import shutil
  21 import subprocess
  22 import socket
  23 import sys
  24 import time
  25 import tokenize
  26 import traceback
  27
  28 from .compat import (
  29     compat_basestring,
  30     compat_cookiejar,
  31     compat_expanduser,
  32     compat_get_terminal_size,
  33     compat_http_client,
  34     compat_kwargs,
  35     compat_os_name,
  36     compat_str,
  37     compat_tokenize_tokenize,
  38     compat_urllib_error,
  39     compat_urllib_request,
  40     compat_urllib_request_DataHandler,
  41 )
  42 from .utils import (
  43     age_restricted,
  44     args_to_str,
  45     ContentTooShortError,
  46     date_from_str,
  47     DateRange,
  48     DEFAULT_OUTTMPL,
  49     determine_ext,
  50     determine_protocol,
  51     DownloadError,
  52     encode_compat_str,
  53     encodeFilename,
  54     error_to_compat_str,
  55     ExtractorError,
  56     format_bytes,
  57     formatSeconds,
  58     locked_file,
  59     make_HTTPS_handler,
  60     MaxDownloadsReached,
  61     PagedList,
  62     parse_filesize,
  63     PerRequestProxyHandler,
  64     platform_name,
  65     PostProcessingError,
  66     preferredencoding,
  67     prepend_extension,
  68     register_socks_protocols,
  69     render_table,
  70     replace_extension,
  71     SameFileError,
  72     sanitize_filename,
  73     sanitize_path,
  74     sanitize_url,
  75     sanitized_Request,
  76     std_headers,
  77     subtitles_filename,
  78     UnavailableVideoError,
  79     url_basename,
  80     version_tuple,
  81     write_json_file,
  82     write_string,
  83     YoutubeDLCookieProcessor,
  84     YoutubeDLHandler,
  85 )
  86 from .cache import Cache
  87 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
  88 from .downloader import get_suitable_downloader
  89 from .downloader.rtmp import rtmpdump_version
  90 from .postprocessor import (
  91     FFmpegFixupM3u8PP,
  92     FFmpegFixupM4aPP,
  93     FFmpegFixupStretchedPP,
  94     FFmpegMergerPP,
  95     FFmpegPostProcessor,
  96     get_postprocessor,
  97 )
  98 from .version import __version__
  99
 100 if compat_os_name == 'nt':
 101     import ctypes
 102
 103
 104 class YoutubeDL(object):
 105     """YoutubeDL class.
 106
 107     YoutubeDL objects are the ones responsible of downloading the
 108     actual video file and writing it to disk if the user has requested
 109     it, among some other tasks. In most cases there should be one per
 110     program. As, given a video URL, the downloader doesn't know how to
 111     extract all the needed information, task that InfoExtractors do, it
 112     has to pass the URL to one of them.
 113
 114     For this, YoutubeDL objects have a method that allows
 115     InfoExtractors to be registered in a given order. When it is passed
 116     a URL, the YoutubeDL object handles it to the first InfoExtractor it
 117     finds that reports being able to handle it. The InfoExtractor extracts
 118     all the information about the video or videos the URL refers to, and
 119     YoutubeDL process the extracted information, possibly using a File
 120     Downloader to download the video.
 121
 122     YoutubeDL objects accept a lot of parameters. In order not to saturate
 123     the object constructor with arguments, it receives a dictionary of
 124     options instead. These options are available through the params
 125     attribute for the InfoExtractors to use. The YoutubeDL also
 126     registers itself as the downloader in charge for the InfoExtractors
 127     that are added to it, so this is a "mutual registration".
 128
 129     Available options:
 130
 131     username:          Username for authentication purposes.
 132     password:          Password for authentication purposes.
 133     videopassword:     Password for accessing a video.
 134     ap_mso_id          Adobe Pass Multiple-system operator Identifier.
 135     usenetrc:          Use netrc for authentication instead.
 136     verbose:           Print additional info to stdout.
 137     quiet:             Do not print messages to stdout.
 138     no_warnings:       Do not print out anything for warnings.
 139     forceurl:          Force printing final URL.
 140     forcetitle:        Force printing title.
 141     forceid:           Force printing ID.
 142     forcethumbnail:    Force printing thumbnail URL.
 143     forcedescription:  Force printing description.
 144     forcefilename:     Force printing final filename.
 145     forceduration:     Force printing duration.
 146     forcejson:         Force printing info_dict as JSON.
 147     dump_single_json:  Force printing the info_dict of the whole playlist
 148                        (or video) as a single JSON line.
 149     simulate:          Do not download the video files.
 150     format:            Video format code. See options.py for more information.
 151     outtmpl:           Template for output names.
 152     restrictfilenames: Do not allow "&" and spaces in file names
 153     ignoreerrors:      Do not stop on download errors.
 154     force_generic_extractor: Force downloader to use the generic extractor
 155     nooverwrites:      Prevent overwriting files.
 156     playliststart:     Playlist item to start at.
 157     playlistend:       Playlist item to end at.
 158     playlist_items:    Specific indices of playlist to download.
 159     playlistreverse:   Download playlist items in reverse order.
 160     matchtitle:        Download only matching titles.
 161     rejecttitle:       Reject downloads for matching titles.
 162     logger:            Log messages to a logging.Logger instance.
 163     logtostderr:       Log messages to stderr instead of stdout.
 164     writedescription:  Write the video description to a .description file
 165     writeinfojson:     Write the video description to a .info.json file
 166     writeannotations:  Write the video annotations to a .annotations.xml file
 167     writethumbnail:    Write the thumbnail image to a file
 168     write_all_thumbnails:  Write all thumbnail formats to files
 169     writesubtitles:    Write the video subtitles to a file
 170     writeautomaticsub: Write the automatically generated subtitles to a file
 171     allsubtitles:      Downloads all the subtitles of the video
 172                        (requires writesubtitles or writeautomaticsub)
 173     listsubtitles:     Lists all available subtitles for the video
 174     subtitlesformat:   The format code for subtitles
 175     subtitleslangs:    List of languages of the subtitles to download
 176     keepvideo:         Keep the video file after post-processing
 177     daterange:         A DateRange object, download only if the upload_date is in the range.
 178     skip_download:     Skip the actual download of the video file
 179     cachedir:          Location of the cache files in the filesystem.
 180                        False to disable filesystem cache.
 181     noplaylist:        Download single video instead of a playlist if in doubt.
 182     age_limit:         An integer representing the user's age in years.
 183                        Unsuitable videos for the given age are skipped.
 184     min_views:         An integer representing the minimum view count the video
 185                        must have in order to not be skipped.
 186                        Videos without view count information are always
 187                        downloaded. None for no limit.
 188     max_views:         An integer representing the maximum view count.
 189                        Videos that are more popular than that are not
 190                        downloaded.
 191                        Videos without view count information are always
 192                        downloaded. None for no limit.
 193     download_archive:  File name of a file where all downloads are recorded.
 194                        Videos already present in the file are not downloaded
 195                        again.
 196     cookiefile:        File name where cookies should be read from and dumped to.
 197     nocheckcertificate:Do not verify SSL certificates
 198     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 199                        At the moment, this is only supported by YouTube.
 200     proxy:             URL of the proxy server to use
 201     geo_verification_proxy:  URL of the proxy to use for IP address verification
 202                        on geo-restricted sites. (Experimental)
 203     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 204     bidi_workaround:   Work around buggy terminals without bidirectional text
 205                        support, using fridibi
 206     debug_printtraffic:Print out sent and received HTTP traffic
 207     include_ads:       Download ads as well
 208     default_search:    Prepend this string if an input url is not valid.
 209                        'auto' for elaborate guessing
 210     encoding:          Use this encoding instead of the system-specified.
 211     extract_flat:      Do not resolve URLs, return the immediate result.
 212                        Pass in 'in_playlist' to only show this behavior for
 213                        playlist items.
 214     postprocessors:    A list of dictionaries, each with an entry
 215                        * key:  The name of the postprocessor. See
 216                                youtube_dl/postprocessor/__init__.py for a list.
 217                        as well as any further keyword arguments for the
 218                        postprocessor.
 219     progress_hooks:    A list of functions that get called on download
 220                        progress, with a dictionary with the entries
 221                        * status: One of "downloading", "error", or "finished".
 222                                  Check this first and ignore unknown values.
 223
 224                        If status is one of "downloading", or "finished", the
 225                        following properties may also be present:
 226                        * filename: The final filename (always present)
 227                        * tmpfilename: The filename we're currently writing to
 228                        * downloaded_bytes: Bytes on disk
 229                        * total_bytes: Size of the whole file, None if unknown
 230                        * total_bytes_estimate: Guess of the eventual file size,
 231                                                None if unavailable.
 232                        * elapsed: The number of seconds since download started.
 233                        * eta: The estimated time in seconds, None if unknown
 234                        * speed: The download speed in bytes/second, None if
 235                                 unknown
 236                        * fragment_index: The counter of the currently
 237                                          downloaded video fragment.
 238                        * fragment_count: The number of fragments (= individual
 239                                          files that will be merged)
 240
 241                        Progress hooks are guaranteed to be called at least once
 242                        (with status "finished") if the download is successful.
 243     merge_output_format: Extension to use when merging formats.
 244     fixup:             Automatically correct known faults of the file.
 245                        One of:
 246                        - "never": do nothing
 247                        - "warn": only emit a warning
 248                        - "detect_or_warn": check whether we can do anything
 249                                            about it, warn otherwise (default)
 250     source_address:    (Experimental) Client-side IP address to bind to.
 251     call_home:         Boolean, true iff we are allowed to contact the
 252                        youtube-dl servers for debugging.
 253     sleep_interval:    Number of seconds to sleep before each download when
 254                        used alone or a lower bound of a range for randomized
 255                        sleep before each download (minimum possible number
 256                        of seconds to sleep) when used along with
 257                        max_sleep_interval.
 258     max_sleep_interval:Upper bound of a range for randomized sleep before each
 259                        download (maximum possible number of seconds to sleep).
 260                        Must only be used along with sleep_interval.
 261                        Actual sleep time will be a random float from range
 262                        [sleep_interval; max_sleep_interval].
 263     listformats:       Print an overview of available video formats and exit.
 264     list_thumbnails:   Print a table of all thumbnails and exit.
 265     match_filter:      A function that gets called with the info_dict of
 266                        every video.
 267                        If it returns a message, the video is ignored.
 268                        If it returns None, the video is downloaded.
 269                        match_filter_func in utils.py is one example for this.
 270     no_color:          Do not emit color codes in output.
 271
 272     The following options determine which downloader is picked:
 273     external_downloader: Executable of the external downloader to call.
 274                        None or unset for standard (built-in) downloader.
 275     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
 276                        if True, otherwise use ffmpeg/avconv if False, otherwise
 277                        use downloader suggested by extractor if None.
 278
 279     The following parameters are not used by YoutubeDL itself, they are used by
 280     the downloader (see youtube_dl/downloader/common.py):
 281     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 282     noresizebuffer, retries, continuedl, noprogress, consoletitle,
 283     xattr_set_filesize, external_downloader_args, hls_use_mpegts.
 284
 285     The following options are used by the post processors:
 286     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
 287                        otherwise prefer avconv.
 288     postprocessor_args: A list of additional command-line arguments for the
 289                         postprocessor.
 290     """
 291
 292     params = None
 293     _ies = []
 294     _pps = []
 295     _download_retcode = None
 296     _num_downloads = None
 297     _screen_file = None
 298
 299     def __init__(self, params=None, auto_init=True):
 300         """Create a FileDownloader object with the given options."""
 301         if params is None:
 302             params = {}
 303         self._ies = []
 304         self._ies_instances = {}
 305         self._pps = []
 306         self._progress_hooks = []
 307         self._download_retcode = 0
 308         self._num_downloads = 0
 309         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 310         self._err_file = sys.stderr
 311         self.params = {
 312             # Default parameters
 313             'nocheckcertificate': False,
 314         }
 315         self.params.update(params)
 316         self.cache = Cache(self)
 317
 318         if self.params.get('cn_verification_proxy') is not None:
 319             self.report_warning('--cn-verification-proxy is deprecated. Use --geo-verification-proxy instead.')
 320             if self.params.get('geo_verification_proxy') is None:
 321                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
 322
 323         if params.get('bidi_workaround', False):
 324             try:
 325                 import pty
 326                 master, slave = pty.openpty()
 327                 width = compat_get_terminal_size().columns
 328                 if width is None:
 329                     width_args = []
 330                 else:
 331                     width_args = ['-w', str(width)]
 332                 sp_kwargs = dict(
 333                     stdin=subprocess.PIPE,
 334                     stdout=slave,
 335                     stderr=self._err_file)
 336                 try:
 337                     self._output_process = subprocess.Popen(
 338                         ['bidiv'] + width_args, **sp_kwargs
 339                     )
 340                 except OSError:
 341                     self._output_process = subprocess.Popen(
 342                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 343                 self._output_channel = os.fdopen(master, 'rb')
 344             except OSError as ose:
 345                 if ose.errno == errno.ENOENT:
 346                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 347                 else:
 348                     raise
 349
 350         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 351                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
 352                 not params.get('restrictfilenames', False)):
 353             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 354             self.report_warning(
 355                 'Assuming --restrict-filenames since file system encoding '
 356                 'cannot encode all characters. '
 357                 'Set the LC_ALL environment variable to fix this.')
 358             self.params['restrictfilenames'] = True
 359
 360         if isinstance(params.get('outtmpl'), bytes):
 361             self.report_warning(
 362                 'Parameter outtmpl is bytes, but should be a unicode string. '
 363                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 364
 365         self._setup_opener()
 366
 367         if auto_init:
 368             self.print_debug_header()
 369             self.add_default_info_extractors()
 370
 371         for pp_def_raw in self.params.get('postprocessors', []):
 372             pp_class = get_postprocessor(pp_def_raw['key'])
 373             pp_def = dict(pp_def_raw)
 374             del pp_def['key']
 375             pp = pp_class(self, **compat_kwargs(pp_def))
 376             self.add_post_processor(pp)
 377
 378         for ph in self.params.get('progress_hooks', []):
 379             self.add_progress_hook(ph)
 380
 381         register_socks_protocols()
 382
 383     def warn_if_short_id(self, argv):
 384         # short YouTube ID starting with dash?
 385         idxs = [
 386             i for i, a in enumerate(argv)
 387             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 388         if idxs:
 389             correct_argv = (
 390                 ['youtube-dl'] +
 391                 [a for i, a in enumerate(argv) if i not in idxs] +
 392                 ['--'] + [argv[i] for i in idxs]
 393             )
 394             self.report_warning(
 395                 'Long argument string detected. '
 396                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 397                 args_to_str(correct_argv))
 398
 399     def add_info_extractor(self, ie):
 400         """Add an InfoExtractor object to the end of the list."""
 401         self._ies.append(ie)
 402         if not isinstance(ie, type):
 403             self._ies_instances[ie.ie_key()] = ie
 404             ie.set_downloader(self)
 405
 406     def get_info_extractor(self, ie_key):
 407         """
 408         Get an instance of an IE with name ie_key, it will try to get one from
 409         the _ies list, if there's no instance it will create a new one and add
 410         it to the extractor list.
 411         """
 412         ie = self._ies_instances.get(ie_key)
 413         if ie is None:
 414             ie = get_info_extractor(ie_key)()
 415             self.add_info_extractor(ie)
 416         return ie
 417
 418     def add_default_info_extractors(self):
 419         """
 420         Add the InfoExtractors returned by gen_extractors to the end of the list
 421         """
 422         for ie in gen_extractor_classes():
 423             self.add_info_extractor(ie)
 424
 425     def add_post_processor(self, pp):
 426         """Add a PostProcessor object to the end of the chain."""
 427         self._pps.append(pp)
 428         pp.set_downloader(self)
 429
 430     def add_progress_hook(self, ph):
 431         """Add the progress hook (currently only for the file downloader)"""
 432         self._progress_hooks.append(ph)
 433
 434     def _bidi_workaround(self, message):
 435         if not hasattr(self, '_output_channel'):
 436             return message
 437
 438         assert hasattr(self, '_output_process')
 439         assert isinstance(message, compat_str)
 440         line_count = message.count('\n') + 1
 441         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 442         self._output_process.stdin.flush()
 443         res = ''.join(self._output_channel.readline().decode('utf-8')
 444                       for _ in range(line_count))
 445         return res[:-len('\n')]
 446
 447     def to_screen(self, message, skip_eol=False):
 448         """Print message to stdout if not in quiet mode."""
 449         return self.to_stdout(message, skip_eol, check_quiet=True)
 450
 451     def _write_string(self, s, out=None):
 452         write_string(s, out=out, encoding=self.params.get('encoding'))
 453
 454     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 455         """Print message to stdout if not in quiet mode."""
 456         if self.params.get('logger'):
 457             self.params['logger'].debug(message)
 458         elif not check_quiet or not self.params.get('quiet', False):
 459             message = self._bidi_workaround(message)
 460             terminator = ['\n', ''][skip_eol]
 461             output = message + terminator
 462
 463             self._write_string(output, self._screen_file)
 464
 465     def to_stderr(self, message):
 466         """Print message to stderr."""
 467         assert isinstance(message, compat_str)
 468         if self.params.get('logger'):
 469             self.params['logger'].error(message)
 470         else:
 471             message = self._bidi_workaround(message)
 472             output = message + '\n'
 473             self._write_string(output, self._err_file)
 474
 475     def to_console_title(self, message):
 476         if not self.params.get('consoletitle', False):
 477             return
 478         if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 479             # c_wchar_p() might not be necessary if `message` is
 480             # already of type unicode()
 481             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 482         elif 'TERM' in os.environ:
 483             self._write_string('\033]0;%s\007' % message, self._screen_file)
 484
 485     def save_console_title(self):
 486         if not self.params.get('consoletitle', False):
 487             return
 488         if 'TERM' in os.environ:
 489             # Save the title on stack
 490             self._write_string('\033[22;0t', self._screen_file)
 491
 492     def restore_console_title(self):
 493         if not self.params.get('consoletitle', False):
 494             return
 495         if 'TERM' in os.environ:
 496             # Restore the title from stack
 497             self._write_string('\033[23;0t', self._screen_file)
 498
 499     def __enter__(self):
 500         self.save_console_title()
 501         return self
 502
 503     def __exit__(self, *args):
 504         self.restore_console_title()
 505
 506         if self.params.get('cookiefile') is not None:
 507             self.cookiejar.save()
 508
 509     def trouble(self, message=None, tb=None):
 510         """Determine action to take when a download problem appears.
 511
 512         Depending on if the downloader has been configured to ignore
 513         download errors or not, this method may throw an exception or
 514         not when errors are found, after printing the message.
 515
 516         tb, if given, is additional traceback information.
 517         """
 518         if message is not None:
 519             self.to_stderr(message)
 520         if self.params.get('verbose'):
 521             if tb is None:
 522                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 523                     tb = ''
 524                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 525                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 526                     tb += encode_compat_str(traceback.format_exc())
 527                 else:
 528                     tb_data = traceback.format_list(traceback.extract_stack())
 529                     tb = ''.join(tb_data)
 530             self.to_stderr(tb)
 531         if not self.params.get('ignoreerrors', False):
 532             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 533                 exc_info = sys.exc_info()[1].exc_info
 534             else:
 535                 exc_info = sys.exc_info()
 536             raise DownloadError(message, exc_info)
 537         self._download_retcode = 1
 538
 539     def report_warning(self, message):
 540         '''
 541         Print the message to stderr, it will be prefixed with 'WARNING:'
 542         If stderr is a tty file the 'WARNING:' will be colored
 543         '''
 544         if self.params.get('logger') is not None:
 545             self.params['logger'].warning(message)
 546         else:
 547             if self.params.get('no_warnings'):
 548                 return
 549             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 550                 _msg_header = '\033[0;33mWARNING:\033[0m'
 551             else:
 552                 _msg_header = 'WARNING:'
 553             warning_message = '%s %s' % (_msg_header, message)
 554             self.to_stderr(warning_message)
 555
 556     def report_error(self, message, tb=None):
 557         '''
 558         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 559         in red if stderr is a tty file.
 560         '''
 561         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
 562             _msg_header = '\033[0;31mERROR:\033[0m'
 563         else:
 564             _msg_header = 'ERROR:'
 565         error_message = '%s %s' % (_msg_header, message)
 566         self.trouble(error_message, tb)
 567
 568     def report_file_already_downloaded(self, file_name):
 569         """Report file has already been fully downloaded."""
 570         try:
 571             self.to_screen('[download] %s has already been downloaded' % file_name)
 572         except UnicodeEncodeError:
 573             self.to_screen('[download] The file has already been downloaded')
 574
 575     def prepare_filename(self, info_dict):
 576         """Generate the output filename."""
 577         try:
 578             template_dict = dict(info_dict)
 579
 580             template_dict['epoch'] = int(time.time())
 581             autonumber_size = self.params.get('autonumber_size')
 582             if autonumber_size is None:
 583                 autonumber_size = 5
 584             autonumber_templ = '%0' + str(autonumber_size) + 'd'
 585             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 586             if template_dict.get('playlist_index') is not None:
 587                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
 588             if template_dict.get('resolution') is None:
 589                 if template_dict.get('width') and template_dict.get('height'):
 590                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 591                 elif template_dict.get('height'):
 592                     template_dict['resolution'] = '%sp' % template_dict['height']
 593                 elif template_dict.get('width'):
 594                     template_dict['resolution'] = '%dx?' % template_dict['width']
 595
 596             sanitize = lambda k, v: sanitize_filename(
 597                 compat_str(v),
 598                 restricted=self.params.get('restrictfilenames'),
 599                 is_id=(k == 'id'))
 600             template_dict = dict((k, sanitize(k, v))
 601                                  for k, v in template_dict.items()
 602                                  if v is not None and not isinstance(v, (list, tuple, dict)))
 603             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 604
 605             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 606             tmpl = compat_expanduser(outtmpl)
 607             filename = tmpl % template_dict
 608             # Temporary fix for #4787
 609             # 'Treat' all problem characters by passing filename through preferredencoding
 610             # to workaround encoding issues with subprocess on python2 @ Windows
 611             if sys.version_info < (3, 0) and sys.platform == 'win32':
 612                 filename = encodeFilename(filename, True).decode(preferredencoding())
 613             return sanitize_path(filename)
 614         except ValueError as err:
 615             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 616             return None
 617
 618     def _match_entry(self, info_dict, incomplete):
 619         """ Returns None iff the file should be downloaded """
 620
 621         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 622         if 'title' in info_dict:
 623             # This can happen when we're just evaluating the playlist
 624             title = info_dict['title']
 625             matchtitle = self.params.get('matchtitle', False)
 626             if matchtitle:
 627                 if not re.search(matchtitle, title, re.IGNORECASE):
 628                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 629             rejecttitle = self.params.get('rejecttitle', False)
 630             if rejecttitle:
 631                 if re.search(rejecttitle, title, re.IGNORECASE):
 632                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 633         date = info_dict.get('upload_date')
 634         if date is not None:
 635             dateRange = self.params.get('daterange', DateRange())
 636             if date not in dateRange:
 637                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 638         view_count = info_dict.get('view_count')
 639         if view_count is not None:
 640             min_views = self.params.get('min_views')
 641             if min_views is not None and view_count < min_views:
 642                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 643             max_views = self.params.get('max_views')
 644             if max_views is not None and view_count > max_views:
 645                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 646         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
 647             return 'Skipping "%s" because it is age restricted' % video_title
 648         if self.in_download_archive(info_dict):
 649             return '%s has already been recorded in archive' % video_title
 650
 651         if not incomplete:
 652             match_filter = self.params.get('match_filter')
 653             if match_filter is not None:
 654                 ret = match_filter(info_dict)
 655                 if ret is not None:
 656                     return ret
 657
 658         return None
 659
 660     @staticmethod
 661     def add_extra_info(info_dict, extra_info):
 662         '''Set the keys from extra_info in info dict if they are missing'''
 663         for key, value in extra_info.items():
 664             info_dict.setdefault(key, value)
 665
 666     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 667                      process=True, force_generic_extractor=False):
 668         '''
 669         Returns a list with a dictionary for each video we find.
 670         If 'download', also downloads the videos.
 671         extra_info is a dict containing the extra values to add to each result
 672         '''
 673
 674         if not ie_key and force_generic_extractor:
 675             ie_key = 'Generic'
 676
 677         if ie_key:
 678             ies = [self.get_info_extractor(ie_key)]
 679         else:
 680             ies = self._ies
 681
 682         for ie in ies:
 683             if not ie.suitable(url):
 684                 continue
 685
 686             ie = self.get_info_extractor(ie.ie_key())
 687             if not ie.working():
 688                 self.report_warning('The program functionality for this site has been marked as broken, '
 689                                     'and will probably not work.')
 690
 691             try:
 692                 ie_result = ie.extract(url)
 693                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
 694                     break
 695                 if isinstance(ie_result, list):
 696                     # Backwards compatibility: old IE result format
 697                     ie_result = {
 698                         '_type': 'compat_list',
 699                         'entries': ie_result,
 700                     }
 701                 self.add_default_extra_info(ie_result, ie, url)
 702                 if process:
 703                     return self.process_ie_result(ie_result, download, extra_info)
 704                 else:
 705                     return ie_result
 706             except ExtractorError as e:  # An error we somewhat expected
 707                 self.report_error(compat_str(e), e.format_traceback())
 708                 break
 709             except MaxDownloadsReached:
 710                 raise
 711             except Exception as e:
 712                 if self.params.get('ignoreerrors', False):
 713                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
 714                     break
 715                 else:
 716                     raise
 717         else:
 718             self.report_error('no suitable InfoExtractor for URL %s' % url)
 719
 720     def add_default_extra_info(self, ie_result, ie, url):
 721         self.add_extra_info(ie_result, {
 722             'extractor': ie.IE_NAME,
 723             'webpage_url': url,
 724             'webpage_url_basename': url_basename(url),
 725             'extractor_key': ie.ie_key(),
 726         })
 727
 728     def process_ie_result(self, ie_result, download=True, extra_info={}):
 729         """
 730         Take the result of the ie(may be modified) and resolve all unresolved
 731         references (URLs, playlist items).
 732
 733         It will also download the videos if 'download'.
 734         Returns the resolved ie_result.
 735         """
 736         result_type = ie_result.get('_type', 'video')
 737
 738         if result_type in ('url', 'url_transparent'):
 739             ie_result['url'] = sanitize_url(ie_result['url'])
 740             extract_flat = self.params.get('extract_flat', False)
 741             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
 742                     extract_flat is True):
 743                 if self.params.get('forcejson', False):
 744                     self.to_stdout(json.dumps(ie_result))
 745                 return ie_result
 746
 747         if result_type == 'video':
 748             self.add_extra_info(ie_result, extra_info)
 749             return self.process_video_result(ie_result, download=download)
 750         elif result_type == 'url':
 751             # We have to add extra_info to the results because it may be
 752             # contained in a playlist
 753             return self.extract_info(ie_result['url'],
 754                                      download,
 755                                      ie_key=ie_result.get('ie_key'),
 756                                      extra_info=extra_info)
 757         elif result_type == 'url_transparent':
 758             # Use the information from the embedding page
 759             info = self.extract_info(
 760                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 761                 extra_info=extra_info, download=False, process=False)
 762
 763             force_properties = dict(
 764                 (k, v) for k, v in ie_result.items() if v is not None)
 765             for f in ('_type', 'url', 'ie_key'):
 766                 if f in force_properties:
 767                     del force_properties[f]
 768             new_result = info.copy()
 769             new_result.update(force_properties)
 770
 771             assert new_result.get('_type') != 'url_transparent'
 772
 773             return self.process_ie_result(
 774                 new_result, download=download, extra_info=extra_info)
 775         elif result_type == 'playlist' or result_type == 'multi_video':
 776             # We process each entry in the playlist
 777             playlist = ie_result.get('title') or ie_result.get('id')
 778             self.to_screen('[download] Downloading playlist: %s' % playlist)
 779
 780             playlist_results = []
 781
 782             playliststart = self.params.get('playliststart', 1) - 1
 783             playlistend = self.params.get('playlistend')
 784             # For backwards compatibility, interpret -1 as whole list
 785             if playlistend == -1:
 786                 playlistend = None
 787
 788             playlistitems_str = self.params.get('playlist_items')
 789             playlistitems = None
 790             if playlistitems_str is not None:
 791                 def iter_playlistitems(format):
 792                     for string_segment in format.split(','):
 793                         if '-' in string_segment:
 794                             start, end = string_segment.split('-')
 795                             for item in range(int(start), int(end) + 1):
 796                                 yield int(item)
 797                         else:
 798                             yield int(string_segment)
 799                 playlistitems = iter_playlistitems(playlistitems_str)
 800
 801             ie_entries = ie_result['entries']
 802             if isinstance(ie_entries, list):
 803                 n_all_entries = len(ie_entries)
 804                 if playlistitems:
 805                     entries = [
 806                         ie_entries[i - 1] for i in playlistitems
 807                         if -n_all_entries <= i - 1 < n_all_entries]
 808                 else:
 809                     entries = ie_entries[playliststart:playlistend]
 810                 n_entries = len(entries)
 811                 self.to_screen(
 812                     '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
 813                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
 814             elif isinstance(ie_entries, PagedList):
 815                 if playlistitems:
 816                     entries = []
 817                     for item in playlistitems:
 818                         entries.extend(ie_entries.getslice(
 819                             item - 1, item
 820                         ))
 821                 else:
 822                     entries = ie_entries.getslice(
 823                         playliststart, playlistend)
 824                 n_entries = len(entries)
 825                 self.to_screen(
 826                     '[%s] playlist %s: Downloading %d videos' %
 827                     (ie_result['extractor'], playlist, n_entries))
 828             else:  # iterable
 829                 if playlistitems:
 830                     entry_list = list(ie_entries)
 831                     entries = [entry_list[i - 1] for i in playlistitems]
 832                 else:
 833                     entries = list(itertools.islice(
 834                         ie_entries, playliststart, playlistend))
 835                 n_entries = len(entries)
 836                 self.to_screen(
 837                     '[%s] playlist %s: Downloading %d videos' %
 838                     (ie_result['extractor'], playlist, n_entries))
 839
 840             if self.params.get('playlistreverse', False):
 841                 entries = entries[::-1]
 842
 843             for i, entry in enumerate(entries, 1):
 844                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
 845                 extra = {
 846                     'n_entries': n_entries,
 847                     'playlist': playlist,
 848                     'playlist_id': ie_result.get('id'),
 849                     'playlist_title': ie_result.get('title'),
 850                     'playlist_index': i + playliststart,
 851                     'extractor': ie_result['extractor'],
 852                     'webpage_url': ie_result['webpage_url'],
 853                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
 854                     'extractor_key': ie_result['extractor_key'],
 855                 }
 856
 857                 reason = self._match_entry(entry, incomplete=True)
 858                 if reason is not None:
 859                     self.to_screen('[download] ' + reason)
 860                     continue
 861
 862                 entry_result = self.process_ie_result(entry,
 863                                                       download=download,
 864                                                       extra_info=extra)
 865                 playlist_results.append(entry_result)
 866             ie_result['entries'] = playlist_results
 867             self.to_screen('[download] Finished downloading playlist: %s' % playlist)
 868             return ie_result
 869         elif result_type == 'compat_list':
 870             self.report_warning(
 871                 'Extractor %s returned a compat_list result. '
 872                 'It needs to be updated.' % ie_result.get('extractor'))
 873
 874             def _fixup(r):
 875                 self.add_extra_info(
 876                     r,
 877                     {
 878                         'extractor': ie_result['extractor'],
 879                         'webpage_url': ie_result['webpage_url'],
 880                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
 881                         'extractor_key': ie_result['extractor_key'],
 882                     }
 883                 )
 884                 return r
 885             ie_result['entries'] = [
 886                 self.process_ie_result(_fixup(r), download, extra_info)
 887                 for r in ie_result['entries']
 888             ]
 889             return ie_result
 890         else:
 891             raise Exception('Invalid result type: %s' % result_type)
 892
 893     def _build_format_filter(self, filter_spec):
 894         " Returns a function to filter the formats according to the filter_spec "
 895
 896         OPERATORS = {
 897             '<': operator.lt,
 898             '<=': operator.le,
 899             '>': operator.gt,
 900             '>=': operator.ge,
 901             '=': operator.eq,
 902             '!=': operator.ne,
 903         }
 904         operator_rex = re.compile(r'''(?x)\s*
 905             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
 906             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
 907             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
 908             $
 909             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
 910         m = operator_rex.search(filter_spec)
 911         if m:
 912             try:
 913                 comparison_value = int(m.group('value'))
 914             except ValueError:
 915                 comparison_value = parse_filesize(m.group('value'))
 916                 if comparison_value is None:
 917                     comparison_value = parse_filesize(m.group('value') + 'B')
 918                 if comparison_value is None:
 919                     raise ValueError(
 920                         'Invalid value %r in format specification %r' % (
 921                             m.group('value'), filter_spec))
 922             op = OPERATORS[m.group('op')]
 923
 924         if not m:
 925             STR_OPERATORS = {
 926                 '=': operator.eq,
 927                 '!=': operator.ne,
 928                 '^=': lambda attr, value: attr.startswith(value),
 929                 '$=': lambda attr, value: attr.endswith(value),
 930                 '*=': lambda attr, value: value in attr,
 931             }
 932             str_operator_rex = re.compile(r'''(?x)
 933                 \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
 934                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
 935                 \s*(?P<value>[a-zA-Z0-9._-]+)
 936                 \s*$
 937                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
 938             m = str_operator_rex.search(filter_spec)
 939             if m:
 940                 comparison_value = m.group('value')
 941                 op = STR_OPERATORS[m.group('op')]
 942
 943         if not m:
 944             raise ValueError('Invalid filter specification %r' % filter_spec)
 945
 946         def _filter(f):
 947             actual_value = f.get(m.group('key'))
 948             if actual_value is None:
 949                 return m.group('none_inclusive')
 950             return op(actual_value, comparison_value)
 951         return _filter
 952
 953     def build_format_selector(self, format_spec):
 954         def syntax_error(note, start):
 955             message = (
 956                 'Invalid format specification: '
 957                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
 958             return SyntaxError(message)
 959
 960         PICKFIRST = 'PICKFIRST'
 961         MERGE = 'MERGE'
 962         SINGLE = 'SINGLE'
 963         GROUP = 'GROUP'
 964         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
 965
 966         def _parse_filter(tokens):
 967             filter_parts = []
 968             for type, string, start, _, _ in tokens:
 969                 if type == tokenize.OP and string == ']':
 970                     return ''.join(filter_parts)
 971                 else:
 972                     filter_parts.append(string)
 973
 974         def _remove_unused_ops(tokens):
 975             # Remove operators that we don't use and join them with the surrounding strings
 976             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
 977             ALLOWED_OPS = ('/', '+', ',', '(', ')')
 978             last_string, last_start, last_end, last_line = None, None, None, None
 979             for type, string, start, end, line in tokens:
 980                 if type == tokenize.OP and string == '[':
 981                     if last_string:
 982                         yield tokenize.NAME, last_string, last_start, last_end, last_line
 983                         last_string = None
 984                     yield type, string, start, end, line
 985                     # everything inside brackets will be handled by _parse_filter
 986                     for type, string, start, end, line in tokens:
 987                         yield type, string, start, end, line
 988                         if type == tokenize.OP and string == ']':
 989                             break
 990                 elif type == tokenize.OP and string in ALLOWED_OPS:
 991                     if last_string:
 992                         yield tokenize.NAME, last_string, last_start, last_end, last_line
 993                         last_string = None
 994                     yield type, string, start, end, line
 995                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
 996                     if not last_string:
 997                         last_string = string
 998                         last_start = start
 999                         last_end = end
1000                     else:
1001                         last_string += string
1002             if last_string:
1003                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1004
1005         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1006             selectors = []
1007             current_selector = None
1008             for type, string, start, _, _ in tokens:
1009                 # ENCODING is only defined in python 3.x
1010                 if type == getattr(tokenize, 'ENCODING', None):
1011                     continue
1012                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1013                     current_selector = FormatSelector(SINGLE, string, [])
1014                 elif type == tokenize.OP:
1015                     if string == ')':
1016                         if not inside_group:
1017                             # ')' will be handled by the parentheses group
1018                             tokens.restore_last_token()
1019                         break
1020                     elif inside_merge and string in ['/', ',']:
1021                         tokens.restore_last_token()
1022                         break
1023                     elif inside_choice and string == ',':
1024                         tokens.restore_last_token()
1025                         break
1026                     elif string == ',':
1027                         if not current_selector:
1028                             raise syntax_error('"," must follow a format selector', start)
1029                         selectors.append(current_selector)
1030                         current_selector = None
1031                     elif string == '/':
1032                         if not current_selector:
1033                             raise syntax_error('"/" must follow a format selector', start)
1034                         first_choice = current_selector
1035                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1036                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1037                     elif string == '[':
1038                         if not current_selector:
1039                             current_selector = FormatSelector(SINGLE, 'best', [])
1040                         format_filter = _parse_filter(tokens)
1041                         current_selector.filters.append(format_filter)
1042                     elif string == '(':
1043                         if current_selector:
1044                             raise syntax_error('Unexpected "("', start)
1045                         group = _parse_format_selection(tokens, inside_group=True)
1046                         current_selector = FormatSelector(GROUP, group, [])
1047                     elif string == '+':
1048                         video_selector = current_selector
1049                         audio_selector = _parse_format_selection(tokens, inside_merge=True)
1050                         if not video_selector or not audio_selector:
1051                             raise syntax_error('"+" must be between two format selectors', start)
1052                         current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
1053                     else:
1054                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1055                 elif type == tokenize.ENDMARKER:
1056                     break
1057             if current_selector:
1058                 selectors.append(current_selector)
1059             return selectors
1060
1061         def _build_selector_function(selector):
1062             if isinstance(selector, list):
1063                 fs = [_build_selector_function(s) for s in selector]
1064
1065                 def selector_function(ctx):
1066                     for f in fs:
1067                         for format in f(ctx):
1068                             yield format
1069                 return selector_function
1070             elif selector.type == GROUP:
1071                 selector_function = _build_selector_function(selector.selector)
1072             elif selector.type == PICKFIRST:
1073                 fs = [_build_selector_function(s) for s in selector.selector]
1074
1075                 def selector_function(ctx):
1076                     for f in fs:
1077                         picked_formats = list(f(ctx))
1078                         if picked_formats:
1079                             return picked_formats
1080                     return []
1081             elif selector.type == SINGLE:
1082                 format_spec = selector.selector
1083
1084                 def selector_function(ctx):
1085                     formats = list(ctx['formats'])
1086                     if not formats:
1087                         return
1088                     if format_spec == 'all':
1089                         for f in formats:
1090                             yield f
1091                     elif format_spec in ['best', 'worst', None]:
1092                         format_idx = 0 if format_spec == 'worst' else -1
1093                         audiovideo_formats = [
1094                             f for f in formats
1095                             if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
1096                         if audiovideo_formats:
1097                             yield audiovideo_formats[format_idx]
1098                         # for extractors with incomplete formats (audio only (soundcloud)
1099                         # or video only (imgur)) we will fallback to best/worst
1100                         # {video,audio}-only format
1101                         elif ctx['incomplete_formats']:
1102                             yield formats[format_idx]
1103                     elif format_spec == 'bestaudio':
1104                         audio_formats = [
1105                             f for f in formats
1106                             if f.get('vcodec') == 'none']
1107                         if audio_formats:
1108                             yield audio_formats[-1]
1109                     elif format_spec == 'worstaudio':
1110                         audio_formats = [
1111                             f for f in formats
1112                             if f.get('vcodec') == 'none']
1113                         if audio_formats:
1114                             yield audio_formats[0]
1115                     elif format_spec == 'bestvideo':
1116                         video_formats = [
1117                             f for f in formats
1118                             if f.get('acodec') == 'none']
1119                         if video_formats:
1120                             yield video_formats[-1]
1121                     elif format_spec == 'worstvideo':
1122                         video_formats = [
1123                             f for f in formats
1124                             if f.get('acodec') == 'none']
1125                         if video_formats:
1126                             yield video_formats[0]
1127                     else:
1128                         extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
1129                         if format_spec in extensions:
1130                             filter_f = lambda f: f['ext'] == format_spec
1131                         else:
1132                             filter_f = lambda f: f['format_id'] == format_spec
1133                         matches = list(filter(filter_f, formats))
1134                         if matches:
1135                             yield matches[-1]
1136             elif selector.type == MERGE:
1137                 def _merge(formats_info):
1138                     format_1, format_2 = [f['format_id'] for f in formats_info]
1139                     # The first format must contain the video and the
1140                     # second the audio
1141                     if formats_info[0].get('vcodec') == 'none':
1142                         self.report_error('The first format must '
1143                                           'contain the video, try using '
1144                                           '"-f %s+%s"' % (format_2, format_1))
1145                         return
1146                     # Formats must be opposite (video+audio)
1147                     if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
1148                         self.report_error(
1149                             'Both formats %s and %s are video-only, you must specify "-f video+audio"'
1150                             % (format_1, format_2))
1151                         return
1152                     output_ext = (
1153                         formats_info[0]['ext']
1154                         if self.params.get('merge_output_format') is None
1155                         else self.params['merge_output_format'])
1156                     return {
1157                         'requested_formats': formats_info,
1158                         'format': '%s+%s' % (formats_info[0].get('format'),
1159                                              formats_info[1].get('format')),
1160                         'format_id': '%s+%s' % (formats_info[0].get('format_id'),
1161                                                 formats_info[1].get('format_id')),
1162                         'width': formats_info[0].get('width'),
1163                         'height': formats_info[0].get('height'),
1164                         'resolution': formats_info[0].get('resolution'),
1165                         'fps': formats_info[0].get('fps'),
1166                         'vcodec': formats_info[0].get('vcodec'),
1167                         'vbr': formats_info[0].get('vbr'),
1168                         'stretched_ratio': formats_info[0].get('stretched_ratio'),
1169                         'acodec': formats_info[1].get('acodec'),
1170                         'abr': formats_info[1].get('abr'),
1171                         'ext': output_ext,
1172                     }
1173                 video_selector, audio_selector = map(_build_selector_function, selector.selector)
1174
1175                 def selector_function(ctx):
1176                     for pair in itertools.product(
1177                             video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
1178                         yield _merge(pair)
1179
1180             filters = [self._build_format_filter(f) for f in selector.filters]
1181
1182             def final_selector(ctx):
1183                 ctx_copy = copy.deepcopy(ctx)
1184                 for _filter in filters:
1185                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1186                 return selector_function(ctx_copy)
1187             return final_selector
1188
1189         stream = io.BytesIO(format_spec.encode('utf-8'))
1190         try:
1191             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1192         except tokenize.TokenError:
1193             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1194
1195         class TokenIterator(object):
1196             def __init__(self, tokens):
1197                 self.tokens = tokens
1198                 self.counter = 0
1199
1200             def __iter__(self):
1201                 return self
1202
1203             def __next__(self):
1204                 if self.counter >= len(self.tokens):
1205                     raise StopIteration()
1206                 value = self.tokens[self.counter]
1207                 self.counter += 1
1208                 return value
1209
1210             next = __next__
1211
1212             def restore_last_token(self):
1213                 self.counter -= 1
1214
1215         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1216         return _build_selector_function(parsed_selector)
1217
1218     def _calc_headers(self, info_dict):
1219         res = std_headers.copy()
1220
1221         add_headers = info_dict.get('http_headers')
1222         if add_headers:
1223             res.update(add_headers)
1224
1225         cookies = self._calc_cookies(info_dict)
1226         if cookies:
1227             res['Cookie'] = cookies
1228
1229         return res
1230
1231     def _calc_cookies(self, info_dict):
1232         pr = sanitized_Request(info_dict['url'])
1233         self.cookiejar.add_cookie_header(pr)
1234         return pr.get_header('Cookie')
1235
1236     def process_video_result(self, info_dict, download=True):
1237         assert info_dict.get('_type', 'video') == 'video'
1238
1239         if 'id' not in info_dict:
1240             raise ExtractorError('Missing "id" field in extractor result')
1241         if 'title' not in info_dict:
1242             raise ExtractorError('Missing "title" field in extractor result')
1243
1244         if not isinstance(info_dict['id'], compat_str):
1245             self.report_warning('"id" field is not a string - forcing string conversion')
1246             info_dict['id'] = compat_str(info_dict['id'])
1247
1248         if 'playlist' not in info_dict:
1249             # It isn't part of a playlist
1250             info_dict['playlist'] = None
1251             info_dict['playlist_index'] = None
1252
1253         thumbnails = info_dict.get('thumbnails')
1254         if thumbnails is None:
1255             thumbnail = info_dict.get('thumbnail')
1256             if thumbnail:
1257                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1258         if thumbnails:
1259             thumbnails.sort(key=lambda t: (
1260                 t.get('preference') if t.get('preference') is not None else -1,
1261                 t.get('width') if t.get('width') is not None else -1,
1262                 t.get('height') if t.get('height') is not None else -1,
1263                 t.get('id') if t.get('id') is not None else '', t.get('url')))
1264             for i, t in enumerate(thumbnails):
1265                 t['url'] = sanitize_url(t['url'])
1266                 if t.get('width') and t.get('height'):
1267                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1268                 if t.get('id') is None:
1269                     t['id'] = '%d' % i
1270
1271         if self.params.get('list_thumbnails'):
1272             self.list_thumbnails(info_dict)
1273             return
1274
1275         thumbnail = info_dict.get('thumbnail')
1276         if thumbnail:
1277             info_dict['thumbnail'] = sanitize_url(thumbnail)
1278         elif thumbnails:
1279             info_dict['thumbnail'] = thumbnails[-1]['url']
1280
1281         if 'display_id' not in info_dict and 'id' in info_dict:
1282             info_dict['display_id'] = info_dict['id']
1283
1284         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1285             # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1286             # see http://bugs.python.org/issue1646728)
1287             try:
1288                 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1289                 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1290             except (ValueError, OverflowError, OSError):
1291                 pass
1292
1293         # Auto generate title fields corresponding to the *_number fields when missing
1294         # in order to always have clean titles. This is very common for TV series.
1295         for field in ('chapter', 'season', 'episode'):
1296             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1297                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
1298
1299         subtitles = info_dict.get('subtitles')
1300         if subtitles:
1301             for _, subtitle in subtitles.items():
1302                 for subtitle_format in subtitle:
1303                     if subtitle_format.get('url'):
1304                         subtitle_format['url'] = sanitize_url(subtitle_format['url'])
1305                     if subtitle_format.get('ext') is None:
1306                         subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1307
1308         if self.params.get('listsubtitles', False):
1309             if 'automatic_captions' in info_dict:
1310                 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
1311             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1312             return
1313         info_dict['requested_subtitles'] = self.process_subtitles(
1314             info_dict['id'], subtitles,
1315             info_dict.get('automatic_captions'))
1316
1317         # We now pick which formats have to be downloaded
1318         if info_dict.get('formats') is None:
1319             # There's only one format available
1320             formats = [info_dict]
1321         else:
1322             formats = info_dict['formats']
1323
1324         if not formats:
1325             raise ExtractorError('No video formats found!')
1326
1327         formats_dict = {}
1328
1329         # We check that all the formats have the format and format_id fields
1330         for i, format in enumerate(formats):
1331             if 'url' not in format:
1332                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1333
1334             format['url'] = sanitize_url(format['url'])
1335
1336             if format.get('format_id') is None:
1337                 format['format_id'] = compat_str(i)
1338             else:
1339                 # Sanitize format_id from characters used in format selector expression
1340                 format['format_id'] = re.sub('[\s,/+\[\]()]', '_', format['format_id'])
1341             format_id = format['format_id']
1342             if format_id not in formats_dict:
1343                 formats_dict[format_id] = []
1344             formats_dict[format_id].append(format)
1345
1346         # Make sure all formats have unique format_id
1347         for format_id, ambiguous_formats in formats_dict.items():
1348             if len(ambiguous_formats) > 1:
1349                 for i, format in enumerate(ambiguous_formats):
1350                     format['format_id'] = '%s-%d' % (format_id, i)
1351
1352         for i, format in enumerate(formats):
1353             if format.get('format') is None:
1354                 format['format'] = '{id} - {res}{note}'.format(
1355                     id=format['format_id'],
1356                     res=self.format_resolution(format),
1357                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1358                 )
1359             # Automatically determine file extension if missing
1360             if format.get('ext') is None:
1361                 format['ext'] = determine_ext(format['url']).lower()
1362             # Automatically determine protocol if missing (useful for format
1363             # selection purposes)
1364             if 'protocol' not in format:
1365                 format['protocol'] = determine_protocol(format)
1366             # Add HTTP headers, so that external programs can use them from the
1367             # json output
1368             full_format_info = info_dict.copy()
1369             full_format_info.update(format)
1370             format['http_headers'] = self._calc_headers(full_format_info)
1371
1372         # TODO Central sorting goes here
1373
1374         if formats[0] is not info_dict:
1375             # only set the 'formats' fields if the original info_dict list them
1376             # otherwise we end up with a circular reference, the first (and unique)
1377             # element in the 'formats' field in info_dict is info_dict itself,
1378             # which can't be exported to json
1379             info_dict['formats'] = formats
1380         if self.params.get('listformats'):
1381             self.list_formats(info_dict)
1382             return
1383
1384         req_format = self.params.get('format')
1385         if req_format is None:
1386             req_format_list = []
1387             if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
1388                     not info_dict.get('is_live')):
1389                 merger = FFmpegMergerPP(self)
1390                 if merger.available and merger.can_merge():
1391                     req_format_list.append('bestvideo+bestaudio')
1392             req_format_list.append('best')
1393             req_format = '/'.join(req_format_list)
1394         format_selector = self.build_format_selector(req_format)
1395
1396         # While in format selection we may need to have an access to the original
1397         # format set in order to calculate some metrics or do some processing.
1398         # For now we need to be able to guess whether original formats provided
1399         # by extractor are incomplete or not (i.e. whether extractor provides only
1400         # video-only or audio-only formats) for proper formats selection for
1401         # extractors with such incomplete formats (see
1402         # https://github.com/rg3/youtube-dl/pull/5556).
1403         # Since formats may be filtered during format selection and may not match
1404         # the original formats the results may be incorrect. Thus original formats
1405         # or pre-calculated metrics should be passed to format selection routines
1406         # as well.
1407         # We will pass a context object containing all necessary additional data
1408         # instead of just formats.
1409         # This fixes incorrect format selection issue (see
1410         # https://github.com/rg3/youtube-dl/issues/10083).
1411         incomplete_formats = (
1412             # All formats are video-only or
1413             all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) or
1414             # all formats are audio-only
1415             all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
1416
1417         ctx = {
1418             'formats': formats,
1419             'incomplete_formats': incomplete_formats,
1420         }
1421
1422         formats_to_download = list(format_selector(ctx))
1423         if not formats_to_download:
1424             raise ExtractorError('requested format not available',
1425                                  expected=True)
1426
1427         if download:
1428             if len(formats_to_download) > 1:
1429                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1430             for format in formats_to_download:
1431                 new_info = dict(info_dict)
1432                 new_info.update(format)
1433                 self.process_info(new_info)
1434         # We update the info dict with the best quality format (backwards compatibility)
1435         info_dict.update(formats_to_download[-1])
1436         return info_dict
1437
1438     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1439         """Select the requested subtitles and their format"""
1440         available_subs = {}
1441         if normal_subtitles and self.params.get('writesubtitles'):
1442             available_subs.update(normal_subtitles)
1443         if automatic_captions and self.params.get('writeautomaticsub'):
1444             for lang, cap_info in automatic_captions.items():
1445                 if lang not in available_subs:
1446                     available_subs[lang] = cap_info
1447
1448         if (not self.params.get('writesubtitles') and not
1449                 self.params.get('writeautomaticsub') or not
1450                 available_subs):
1451             return None
1452
1453         if self.params.get('allsubtitles', False):
1454             requested_langs = available_subs.keys()
1455         else:
1456             if self.params.get('subtitleslangs', False):
1457                 requested_langs = self.params.get('subtitleslangs')
1458             elif 'en' in available_subs:
1459                 requested_langs = ['en']
1460             else:
1461                 requested_langs = [list(available_subs.keys())[0]]
1462
1463         formats_query = self.params.get('subtitlesformat', 'best')
1464         formats_preference = formats_query.split('/') if formats_query else []
1465         subs = {}
1466         for lang in requested_langs:
1467             formats = available_subs.get(lang)
1468             if formats is None:
1469                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1470                 continue
1471             for ext in formats_preference:
1472                 if ext == 'best':
1473                     f = formats[-1]
1474                     break
1475                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1476                 if matches:
1477                     f = matches[-1]
1478                     break
1479             else:
1480                 f = formats[-1]
1481                 self.report_warning(
1482                     'No subtitle format found matching "%s" for language %s, '
1483                     'using %s' % (formats_query, lang, f['ext']))
1484             subs[lang] = f
1485         return subs
1486
1487     def process_info(self, info_dict):
1488         """Process a single resolved IE result."""
1489
1490         assert info_dict.get('_type', 'video') == 'video'
1491
1492         max_downloads = self.params.get('max_downloads')
1493         if max_downloads is not None:
1494             if self._num_downloads >= int(max_downloads):
1495                 raise MaxDownloadsReached()
1496
1497         info_dict['fulltitle'] = info_dict['title']
1498         if len(info_dict['title']) > 200:
1499             info_dict['title'] = info_dict['title'][:197] + '...'
1500
1501         if 'format' not in info_dict:
1502             info_dict['format'] = info_dict['ext']
1503
1504         reason = self._match_entry(info_dict, incomplete=False)
1505         if reason is not None:
1506             self.to_screen('[download] ' + reason)
1507             return
1508
1509         self._num_downloads += 1
1510
1511         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1512
1513         # Forced printings
1514         if self.params.get('forcetitle', False):
1515             self.to_stdout(info_dict['fulltitle'])
1516         if self.params.get('forceid', False):
1517             self.to_stdout(info_dict['id'])
1518         if self.params.get('forceurl', False):
1519             if info_dict.get('requested_formats') is not None:
1520                 for f in info_dict['requested_formats']:
1521                     self.to_stdout(f['url'] + f.get('play_path', ''))
1522             else:
1523                 # For RTMP URLs, also include the playpath
1524                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1525         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1526             self.to_stdout(info_dict['thumbnail'])
1527         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1528             self.to_stdout(info_dict['description'])
1529         if self.params.get('forcefilename', False) and filename is not None:
1530             self.to_stdout(filename)
1531         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1532             self.to_stdout(formatSeconds(info_dict['duration']))
1533         if self.params.get('forceformat', False):
1534             self.to_stdout(info_dict['format'])
1535         if self.params.get('forcejson', False):
1536             self.to_stdout(json.dumps(info_dict))
1537
1538         # Do nothing else if in simulate mode
1539         if self.params.get('simulate', False):
1540             return
1541
1542         if filename is None:
1543             return
1544
1545         try:
1546             dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1547             if dn and not os.path.exists(dn):
1548                 os.makedirs(dn)
1549         except (OSError, IOError) as err:
1550             self.report_error('unable to create directory ' + error_to_compat_str(err))
1551             return
1552
1553         if self.params.get('writedescription', False):
1554             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1555             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1556                 self.to_screen('[info] Video description is already present')
1557             elif info_dict.get('description') is None:
1558                 self.report_warning('There\'s no description to write.')
1559             else:
1560                 try:
1561                     self.to_screen('[info] Writing video description to: ' + descfn)
1562                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1563                         descfile.write(info_dict['description'])
1564                 except (OSError, IOError):
1565                     self.report_error('Cannot write description file ' + descfn)
1566                     return
1567
1568         if self.params.get('writeannotations', False):
1569             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1570             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1571                 self.to_screen('[info] Video annotations are already present')
1572             else:
1573                 try:
1574                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1575                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1576                         annofile.write(info_dict['annotations'])
1577                 except (KeyError, TypeError):
1578                     self.report_warning('There are no annotations to write.')
1579                 except (OSError, IOError):
1580                     self.report_error('Cannot write annotations file: ' + annofn)
1581                     return
1582
1583         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1584                                        self.params.get('writeautomaticsub')])
1585
1586         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1587             # subtitles download errors are already managed as troubles in relevant IE
1588             # that way it will silently go on when used with unsupporting IE
1589             subtitles = info_dict['requested_subtitles']
1590             ie = self.get_info_extractor(info_dict['extractor_key'])
1591             for sub_lang, sub_info in subtitles.items():
1592                 sub_format = sub_info['ext']
1593                 if sub_info.get('data') is not None:
1594                     sub_data = sub_info['data']
1595                 else:
1596                     try:
1597                         sub_data = ie._download_webpage(
1598                             sub_info['url'], info_dict['id'], note=False)
1599                     except ExtractorError as err:
1600                         self.report_warning('Unable to download subtitle for "%s": %s' %
1601                                             (sub_lang, error_to_compat_str(err.cause)))
1602                         continue
1603                 try:
1604                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1605                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1606                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1607                     else:
1608                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1609                         # Use newline='' to prevent conversion of newline characters
1610                         # See https://github.com/rg3/youtube-dl/issues/10268
1611                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
1612                             subfile.write(sub_data)
1613                 except (OSError, IOError):
1614                     self.report_error('Cannot write subtitles file ' + sub_filename)
1615                     return
1616
1617         if self.params.get('writeinfojson', False):
1618             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1619             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1620                 self.to_screen('[info] Video description metadata is already present')
1621             else:
1622                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1623                 try:
1624                     write_json_file(self.filter_requested_info(info_dict), infofn)
1625                 except (OSError, IOError):
1626                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1627                     return
1628
1629         self._write_thumbnails(info_dict, filename)
1630
1631         if not self.params.get('skip_download', False):
1632             try:
1633                 def dl(name, info):
1634                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1635                     for ph in self._progress_hooks:
1636                         fd.add_progress_hook(ph)
1637                     if self.params.get('verbose'):
1638                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1639                     return fd.download(name, info)
1640
1641                 if info_dict.get('requested_formats') is not None:
1642                     downloaded = []
1643                     success = True
1644                     merger = FFmpegMergerPP(self)
1645                     if not merger.available:
1646                         postprocessors = []
1647                         self.report_warning('You have requested multiple '
1648                                             'formats but ffmpeg or avconv are not installed.'
1649                                             ' The formats won\'t be merged.')
1650                     else:
1651                         postprocessors = [merger]
1652
1653                     def compatible_formats(formats):
1654                         video, audio = formats
1655                         # Check extension
1656                         video_ext, audio_ext = audio.get('ext'), video.get('ext')
1657                         if video_ext and audio_ext:
1658                             COMPATIBLE_EXTS = (
1659                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
1660                                 ('webm')
1661                             )
1662                             for exts in COMPATIBLE_EXTS:
1663                                 if video_ext in exts and audio_ext in exts:
1664                                     return True
1665                         # TODO: Check acodec/vcodec
1666                         return False
1667
1668                     filename_real_ext = os.path.splitext(filename)[1][1:]
1669                     filename_wo_ext = (
1670                         os.path.splitext(filename)[0]
1671                         if filename_real_ext == info_dict['ext']
1672                         else filename)
1673                     requested_formats = info_dict['requested_formats']
1674                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1675                         info_dict['ext'] = 'mkv'
1676                         self.report_warning(
1677                             'Requested formats are incompatible for merge and will be merged into mkv.')
1678                     # Ensure filename always has a correct extension for successful merge
1679                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1680                     if os.path.exists(encodeFilename(filename)):
1681                         self.to_screen(
1682                             '[download] %s has already been downloaded and '
1683                             'merged' % filename)
1684                     else:
1685                         for f in requested_formats:
1686                             new_info = dict(info_dict)
1687                             new_info.update(f)
1688                             fname = self.prepare_filename(new_info)
1689                             fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
1690                             downloaded.append(fname)
1691                             partial_success = dl(fname, new_info)
1692                             success = success and partial_success
1693                         info_dict['__postprocessors'] = postprocessors
1694                         info_dict['__files_to_merge'] = downloaded
1695                 else:
1696                     # Just a single file
1697                     success = dl(filename, info_dict)
1698             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1699                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
1700                 return
1701             except (OSError, IOError) as err:
1702                 raise UnavailableVideoError(err)
1703             except (ContentTooShortError, ) as err:
1704                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1705                 return
1706
1707             if success and filename != '-':
1708                 # Fixup content
1709                 fixup_policy = self.params.get('fixup')
1710                 if fixup_policy is None:
1711                     fixup_policy = 'detect_or_warn'
1712
1713                 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
1714
1715                 stretched_ratio = info_dict.get('stretched_ratio')
1716                 if stretched_ratio is not None and stretched_ratio != 1:
1717                     if fixup_policy == 'warn':
1718                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1719                             info_dict['id'], stretched_ratio))
1720                     elif fixup_policy == 'detect_or_warn':
1721                         stretched_pp = FFmpegFixupStretchedPP(self)
1722                         if stretched_pp.available:
1723                             info_dict.setdefault('__postprocessors', [])
1724                             info_dict['__postprocessors'].append(stretched_pp)
1725                         else:
1726                             self.report_warning(
1727                                 '%s: Non-uniform pixel ratio (%s). %s'
1728                                 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
1729                     else:
1730                         assert fixup_policy in ('ignore', 'never')
1731
1732                 if (info_dict.get('requested_formats') is None and
1733                         info_dict.get('container') == 'm4a_dash'):
1734                     if fixup_policy == 'warn':
1735                         self.report_warning(
1736                             '%s: writing DASH m4a. '
1737                             'Only some players support this container.'
1738                             % info_dict['id'])
1739                     elif fixup_policy == 'detect_or_warn':
1740                         fixup_pp = FFmpegFixupM4aPP(self)
1741                         if fixup_pp.available:
1742                             info_dict.setdefault('__postprocessors', [])
1743                             info_dict['__postprocessors'].append(fixup_pp)
1744                         else:
1745                             self.report_warning(
1746                                 '%s: writing DASH m4a. '
1747                                 'Only some players support this container. %s'
1748                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
1749                     else:
1750                         assert fixup_policy in ('ignore', 'never')
1751
1752                 if (info_dict.get('protocol') == 'm3u8_native' or
1753                         info_dict.get('protocol') == 'm3u8' and
1754                         self.params.get('hls_prefer_native')):
1755                     if fixup_policy == 'warn':
1756                         self.report_warning('%s: malformated aac bitstream.' % (
1757                             info_dict['id']))
1758                     elif fixup_policy == 'detect_or_warn':
1759                         fixup_pp = FFmpegFixupM3u8PP(self)
1760                         if fixup_pp.available:
1761                             info_dict.setdefault('__postprocessors', [])
1762                             info_dict['__postprocessors'].append(fixup_pp)
1763                         else:
1764                             self.report_warning(
1765                                 '%s: malformated aac bitstream. %s'
1766                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
1767                     else:
1768                         assert fixup_policy in ('ignore', 'never')
1769
1770                 try:
1771                     self.post_process(filename, info_dict)
1772                 except (PostProcessingError) as err:
1773                     self.report_error('postprocessing: %s' % str(err))
1774                     return
1775                 self.record_download_archive(info_dict)
1776
1777     def download(self, url_list):
1778         """Download a given list of URLs."""
1779         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1780         if (len(url_list) > 1 and
1781                 '%' not in outtmpl and
1782                 self.params.get('max_downloads') != 1):
1783             raise SameFileError(outtmpl)
1784
1785         for url in url_list:
1786             try:
1787                 # It also downloads the videos
1788                 res = self.extract_info(
1789                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
1790             except UnavailableVideoError:
1791                 self.report_error('unable to download video')
1792             except MaxDownloadsReached:
1793                 self.to_screen('[info] Maximum number of downloaded files reached.')
1794                 raise
1795             else:
1796                 if self.params.get('dump_single_json', False):
1797                     self.to_stdout(json.dumps(res))
1798
1799         return self._download_retcode
1800
1801     def download_with_info_file(self, info_filename):
1802         with contextlib.closing(fileinput.FileInput(
1803                 [info_filename], mode='r',
1804                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1805             # FileInput doesn't have a read method, we can't call json.load
1806             info = self.filter_requested_info(json.loads('\n'.join(f)))
1807         try:
1808             self.process_ie_result(info, download=True)
1809         except DownloadError:
1810             webpage_url = info.get('webpage_url')
1811             if webpage_url is not None:
1812                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1813                 return self.download([webpage_url])
1814             else:
1815                 raise
1816         return self._download_retcode
1817
1818     @staticmethod
1819     def filter_requested_info(info_dict):
1820         return dict(
1821             (k, v) for k, v in info_dict.items()
1822             if k not in ['requested_formats', 'requested_subtitles'])
1823
1824     def post_process(self, filename, ie_info):
1825         """Run all the postprocessors on the given file."""
1826         info = dict(ie_info)
1827         info['filepath'] = filename
1828         pps_chain = []
1829         if ie_info.get('__postprocessors') is not None:
1830             pps_chain.extend(ie_info['__postprocessors'])
1831         pps_chain.extend(self._pps)
1832         for pp in pps_chain:
1833             files_to_delete = []
1834             try:
1835                 files_to_delete, info = pp.run(info)
1836             except PostProcessingError as e:
1837                 self.report_error(e.msg)
1838             if files_to_delete and not self.params.get('keepvideo', False):
1839                 for old_filename in files_to_delete:
1840                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1841                     try:
1842                         os.remove(encodeFilename(old_filename))
1843                     except (IOError, OSError):
1844                         self.report_warning('Unable to remove downloaded original file')
1845
1846     def _make_archive_id(self, info_dict):
1847         # Future-proof against any change in case
1848         # and backwards compatibility with prior versions
1849         extractor = info_dict.get('extractor_key')
1850         if extractor is None:
1851             if 'id' in info_dict:
1852                 extractor = info_dict.get('ie_key')  # key in a playlist
1853         if extractor is None:
1854             return None  # Incomplete video information
1855         return extractor.lower() + ' ' + info_dict['id']
1856
1857     def in_download_archive(self, info_dict):
1858         fn = self.params.get('download_archive')
1859         if fn is None:
1860             return False
1861
1862         vid_id = self._make_archive_id(info_dict)
1863         if vid_id is None:
1864             return False  # Incomplete video information
1865
1866         try:
1867             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1868                 for line in archive_file:
1869                     if line.strip() == vid_id:
1870                         return True
1871         except IOError as ioe:
1872             if ioe.errno != errno.ENOENT:
1873                 raise
1874         return False
1875
1876     def record_download_archive(self, info_dict):
1877         fn = self.params.get('download_archive')
1878         if fn is None:
1879             return
1880         vid_id = self._make_archive_id(info_dict)
1881         assert vid_id
1882         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1883             archive_file.write(vid_id + '\n')
1884
1885     @staticmethod
1886     def format_resolution(format, default='unknown'):
1887         if format.get('vcodec') == 'none':
1888             return 'audio only'
1889         if format.get('resolution') is not None:
1890             return format['resolution']
1891         if format.get('height') is not None:
1892             if format.get('width') is not None:
1893                 res = '%sx%s' % (format['width'], format['height'])
1894             else:
1895                 res = '%sp' % format['height']
1896         elif format.get('width') is not None:
1897             res = '%dx?' % format['width']
1898         else:
1899             res = default
1900         return res
1901
1902     def _format_note(self, fdict):
1903         res = ''
1904         if fdict.get('ext') in ['f4f', 'f4m']:
1905             res += '(unsupported) '
1906         if fdict.get('language'):
1907             if res:
1908                 res += ' '
1909             res += '[%s] ' % fdict['language']
1910         if fdict.get('format_note') is not None:
1911             res += fdict['format_note'] + ' '
1912         if fdict.get('tbr') is not None:
1913             res += '%4dk ' % fdict['tbr']
1914         if fdict.get('container') is not None:
1915             if res:
1916                 res += ', '
1917             res += '%s container' % fdict['container']
1918         if (fdict.get('vcodec') is not None and
1919                 fdict.get('vcodec') != 'none'):
1920             if res:
1921                 res += ', '
1922             res += fdict['vcodec']
1923             if fdict.get('vbr') is not None:
1924                 res += '@'
1925         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1926             res += 'video@'
1927         if fdict.get('vbr') is not None:
1928             res += '%4dk' % fdict['vbr']
1929         if fdict.get('fps') is not None:
1930             if res:
1931                 res += ', '
1932             res += '%sfps' % fdict['fps']
1933         if fdict.get('acodec') is not None:
1934             if res:
1935                 res += ', '
1936             if fdict['acodec'] == 'none':
1937                 res += 'video only'
1938             else:
1939                 res += '%-5s' % fdict['acodec']
1940         elif fdict.get('abr') is not None:
1941             if res:
1942                 res += ', '
1943             res += 'audio'
1944         if fdict.get('abr') is not None:
1945             res += '@%3dk' % fdict['abr']
1946         if fdict.get('asr') is not None:
1947             res += ' (%5dHz)' % fdict['asr']
1948         if fdict.get('filesize') is not None:
1949             if res:
1950                 res += ', '
1951             res += format_bytes(fdict['filesize'])
1952         elif fdict.get('filesize_approx') is not None:
1953             if res:
1954                 res += ', '
1955             res += '~' + format_bytes(fdict['filesize_approx'])
1956         return res
1957
1958     def list_formats(self, info_dict):
1959         formats = info_dict.get('formats', [info_dict])
1960         table = [
1961             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1962             for f in formats
1963             if f.get('preference') is None or f['preference'] >= -1000]
1964         if len(formats) > 1:
1965             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1966
1967         header_line = ['format code', 'extension', 'resolution', 'note']
1968         self.to_screen(
1969             '[info] Available formats for %s:\n%s' %
1970             (info_dict['id'], render_table(header_line, table)))
1971
1972     def list_thumbnails(self, info_dict):
1973         thumbnails = info_dict.get('thumbnails')
1974         if not thumbnails:
1975             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
1976             return
1977
1978         self.to_screen(
1979             '[info] Thumbnails for %s:' % info_dict['id'])
1980         self.to_screen(render_table(
1981             ['ID', 'width', 'height', 'URL'],
1982             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1983
1984     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1985         if not subtitles:
1986             self.to_screen('%s has no %s' % (video_id, name))
1987             return
1988         self.to_screen(
1989             'Available %s for %s:' % (name, video_id))
1990         self.to_screen(render_table(
1991             ['Language', 'formats'],
1992             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1993                 for lang, formats in subtitles.items()]))
1994
1995     def urlopen(self, req):
1996         """ Start an HTTP download """
1997         if isinstance(req, compat_basestring):
1998             req = sanitized_Request(req)
1999         return self._opener.open(req, timeout=self._socket_timeout)
2000
2001     def print_debug_header(self):
2002         if not self.params.get('verbose'):
2003             return
2004
2005         if type('') is not compat_str:
2006             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
2007             self.report_warning(
2008                 'Your Python is broken! Update to a newer and supported version')
2009
2010         stdout_encoding = getattr(
2011             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
2012         encoding_str = (
2013             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
2014                 locale.getpreferredencoding(),
2015                 sys.getfilesystemencoding(),
2016                 stdout_encoding,
2017                 self.get_encoding()))
2018         write_string(encoding_str, encoding=None)
2019
2020         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
2021         if _LAZY_LOADER:
2022             self._write_string('[debug] Lazy loading extractors enabled' + '\n')
2023         try:
2024             sp = subprocess.Popen(
2025                 ['git', 'rev-parse', '--short', 'HEAD'],
2026                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
2027                 cwd=os.path.dirname(os.path.abspath(__file__)))
2028             out, err = sp.communicate()
2029             out = out.decode().strip()
2030             if re.match('[0-9a-f]+', out):
2031                 self._write_string('[debug] Git HEAD: ' + out + '\n')
2032         except Exception:
2033             try:
2034                 sys.exc_clear()
2035             except Exception:
2036                 pass
2037         self._write_string('[debug] Python version %s - %s\n' % (
2038             platform.python_version(), platform_name()))
2039
2040         exe_versions = FFmpegPostProcessor.get_versions(self)
2041         exe_versions['rtmpdump'] = rtmpdump_version()
2042         exe_str = ', '.join(
2043             '%s %s' % (exe, v)
2044             for exe, v in sorted(exe_versions.items())
2045             if v
2046         )
2047         if not exe_str:
2048             exe_str = 'none'
2049         self._write_string('[debug] exe versions: %s\n' % exe_str)
2050
2051         proxy_map = {}
2052         for handler in self._opener.handlers:
2053             if hasattr(handler, 'proxies'):
2054                 proxy_map.update(handler.proxies)
2055         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
2056
2057         if self.params.get('call_home', False):
2058             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
2059             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
2060             latest_version = self.urlopen(
2061                 'https://yt-dl.org/latest/version').read().decode('utf-8')
2062             if version_tuple(latest_version) > version_tuple(__version__):
2063                 self.report_warning(
2064                     'You are using an outdated version (newest version: %s)! '
2065                     'See https://yt-dl.org/update if you need help updating.' %
2066                     latest_version)
2067
2068     def _setup_opener(self):
2069         timeout_val = self.params.get('socket_timeout')
2070         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
2071
2072         opts_cookiefile = self.params.get('cookiefile')
2073         opts_proxy = self.params.get('proxy')
2074
2075         if opts_cookiefile is None:
2076             self.cookiejar = compat_cookiejar.CookieJar()
2077         else:
2078             opts_cookiefile = compat_expanduser(opts_cookiefile)
2079             self.cookiejar = compat_cookiejar.MozillaCookieJar(
2080                 opts_cookiefile)
2081             if os.access(opts_cookiefile, os.R_OK):
2082                 self.cookiejar.load()
2083
2084         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
2085         if opts_proxy is not None:
2086             if opts_proxy == '':
2087                 proxies = {}
2088             else:
2089                 proxies = {'http': opts_proxy, 'https': opts_proxy}
2090         else:
2091             proxies = compat_urllib_request.getproxies()
2092             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
2093             if 'http' in proxies and 'https' not in proxies:
2094                 proxies['https'] = proxies['http']
2095         proxy_handler = PerRequestProxyHandler(proxies)
2096
2097         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
2098         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
2099         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
2100         data_handler = compat_urllib_request_DataHandler()
2101
2102         # When passing our own FileHandler instance, build_opener won't add the
2103         # default FileHandler and allows us to disable the file protocol, which
2104         # can be used for malicious purposes (see
2105         # https://github.com/rg3/youtube-dl/issues/8227)
2106         file_handler = compat_urllib_request.FileHandler()
2107
2108         def file_open(*args, **kwargs):
2109             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
2110         file_handler.file_open = file_open
2111
2112         opener = compat_urllib_request.build_opener(
2113             proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)
2114
2115         # Delete the default user-agent header, which would otherwise apply in
2116         # cases where our custom HTTP handler doesn't come into play
2117         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
2118         opener.addheaders = []
2119         self._opener = opener
2120
2121     def encode(self, s):
2122         if isinstance(s, bytes):
2123             return s  # Already encoded
2124
2125         try:
2126             return s.encode(self.get_encoding())
2127         except UnicodeEncodeError as err:
2128             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2129             raise
2130
2131     def get_encoding(self):
2132         encoding = self.params.get('encoding')
2133         if encoding is None:
2134             encoding = preferredencoding()
2135         return encoding
2136
2137     def _write_thumbnails(self, info_dict, filename):
2138         if self.params.get('writethumbnail', False):
2139             thumbnails = info_dict.get('thumbnails')
2140             if thumbnails:
2141                 thumbnails = [thumbnails[-1]]
2142         elif self.params.get('write_all_thumbnails', False):
2143             thumbnails = info_dict.get('thumbnails')
2144         else:
2145             return
2146
2147         if not thumbnails:
2148             # No thumbnails present, so return immediately
2149             return
2150
2151         for t in thumbnails:
2152             thumb_ext = determine_ext(t['url'], 'jpg')
2153             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2154             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2155             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
2156
2157             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2158                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2159                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2160             else:
2161                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2162                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2163                 try:
2164                     uf = self.urlopen(t['url'])
2165                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2166                         shutil.copyfileobj(uf, thumbf)
2167                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2168                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2169                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2170                     self.report_warning('Unable to download thumbnail "%s": %s' %
2171                                         (t['url'], error_to_compat_str(err)))