4 from __future__ import absolute_import, unicode_literals
32 compat_get_terminal_size,
38 compat_tokenize_tokenize,
40 compat_urllib_request,
41 compat_urllib_request_DataHandler,
67 PerRequestProxyHandler,
72 register_socks_protocols,
82 UnavailableVideoError,
87 YoutubeDLCookieProcessor,
90 from .cache import Cache
91 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
92 from .downloader import get_suitable_downloader
93 from .downloader.rtmp import rtmpdump_version
94 from .postprocessor import (
97 FFmpegFixupStretchedPP,
102 from .version import __version__
104 if compat_os_name == 'nt':
108 class YoutubeDL(object):
111 YoutubeDL objects are the ones responsible of downloading the
112 actual video file and writing it to disk if the user has requested
113 it, among some other tasks. In most cases there should be one per
114 program. As, given a video URL, the downloader doesn't know how to
115 extract all the needed information, task that InfoExtractors do, it
116 has to pass the URL to one of them.
118 For this, YoutubeDL objects have a method that allows
119 InfoExtractors to be registered in a given order. When it is passed
120 a URL, the YoutubeDL object handles it to the first InfoExtractor it
121 finds that reports being able to handle it. The InfoExtractor extracts
122 all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
124 Downloader to download the video.
126 YoutubeDL objects accept a lot of parameters. In order not to saturate
127 the object constructor with arguments, it receives a dictionary of
128 options instead. These options are available through the params
129 attribute for the InfoExtractors to use. The YoutubeDL also
130 registers itself as the downloader in charge for the InfoExtractors
131 that are added to it, so this is a "mutual registration".
135 username: Username for authentication purposes.
136 password: Password for authentication purposes.
137 videopassword: Password for accessing a video.
138 ap_mso: Adobe Pass multiple-system operator identifier.
139 ap_username: Multiple-system operator account username.
140 ap_password: Multiple-system operator account password.
141 usenetrc: Use netrc for authentication instead.
142 verbose: Print additional info to stdout.
143 quiet: Do not print messages to stdout.
144 no_warnings: Do not print out anything for warnings.
145 forceurl: Force printing final URL.
146 forcetitle: Force printing title.
147 forceid: Force printing ID.
148 forcethumbnail: Force printing thumbnail URL.
149 forcedescription: Force printing description.
150 forcefilename: Force printing final filename.
151 forceduration: Force printing duration.
152 forcejson: Force printing info_dict as JSON.
153 dump_single_json: Force printing the info_dict of the whole playlist
154 (or video) as a single JSON line.
155 simulate: Do not download the video files.
156 format: Video format code. See options.py for more information.
157 outtmpl: Template for output names.
158 restrictfilenames: Do not allow "&" and spaces in file names
159 ignoreerrors: Do not stop on download errors.
160 force_generic_extractor: Force downloader to use the generic extractor
161 nooverwrites: Prevent overwriting files.
162 playliststart: Playlist item to start at.
163 playlistend: Playlist item to end at.
164 playlist_items: Specific indices of playlist to download.
165 playlistreverse: Download playlist items in reverse order.
166 playlistrandom: Download playlist items in random order.
167 matchtitle: Download only matching titles.
168 rejecttitle: Reject downloads for matching titles.
169 logger: Log messages to a logging.Logger instance.
170 logtostderr: Log messages to stderr instead of stdout.
171 writedescription: Write the video description to a .description file
172 writeinfojson: Write the video description to a .info.json file
173 writeannotations: Write the video annotations to a .annotations.xml file
174 writethumbnail: Write the thumbnail image to a file
175 write_all_thumbnails: Write all thumbnail formats to files
176 writesubtitles: Write the video subtitles to a file
177 writeautomaticsub: Write the automatically generated subtitles to a file
178 allsubtitles: Downloads all the subtitles of the video
179 (requires writesubtitles or writeautomaticsub)
180 listsubtitles: Lists all available subtitles for the video
181 subtitlesformat: The format code for subtitles
182 subtitleslangs: List of languages of the subtitles to download
183 keepvideo: Keep the video file after post-processing
184 daterange: A DateRange object, download only if the upload_date is in the range.
185 skip_download: Skip the actual download of the video file
186 cachedir: Location of the cache files in the filesystem.
187 False to disable filesystem cache.
188 noplaylist: Download single video instead of a playlist if in doubt.
189 age_limit: An integer representing the user's age in years.
190 Unsuitable videos for the given age are skipped.
191 min_views: An integer representing the minimum view count the video
192 must have in order to not be skipped.
193 Videos without view count information are always
194 downloaded. None for no limit.
195 max_views: An integer representing the maximum view count.
196 Videos that are more popular than that are not
198 Videos without view count information are always
199 downloaded. None for no limit.
200 download_archive: File name of a file where all downloads are recorded.
201 Videos already present in the file are not downloaded
203 cookiefile: File name where cookies should be read from and dumped to.
204 nocheckcertificate:Do not verify SSL certificates
205 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
206 At the moment, this is only supported by YouTube.
207 proxy: URL of the proxy server to use
208 geo_verification_proxy: URL of the proxy to use for IP address verification
209 on geo-restricted sites. (Experimental)
210 socket_timeout: Time to wait for unresponsive hosts, in seconds
211 bidi_workaround: Work around buggy terminals without bidirectional text
                       support, using fribidi
213 debug_printtraffic:Print out sent and received HTTP traffic
214 include_ads: Download ads as well
215 default_search: Prepend this string if an input url is not valid.
216 'auto' for elaborate guessing
217 encoding: Use this encoding instead of the system-specified.
218 extract_flat: Do not resolve URLs, return the immediate result.
219 Pass in 'in_playlist' to only show this behavior for
221 postprocessors: A list of dictionaries, each with an entry
222 * key: The name of the postprocessor. See
223 youtube_dl/postprocessor/__init__.py for a list.
224 as well as any further keyword arguments for the
226 progress_hooks: A list of functions that get called on download
227 progress, with a dictionary with the entries
228 * status: One of "downloading", "error", or "finished".
229 Check this first and ignore unknown values.
231 If status is one of "downloading", or "finished", the
232 following properties may also be present:
233 * filename: The final filename (always present)
234 * tmpfilename: The filename we're currently writing to
235 * downloaded_bytes: Bytes on disk
236 * total_bytes: Size of the whole file, None if unknown
237 * total_bytes_estimate: Guess of the eventual file size,
239 * elapsed: The number of seconds since download started.
240 * eta: The estimated time in seconds, None if unknown
241 * speed: The download speed in bytes/second, None if
243 * fragment_index: The counter of the currently
244 downloaded video fragment.
245 * fragment_count: The number of fragments (= individual
246 files that will be merged)
248 Progress hooks are guaranteed to be called at least once
249 (with status "finished") if the download is successful.
250 merge_output_format: Extension to use when merging formats.
251 fixup: Automatically correct known faults of the file.
253 - "never": do nothing
254 - "warn": only emit a warning
255 - "detect_or_warn": check whether we can do anything
256 about it, warn otherwise (default)
257 source_address: (Experimental) Client-side IP address to bind to.
258 call_home: Boolean, true iff we are allowed to contact the
259 youtube-dl servers for debugging.
260 sleep_interval: Number of seconds to sleep before each download when
261 used alone or a lower bound of a range for randomized
262 sleep before each download (minimum possible number
263 of seconds to sleep) when used along with
265 max_sleep_interval:Upper bound of a range for randomized sleep before each
266 download (maximum possible number of seconds to sleep).
267 Must only be used along with sleep_interval.
268 Actual sleep time will be a random float from range
269 [sleep_interval; max_sleep_interval].
270 listformats: Print an overview of available video formats and exit.
271 list_thumbnails: Print a table of all thumbnails and exit.
272 match_filter: A function that gets called with the info_dict of
274 If it returns a message, the video is ignored.
275 If it returns None, the video is downloaded.
276 match_filter_func in utils.py is one example for this.
277 no_color: Do not emit color codes in output.
278 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
279 HTTP header (experimental)
281 Two-letter ISO 3166-2 country code that will be used for
282 explicit geographic restriction bypassing via faking
283 X-Forwarded-For HTTP header (experimental)
285 The following options determine which downloader is picked:
286 external_downloader: Executable of the external downloader to call.
287 None or unset for standard (built-in) downloader.
288 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
289 if True, otherwise use ffmpeg/avconv if False, otherwise
290 use downloader suggested by extractor if None.
292 The following parameters are not used by YoutubeDL itself, they are used by
293 the downloader (see youtube_dl/downloader/common.py):
294 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
295 noresizebuffer, retries, continuedl, noprogress, consoletitle,
296 xattr_set_filesize, external_downloader_args, hls_use_mpegts.
298 The following options are used by the post processors:
299 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
300 otherwise prefer avconv.
301 postprocessor_args: A list of additional command-line arguments for the
308 _download_retcode = None
309 _num_downloads = None
312 def __init__(self, params=None, auto_init=True):
313 """Create a FileDownloader object with the given options."""
317 self._ies_instances = {}
319 self._progress_hooks = []
320 self._download_retcode = 0
321 self._num_downloads = 0
322 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
323 self._err_file = sys.stderr
326 'nocheckcertificate': False,
328 self.params.update(params)
329 self.cache = Cache(self)
331 def check_deprecated(param, option, suggestion):
332 if self.params.get(param) is not None:
334 '%s is deprecated. Use %s instead.' % (option, suggestion))
338 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
339 if self.params.get('geo_verification_proxy') is None:
340 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
342 check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
343 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
344 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
346 if params.get('bidi_workaround', False):
349 master, slave = pty.openpty()
350 width = compat_get_terminal_size().columns
354 width_args = ['-w', str(width)]
356 stdin=subprocess.PIPE,
358 stderr=self._err_file)
360 self._output_process = subprocess.Popen(
361 ['bidiv'] + width_args, **sp_kwargs
364 self._output_process = subprocess.Popen(
365 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
366 self._output_channel = os.fdopen(master, 'rb')
367 except OSError as ose:
368 if ose.errno == errno.ENOENT:
369 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
373 if (sys.version_info >= (3,) and sys.platform != 'win32' and
374 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
375 not params.get('restrictfilenames', False)):
376 # On Python 3, the Unicode filesystem API will throw errors (#1474)
378 'Assuming --restrict-filenames since file system encoding '
379 'cannot encode all characters. '
380 'Set the LC_ALL environment variable to fix this.')
381 self.params['restrictfilenames'] = True
383 if isinstance(params.get('outtmpl'), bytes):
385 'Parameter outtmpl is bytes, but should be a unicode string. '
386 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
391 self.print_debug_header()
392 self.add_default_info_extractors()
394 for pp_def_raw in self.params.get('postprocessors', []):
395 pp_class = get_postprocessor(pp_def_raw['key'])
396 pp_def = dict(pp_def_raw)
398 pp = pp_class(self, **compat_kwargs(pp_def))
399 self.add_post_processor(pp)
401 for ph in self.params.get('progress_hooks', []):
402 self.add_progress_hook(ph)
404 register_socks_protocols()
406 def warn_if_short_id(self, argv):
407 # short YouTube ID starting with dash?
409 i for i, a in enumerate(argv)
410 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
414 [a for i, a in enumerate(argv) if i not in idxs] +
415 ['--'] + [argv[i] for i in idxs]
418 'Long argument string detected. '
419 'Use -- to separate parameters and URLs, like this:\n%s\n' %
420 args_to_str(correct_argv))
422 def add_info_extractor(self, ie):
423 """Add an InfoExtractor object to the end of the list."""
425 if not isinstance(ie, type):
426 self._ies_instances[ie.ie_key()] = ie
427 ie.set_downloader(self)
429 def get_info_extractor(self, ie_key):
431 Get an instance of an IE with name ie_key, it will try to get one from
432 the _ies list, if there's no instance it will create a new one and add
433 it to the extractor list.
435 ie = self._ies_instances.get(ie_key)
437 ie = get_info_extractor(ie_key)()
438 self.add_info_extractor(ie)
441 def add_default_info_extractors(self):
443 Add the InfoExtractors returned by gen_extractors to the end of the list
445 for ie in gen_extractor_classes():
446 self.add_info_extractor(ie)
448 def add_post_processor(self, pp):
449 """Add a PostProcessor object to the end of the chain."""
451 pp.set_downloader(self)
453 def add_progress_hook(self, ph):
454 """Add the progress hook (currently only for the file downloader)"""
455 self._progress_hooks.append(ph)
457 def _bidi_workaround(self, message):
458 if not hasattr(self, '_output_channel'):
461 assert hasattr(self, '_output_process')
462 assert isinstance(message, compat_str)
463 line_count = message.count('\n') + 1
464 self._output_process.stdin.write((message + '\n').encode('utf-8'))
465 self._output_process.stdin.flush()
466 res = ''.join(self._output_channel.readline().decode('utf-8')
467 for _ in range(line_count))
468 return res[:-len('\n')]
470 def to_screen(self, message, skip_eol=False):
471 """Print message to stdout if not in quiet mode."""
472 return self.to_stdout(message, skip_eol, check_quiet=True)
474 def _write_string(self, s, out=None):
475 write_string(s, out=out, encoding=self.params.get('encoding'))
477 def to_stdout(self, message, skip_eol=False, check_quiet=False):
478 """Print message to stdout if not in quiet mode."""
479 if self.params.get('logger'):
480 self.params['logger'].debug(message)
481 elif not check_quiet or not self.params.get('quiet', False):
482 message = self._bidi_workaround(message)
483 terminator = ['\n', ''][skip_eol]
484 output = message + terminator
486 self._write_string(output, self._screen_file)
488 def to_stderr(self, message):
489 """Print message to stderr."""
490 assert isinstance(message, compat_str)
491 if self.params.get('logger'):
492 self.params['logger'].error(message)
494 message = self._bidi_workaround(message)
495 output = message + '\n'
496 self._write_string(output, self._err_file)
498 def to_console_title(self, message):
499 if not self.params.get('consoletitle', False):
501 if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
502 # c_wchar_p() might not be necessary if `message` is
503 # already of type unicode()
504 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
505 elif 'TERM' in os.environ:
506 self._write_string('\033]0;%s\007' % message, self._screen_file)
508 def save_console_title(self):
509 if not self.params.get('consoletitle', False):
511 if 'TERM' in os.environ:
512 # Save the title on stack
513 self._write_string('\033[22;0t', self._screen_file)
515 def restore_console_title(self):
516 if not self.params.get('consoletitle', False):
518 if 'TERM' in os.environ:
519 # Restore the title from stack
520 self._write_string('\033[23;0t', self._screen_file)
523 self.save_console_title()
526 def __exit__(self, *args):
527 self.restore_console_title()
529 if self.params.get('cookiefile') is not None:
530 self.cookiejar.save()
532 def trouble(self, message=None, tb=None):
533 """Determine action to take when a download problem appears.
535 Depending on if the downloader has been configured to ignore
536 download errors or not, this method may throw an exception or
537 not when errors are found, after printing the message.
539 tb, if given, is additional traceback information.
541 if message is not None:
542 self.to_stderr(message)
543 if self.params.get('verbose'):
545 if sys.exc_info()[0]: # if .trouble has been called from an except block
547 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
548 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
549 tb += encode_compat_str(traceback.format_exc())
551 tb_data = traceback.format_list(traceback.extract_stack())
552 tb = ''.join(tb_data)
554 if not self.params.get('ignoreerrors', False):
555 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
556 exc_info = sys.exc_info()[1].exc_info
558 exc_info = sys.exc_info()
559 raise DownloadError(message, exc_info)
560 self._download_retcode = 1
562 def report_warning(self, message):
564 Print the message to stderr, it will be prefixed with 'WARNING:'
565 If stderr is a tty file the 'WARNING:' will be colored
567 if self.params.get('logger') is not None:
568 self.params['logger'].warning(message)
570 if self.params.get('no_warnings'):
572 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
573 _msg_header = '\033[0;33mWARNING:\033[0m'
575 _msg_header = 'WARNING:'
576 warning_message = '%s %s' % (_msg_header, message)
577 self.to_stderr(warning_message)
579 def report_error(self, message, tb=None):
581 Do the same as trouble, but prefixes the message with 'ERROR:', colored
582 in red if stderr is a tty file.
584 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
585 _msg_header = '\033[0;31mERROR:\033[0m'
587 _msg_header = 'ERROR:'
588 error_message = '%s %s' % (_msg_header, message)
589 self.trouble(error_message, tb)
591 def report_file_already_downloaded(self, file_name):
592 """Report file has already been fully downloaded."""
594 self.to_screen('[download] %s has already been downloaded' % file_name)
595 except UnicodeEncodeError:
596 self.to_screen('[download] The file has already been downloaded')
598 def prepare_filename(self, info_dict):
599 """Generate the output filename."""
601 template_dict = dict(info_dict)
603 template_dict['epoch'] = int(time.time())
604 autonumber_size = self.params.get('autonumber_size')
605 if autonumber_size is None:
607 template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
608 if template_dict.get('resolution') is None:
609 if template_dict.get('width') and template_dict.get('height'):
610 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
611 elif template_dict.get('height'):
612 template_dict['resolution'] = '%sp' % template_dict['height']
613 elif template_dict.get('width'):
614 template_dict['resolution'] = '%dx?' % template_dict['width']
616 sanitize = lambda k, v: sanitize_filename(
618 restricted=self.params.get('restrictfilenames'),
619 is_id=(k == 'id' or k.endswith('_id')))
620 template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
621 for k, v in template_dict.items()
622 if v is not None and not isinstance(v, (list, tuple, dict)))
623 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
625 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
627 # For fields playlist_index and autonumber convert all occurrences
628 # of %(field)s to %(field)0Nd for backward compatibility
629 field_size_compat_map = {
630 'playlist_index': len(str(template_dict['n_entries'])),
631 'autonumber': autonumber_size,
633 FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
634 mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
637 FIELD_SIZE_COMPAT_RE,
638 r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
641 NUMERIC_FIELDS = set((
642 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
643 'upload_year', 'upload_month', 'upload_day',
644 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
645 'average_rating', 'comment_count', 'age_limit',
646 'start_time', 'end_time',
647 'chapter_number', 'season_number', 'episode_number',
648 'track_number', 'disc_number', 'release_year',
652 # Missing numeric fields used together with integer presentation types
653 # in format specification will break the argument substitution since
654 # string 'NA' is returned for missing fields. We will patch output
655 # template for missing fields to meet string presentation type.
656 for numeric_field in NUMERIC_FIELDS:
657 if numeric_field not in template_dict:
658 # As of [1] format syntax is:
659 # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
660 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
664 \({0}\) # mapping key
665 (?:[#0\-+ ]+)? # conversion flags (optional)
666 (?:\d+)? # minimum field width (optional)
667 (?:\.\d+)? # precision (optional)
668 [hlL]? # length modifier (optional)
669 [diouxXeEfFgGcrs%] # conversion type
672 FORMAT_RE.format(numeric_field),
673 r'%({0})s'.format(numeric_field), outtmpl)
675 tmpl = expand_path(outtmpl)
676 filename = tmpl % template_dict
677 # Temporary fix for #4787
678 # 'Treat' all problem characters by passing filename through preferredencoding
679 # to workaround encoding issues with subprocess on python2 @ Windows
680 if sys.version_info < (3, 0) and sys.platform == 'win32':
681 filename = encodeFilename(filename, True).decode(preferredencoding())
682 return sanitize_path(filename)
683 except ValueError as err:
684 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
687 def _match_entry(self, info_dict, incomplete):
688 """ Returns None iff the file should be downloaded """
690 video_title = info_dict.get('title', info_dict.get('id', 'video'))
691 if 'title' in info_dict:
692 # This can happen when we're just evaluating the playlist
693 title = info_dict['title']
694 matchtitle = self.params.get('matchtitle', False)
696 if not re.search(matchtitle, title, re.IGNORECASE):
697 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
698 rejecttitle = self.params.get('rejecttitle', False)
700 if re.search(rejecttitle, title, re.IGNORECASE):
701 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
702 date = info_dict.get('upload_date')
704 dateRange = self.params.get('daterange', DateRange())
705 if date not in dateRange:
706 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
707 view_count = info_dict.get('view_count')
708 if view_count is not None:
709 min_views = self.params.get('min_views')
710 if min_views is not None and view_count < min_views:
711 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
712 max_views = self.params.get('max_views')
713 if max_views is not None and view_count > max_views:
714 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
715 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
716 return 'Skipping "%s" because it is age restricted' % video_title
717 if self.in_download_archive(info_dict):
718 return '%s has already been recorded in archive' % video_title
721 match_filter = self.params.get('match_filter')
722 if match_filter is not None:
723 ret = match_filter(info_dict)
730 def add_extra_info(info_dict, extra_info):
731 '''Set the keys from extra_info in info dict if they are missing'''
732 for key, value in extra_info.items():
733 info_dict.setdefault(key, value)
735 def extract_info(self, url, download=True, ie_key=None, extra_info={},
736 process=True, force_generic_extractor=False):
738 Returns a list with a dictionary for each video we find.
739 If 'download', also downloads the videos.
740 extra_info is a dict containing the extra values to add to each result
743 if not ie_key and force_generic_extractor:
747 ies = [self.get_info_extractor(ie_key)]
752 if not ie.suitable(url):
755 ie = self.get_info_extractor(ie.ie_key())
757 self.report_warning('The program functionality for this site has been marked as broken, '
758 'and will probably not work.')
761 ie_result = ie.extract(url)
762 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
764 if isinstance(ie_result, list):
765 # Backwards compatibility: old IE result format
767 '_type': 'compat_list',
768 'entries': ie_result,
770 self.add_default_extra_info(ie_result, ie, url)
772 return self.process_ie_result(ie_result, download, extra_info)
775 except GeoRestrictedError as e:
778 msg += '\nThis video is available in %s.' % ', '.join(
779 map(ISO3166Utils.short2full, e.countries))
780 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
781 self.report_error(msg)
783 except ExtractorError as e: # An error we somewhat expected
784 self.report_error(compat_str(e), e.format_traceback())
786 except MaxDownloadsReached:
788 except Exception as e:
789 if self.params.get('ignoreerrors', False):
790 self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
795 self.report_error('no suitable InfoExtractor for URL %s' % url)
797 def add_default_extra_info(self, ie_result, ie, url):
798 self.add_extra_info(ie_result, {
799 'extractor': ie.IE_NAME,
801 'webpage_url_basename': url_basename(url),
802 'extractor_key': ie.ie_key(),
805 def process_ie_result(self, ie_result, download=True, extra_info={}):
807 Take the result of the ie(may be modified) and resolve all unresolved
808 references (URLs, playlist items).
810 It will also download the videos if 'download'.
811 Returns the resolved ie_result.
813 result_type = ie_result.get('_type', 'video')
815 if result_type in ('url', 'url_transparent'):
816 ie_result['url'] = sanitize_url(ie_result['url'])
817 extract_flat = self.params.get('extract_flat', False)
818 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
819 extract_flat is True):
820 if self.params.get('forcejson', False):
821 self.to_stdout(json.dumps(ie_result))
824 if result_type == 'video':
825 self.add_extra_info(ie_result, extra_info)
826 return self.process_video_result(ie_result, download=download)
827 elif result_type == 'url':
828 # We have to add extra_info to the results because it may be
829 # contained in a playlist
830 return self.extract_info(ie_result['url'],
832 ie_key=ie_result.get('ie_key'),
833 extra_info=extra_info)
834 elif result_type == 'url_transparent':
835 # Use the information from the embedding page
836 info = self.extract_info(
837 ie_result['url'], ie_key=ie_result.get('ie_key'),
838 extra_info=extra_info, download=False, process=False)
840 # extract_info may return None when ignoreerrors is enabled and
841 # extraction failed with an error, don't crash and return early
846 force_properties = dict(
847 (k, v) for k, v in ie_result.items() if v is not None)
848 for f in ('_type', 'url', 'ie_key'):
849 if f in force_properties:
850 del force_properties[f]
851 new_result = info.copy()
852 new_result.update(force_properties)
854 assert new_result.get('_type') != 'url_transparent'
856 return self.process_ie_result(
857 new_result, download=download, extra_info=extra_info)
858 elif result_type == 'playlist' or result_type == 'multi_video':
859 # We process each entry in the playlist
860 playlist = ie_result.get('title') or ie_result.get('id')
861 self.to_screen('[download] Downloading playlist: %s' % playlist)
863 playlist_results = []
865 playliststart = self.params.get('playliststart', 1) - 1
866 playlistend = self.params.get('playlistend')
867 # For backwards compatibility, interpret -1 as whole list
868 if playlistend == -1:
871 playlistitems_str = self.params.get('playlist_items')
873 if playlistitems_str is not None:
874 def iter_playlistitems(format):
875 for string_segment in format.split(','):
876 if '-' in string_segment:
877 start, end = string_segment.split('-')
878 for item in range(int(start), int(end) + 1):
881 yield int(string_segment)
882 playlistitems = iter_playlistitems(playlistitems_str)
884 ie_entries = ie_result['entries']
885 if isinstance(ie_entries, list):
886 n_all_entries = len(ie_entries)
889 ie_entries[i - 1] for i in playlistitems
890 if -n_all_entries <= i - 1 < n_all_entries]
892 entries = ie_entries[playliststart:playlistend]
893 n_entries = len(entries)
895 '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
896 (ie_result['extractor'], playlist, n_all_entries, n_entries))
897 elif isinstance(ie_entries, PagedList):
900 for item in playlistitems:
901 entries.extend(ie_entries.getslice(
905 entries = ie_entries.getslice(
906 playliststart, playlistend)
907 n_entries = len(entries)
909 '[%s] playlist %s: Downloading %d videos' %
910 (ie_result['extractor'], playlist, n_entries))
913 entry_list = list(ie_entries)
914 entries = [entry_list[i - 1] for i in playlistitems]
916 entries = list(itertools.islice(
917 ie_entries, playliststart, playlistend))
918 n_entries = len(entries)
920 '[%s] playlist %s: Downloading %d videos' %
921 (ie_result['extractor'], playlist, n_entries))
923 if self.params.get('playlistreverse', False):
924 entries = entries[::-1]
926 if self.params.get('playlistrandom', False):
927 random.shuffle(entries)
929 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
931 for i, entry in enumerate(entries, 1):
932 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
933 # This __x_forwarded_for_ip thing is a bit ugly but requires
936 entry['__x_forwarded_for_ip'] = x_forwarded_for
938 'n_entries': n_entries,
939 'playlist': playlist,
940 'playlist_id': ie_result.get('id'),
941 'playlist_title': ie_result.get('title'),
942 'playlist_index': i + playliststart,
943 'extractor': ie_result['extractor'],
944 'webpage_url': ie_result['webpage_url'],
945 'webpage_url_basename': url_basename(ie_result['webpage_url']),
946 'extractor_key': ie_result['extractor_key'],
949 reason = self._match_entry(entry, incomplete=True)
950 if reason is not None:
951 self.to_screen('[download] ' + reason)
954 entry_result = self.process_ie_result(entry,
957 playlist_results.append(entry_result)
958 ie_result['entries'] = playlist_results
959 self.to_screen('[download] Finished downloading playlist: %s' % playlist)
961 elif result_type == 'compat_list':
963 'Extractor %s returned a compat_list result. '
964 'It needs to be updated.' % ie_result.get('extractor'))
970 'extractor': ie_result['extractor'],
971 'webpage_url': ie_result['webpage_url'],
972 'webpage_url_basename': url_basename(ie_result['webpage_url']),
973 'extractor_key': ie_result['extractor_key'],
977 ie_result['entries'] = [
978 self.process_ie_result(_fixup(r), download, extra_info)
979 for r in ie_result['entries']
983 raise Exception('Invalid result type: %s' % result_type)
    def _build_format_filter(self, filter_spec):
        """Return a predicate keeping only formats that match *filter_spec*.

        The spec is first matched against the numeric grammar
        (e.g. 'height<=720', 'filesize>10M') and then against the string
        grammar (e.g. 'ext=mp4'); a ValueError is raised when neither fits.
        """
        operator_rex = re.compile(r'''(?x)\s*
            (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
            \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
            ''' % '|'.join(map(re.escape, OPERATORS.keys())))
        m = operator_rex.search(filter_spec)
            # Plain integer first; fall back to human-readable sizes ('10M').
            comparison_value = int(m.group('value'))
            comparison_value = parse_filesize(m.group('value'))
                if comparison_value is None:
                    # Retry with an explicit bytes suffix ('10M' -> '10MB').
                    comparison_value = parse_filesize(m.group('value') + 'B')
                if comparison_value is None:
                    'Invalid value %r in format specification %r' % (
                        m.group('value'), filter_spec))
            op = OPERATORS[m.group('op')]
            # String operators: prefix, suffix and substring matches.
            '^=': lambda attr, value: attr.startswith(value),
            '$=': lambda attr, value: attr.endswith(value),
            '*=': lambda attr, value: value in attr,
            str_operator_rex = re.compile(r'''(?x)
                \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
                \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
                \s*(?P<value>[a-zA-Z0-9._-]+)
                ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
            m = str_operator_rex.search(filter_spec)
                comparison_value = m.group('value')
                op = STR_OPERATORS[m.group('op')]
            raise ValueError('Invalid filter specification %r' % filter_spec)
            # A trailing '?' in the spec lets formats lacking the key pass.
            actual_value = f.get(m.group('key'))
            if actual_value is None:
                return m.group('none_inclusive')
            return op(actual_value, comparison_value)
    def build_format_selector(self, format_spec):
        """Compile a format-selection expression (e.g.
        'bestvideo[height<=720]+bestaudio/best') into a function that maps a
        context dict (formats, incomplete_formats, ...) to the formats to
        download."""
        def syntax_error(note, start):
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)
        PICKFIRST = 'PICKFIRST'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
        def _parse_filter(tokens):
            # Collect raw token strings until the matching ']' ends the filter.
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                    filter_parts.append(string)
        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                elif type == tokenize.OP and string in ALLOWED_OPS:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                        last_string = string
                        last_string += string
                yield tokenize.NAME, last_string, last_start, last_end, last_line
        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Recursive-descent parser; returns a list of FormatSelector nodes.
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                        video_selector = current_selector
                        audio_selector = _parse_format_selection(tokens, inside_merge=True)
                        if not video_selector or not audio_selector:
                            raise syntax_error('"+" must be between two format selectors', start)
                        current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
            if current_selector:
                selectors.append(current_selector)
        def _build_selector_function(selector):
            # Turn the parsed selector tree into a generator-producing function.
            if isinstance(selector, list):
                fs = [_build_selector_function(s) for s in selector]
                def selector_function(ctx):
                        for format in f(ctx):
                return selector_function
            elif selector.type == GROUP:
                selector_function = _build_selector_function(selector.selector)
            elif selector.type == PICKFIRST:
                fs = [_build_selector_function(s) for s in selector.selector]
                def selector_function(ctx):
                        # First alternative that yields anything wins.
                        picked_formats = list(f(ctx))
                            return picked_formats
            elif selector.type == SINGLE:
                format_spec = selector.selector
                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    if format_spec == 'all':
                    elif format_spec in ['best', 'worst', None]:
                        format_idx = 0 if format_spec == 'worst' else -1
                        audiovideo_formats = [
                            if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                        if audiovideo_formats:
                            yield audiovideo_formats[format_idx]
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) we will fallback to best/worst
                        # {video,audio}-only format
                        elif ctx['incomplete_formats']:
                            yield formats[format_idx]
                    elif format_spec == 'bestaudio':
                            if f.get('vcodec') == 'none']
                            yield audio_formats[-1]
                    elif format_spec == 'worstaudio':
                            if f.get('vcodec') == 'none']
                            yield audio_formats[0]
                    elif format_spec == 'bestvideo':
                            if f.get('acodec') == 'none']
                            yield video_formats[-1]
                    elif format_spec == 'worstvideo':
                            if f.get('acodec') == 'none']
                            yield video_formats[0]
                        # Bare extension selects by ext, anything else by format_id.
                        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                        if format_spec in extensions:
                            filter_f = lambda f: f['ext'] == format_spec
                            filter_f = lambda f: f['format_id'] == format_spec
                        matches = list(filter(filter_f, formats))
            elif selector.type == MERGE:
                def _merge(formats_info):
                    format_1, format_2 = [f['format_id'] for f in formats_info]
                    # The first format must contain the video and the second the audio
                    if formats_info[0].get('vcodec') == 'none':
                        self.report_error('The first format must '
                                          'contain the video, try using '
                                          '"-f %s+%s"' % (format_2, format_1))
                    # Formats must be opposite (video+audio)
                    if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
                            'Both formats %s and %s are video-only, you must specify "-f video+audio"'
                            % (format_1, format_2))
                        formats_info[0]['ext']
                        if self.params.get('merge_output_format') is None
                        else self.params['merge_output_format'])
                        'requested_formats': formats_info,
                        'format': '%s+%s' % (formats_info[0].get('format'),
                                             formats_info[1].get('format')),
                        'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                formats_info[1].get('format_id')),
                        'width': formats_info[0].get('width'),
                        'height': formats_info[0].get('height'),
                        'resolution': formats_info[0].get('resolution'),
                        'fps': formats_info[0].get('fps'),
                        'vcodec': formats_info[0].get('vcodec'),
                        'vbr': formats_info[0].get('vbr'),
                        'stretched_ratio': formats_info[0].get('stretched_ratio'),
                        'acodec': formats_info[1].get('acodec'),
                        'abr': formats_info[1].get('abr'),
                video_selector, audio_selector = map(_build_selector_function, selector.selector)
                def selector_function(ctx):
                    for pair in itertools.product(
                            video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
            filters = [self._build_format_filter(f) for f in selector.filters]
            def final_selector(ctx):
                # Apply attached [filters] on a deep copy so siblings are unaffected.
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector
        # The spec is lexed with the stock Python tokenizer.
        stream = io.BytesIO(format_spec.encode('utf-8'))
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
        class TokenIterator(object):
            # Minimal token stream supporting one-token lookback (restore_last_token).
            def __init__(self, tokens):
                self.tokens = tokens
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
            def restore_last_token(self):
        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
    def _calc_headers(self, info_dict):
        """Assemble the HTTP headers for a download: global std_headers
        overlaid with the format's own http_headers, matching cookies and
        the per-request X-Forwarded-For IP, if any."""
        res = std_headers.copy()
        add_headers = info_dict.get('http_headers')
            res.update(add_headers)
        cookies = self._calc_cookies(info_dict)
            res['Cookie'] = cookies
        # __x_forwarded_for_ip is the private key propagated through
        # extraction results (see the playlist handling above).
        if 'X-Forwarded-For' not in res:
            x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
            if x_forwarded_for_ip:
                res['X-Forwarded-For'] = x_forwarded_for_ip
1328 def _calc_cookies(self, info_dict):
1329 pr = sanitized_Request(info_dict['url'])
1330 self.cookiejar.add_cookie_header(pr)
1331 return pr.get_header('Cookie')
    def process_video_result(self, info_dict, download=True):
        """Sanitize a single extracted video result, select the formats to
        fetch according to the 'format' option and (when *download* is true)
        hand each selected format to process_info()."""
        assert info_dict.get('_type', 'video') == 'video'
        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')
        if not isinstance(info_dict['id'], compat_str):
            self.report_warning('"id" field is not a string - forcing string conversion')
            info_dict['id'] = compat_str(info_dict['id'])
        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None
        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            thumbnail = info_dict.get('thumbnail')
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
            # Sort worst-to-best so thumbnails[-1] is the preferred one.
            thumbnails.sort(key=lambda t: (
                t.get('preference') if t.get('preference') is not None else -1,
                t.get('width') if t.get('width') is not None else -1,
                t.get('height') if t.get('height') is not None else -1,
                t.get('id') if t.get('id') is not None else '', t.get('url')))
            for i, t in enumerate(thumbnails):
                t['url'] = sanitize_url(t['url'])
                if t.get('width') and t.get('height'):
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                if t.get('id') is None:
            if self.params.get('list_thumbnails'):
                self.list_thumbnails(info_dict)
        thumbnail = info_dict.get('thumbnail')
            info_dict['thumbnail'] = sanitize_url(thumbnail)
            info_dict['thumbnail'] = thumbnails[-1]['url']
        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']
        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Working around out-of-range timestamp values (e.g. negative ones on Windows,
            # see http://bugs.python.org/issue1646728)
                upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
                info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
            except (ValueError, OverflowError, OSError):
        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
        subtitles = info_dict.get('subtitles')
            for _, subtitle in subtitles.items():
                for subtitle_format in subtitle:
                    if subtitle_format.get('url'):
                        subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                    if subtitle_format.get('ext') is None:
                        subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
        if self.params.get('listsubtitles', False):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles,
            info_dict.get('automatic_captions'))
        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
            formats = info_dict['formats']
            raise ExtractorError('No video formats found!')
        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)
            format['url'] = sanitize_url(format['url'])
            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)
        # Make sure all formats have unique format_id
        for format_id, ambiguous_formats in formats_dict.items():
            if len(ambiguous_formats) > 1:
                for i, format in enumerate(ambiguous_formats):
                    format['format_id'] = '%s-%d' % (format_id, i)
        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
            # Automatically determine file extension if missing
            if format.get('ext') is None:
                format['ext'] = determine_ext(format['url']).lower()
            # Automatically determine protocol if missing (useful for format
            # selection purposes)
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            # Add HTTP headers, so that external programs can use them from the
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']
        # TODO Central sorting goes here
        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats'):
            self.list_formats(info_dict)
        req_format = self.params.get('format')
        if req_format is None:
            # Default: merge bestvideo+bestaudio when a merger is usable
            # and we are not streaming to stdout, otherwise plain 'best'.
            req_format_list = []
            if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
                    not info_dict.get('is_live')):
                merger = FFmpegMergerPP(self)
                if merger.available and merger.can_merge():
                    req_format_list.append('bestvideo+bestaudio')
            req_format_list.append('best')
            req_format = '/'.join(req_format_list)
        format_selector = self.build_format_selector(req_format)
        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/rg3/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/rg3/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) or
            # all formats are audio-only
            all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
            'incomplete_formats': incomplete_formats,
        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            raise ExtractorError('requested format not available',
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
    def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
        """Select the requested subtitles and their format.

        Merges normal subtitles and automatic captions (normal ones win),
        picks the requested languages and the best matching format per the
        'subtitleslangs'/'subtitlesformat' options.
        """
        if normal_subtitles and self.params.get('writesubtitles'):
            available_subs.update(normal_subtitles)
        if automatic_captions and self.params.get('writeautomaticsub'):
            for lang, cap_info in automatic_captions.items():
                # Automatic captions never override real subtitles.
                if lang not in available_subs:
                    available_subs[lang] = cap_info
        if (not self.params.get('writesubtitles') and not
                self.params.get('writeautomaticsub') or not
        if self.params.get('allsubtitles', False):
            requested_langs = available_subs.keys()
            if self.params.get('subtitleslangs', False):
                requested_langs = self.params.get('subtitleslangs')
            elif 'en' in available_subs:
                requested_langs = ['en']
                requested_langs = [list(available_subs.keys())[0]]
        formats_query = self.params.get('subtitlesformat', 'best')
        formats_preference = formats_query.split('/') if formats_query else []
        for lang in requested_langs:
            formats = available_subs.get(lang)
                self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            # Walk the preference list and take the first extension that matches.
            for ext in formats_preference:
                matches = list(filter(lambda f: f['ext'] == ext, formats))
                self.report_warning(
                    'No subtitle format found matching "%s" for language %s, '
                    'using %s' % (formats_query, lang, f['ext']))
    def process_info(self, info_dict):
        """Process a single resolved IE result: honour the force* print
        options, write description/annotations/subtitles/info-json side
        files, download (and possibly merge) the media, then run fixup
        postprocessors and record the download archive entry."""
        assert info_dict.get('_type', 'video') == 'video'
        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads >= int(max_downloads):
                raise MaxDownloadsReached()
        # Keep the untruncated title around before capping it for filenames.
        info_dict['fulltitle'] = info_dict['title']
        if len(info_dict['title']) > 200:
            info_dict['title'] = info_dict['title'][:197] + '...'
        if 'format' not in info_dict:
            info_dict['format'] = info_dict['ext']
        reason = self._match_entry(info_dict, incomplete=False)
        if reason is not None:
            self.to_screen('[download] ' + reason)
        self._num_downloads += 1
        info_dict['_filename'] = filename = self.prepare_filename(info_dict)
        # forced printings
        if self.params.get('forcetitle', False):
            self.to_stdout(info_dict['fulltitle'])
        if self.params.get('forceid', False):
            self.to_stdout(info_dict['id'])
        if self.params.get('forceurl', False):
            if info_dict.get('requested_formats') is not None:
                for f in info_dict['requested_formats']:
                    self.to_stdout(f['url'] + f.get('play_path', ''))
                # For RTMP URLs, also include the playpath
                self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
        if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
            self.to_stdout(info_dict['thumbnail'])
        if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
            self.to_stdout(info_dict['description'])
        if self.params.get('forcefilename', False) and filename is not None:
            self.to_stdout(filename)
        if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
            self.to_stdout(formatSeconds(info_dict['duration']))
        if self.params.get('forceformat', False):
            self.to_stdout(info_dict['format'])
        if self.params.get('forcejson', False):
            self.to_stdout(json.dumps(info_dict))
        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):
        if filename is None:
            dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
            if dn and not os.path.exists(dn):
        except (OSError, IOError) as err:
            self.report_error('unable to create directory ' + error_to_compat_str(err))
        if self.params.get('writedescription', False):
            descfn = replace_extension(filename, 'description', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Video description is already present')
            elif info_dict.get('description') is None:
                self.report_warning('There\'s no description to write.')
                    self.to_screen('[info] Writing video description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(info_dict['description'])
                except (OSError, IOError):
                    self.report_error('Cannot write description file ' + descfn)
        if self.params.get('writeannotations', False):
            annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
                self.to_screen('[info] Video annotations are already present')
                    self.to_screen('[info] Writing video annotations to: ' + annofn)
                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                        annofile.write(info_dict['annotations'])
                except (KeyError, TypeError):
                    self.report_warning('There are no annotations to write.')
                except (OSError, IOError):
                    self.report_error('Cannot write annotations file: ' + annofn)
        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub')])
        if subtitles_are_requested and info_dict.get('requested_subtitles'):
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            subtitles = info_dict['requested_subtitles']
            ie = self.get_info_extractor(info_dict['extractor_key'])
            for sub_lang, sub_info in subtitles.items():
                sub_format = sub_info['ext']
                if sub_info.get('data') is not None:
                    sub_data = sub_info['data']
                        sub_data = ie._download_webpage(
                            sub_info['url'], info_dict['id'], note=False)
                    except ExtractorError as err:
                        self.report_warning('Unable to download subtitle for "%s": %s' %
                                            (sub_lang, error_to_compat_str(err.cause)))
                sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                    self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                        self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                        # Use newline='' to prevent conversion of newline characters
                        # See https://github.com/rg3/youtube-dl/issues/10268
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
                            subfile.write(sub_data)
                    except (OSError, IOError):
                        self.report_error('Cannot write subtitles file ' + sub_filename)
        if self.params.get('writeinfojson', False):
            infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Video description metadata is already present')
                self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
                    write_json_file(self.filter_requested_info(info_dict), infofn)
                except (OSError, IOError):
                    self.report_error('Cannot write metadata to JSON file ' + infofn)
        self._write_thumbnails(info_dict, filename)
        if not self.params.get('skip_download', False):
                    # dl() is the shared download helper for both the merged
                    # multi-format path and the single-file path below.
                    fd = get_suitable_downloader(info, self.params)(self, self.params)
                    for ph in self._progress_hooks:
                        fd.add_progress_hook(ph)
                    if self.params.get('verbose'):
                        self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                    return fd.download(name, info)
                if info_dict.get('requested_formats') is not None:
                    merger = FFmpegMergerPP(self)
                    if not merger.available:
                        self.report_warning('You have requested multiple '
                                            'formats but ffmpeg or avconv are not installed.'
                                            ' The formats won\'t be merged.')
                        postprocessors = [merger]
                    def compatible_formats(formats):
                        video, audio = formats
                        # NOTE(review): video_ext/audio_ext look swapped here
                        # (video_ext gets audio's ext); presumably harmless since
                        # the containment check below is symmetric — confirm.
                        video_ext, audio_ext = audio.get('ext'), video.get('ext')
                        if video_ext and audio_ext:
                                ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
                            for exts in COMPATIBLE_EXTS:
                                if video_ext in exts and audio_ext in exts:
                        # TODO: Check acodec/vcodec
                    filename_real_ext = os.path.splitext(filename)[1][1:]
                        os.path.splitext(filename)[0]
                        if filename_real_ext == info_dict['ext']
                    requested_formats = info_dict['requested_formats']
                    if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'Requested formats are incompatible for merge and will be merged into mkv.')
                    # Ensure filename always has a correct extension for successful merge
                    filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
                    if os.path.exists(encodeFilename(filename)):
                            '[download] %s has already been downloaded and '
                            'merged' % filename)
                        for f in requested_formats:
                            new_info = dict(info_dict)
                            fname = self.prepare_filename(new_info)
                            fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
                            downloaded.append(fname)
                            partial_success = dl(fname, new_info)
                            success = success and partial_success
                        info_dict['__postprocessors'] = postprocessors
                        info_dict['__files_to_merge'] = downloaded
                # Just a single file
                    success = dl(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error('unable to download video data: %s' % error_to_compat_str(err))
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            if success and filename != '-':
                # Fixup content: each detector either warns or queues an
                # ffmpeg-based fixup postprocessor per the 'fixup' policy.
                fixup_policy = self.params.get('fixup')
                if fixup_policy is None:
                    fixup_policy = 'detect_or_warn'
                INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
                stretched_ratio = info_dict.get('stretched_ratio')
                if stretched_ratio is not None and stretched_ratio != 1:
                    if fixup_policy == 'warn':
                        self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                            info_dict['id'], stretched_ratio))
                    elif fixup_policy == 'detect_or_warn':
                        stretched_pp = FFmpegFixupStretchedPP(self)
                        if stretched_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(stretched_pp)
                            self.report_warning(
                                '%s: Non-uniform pixel ratio (%s). %s'
                                % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
                        assert fixup_policy in ('ignore', 'never')
                if (info_dict.get('requested_formats') is None and
                        info_dict.get('container') == 'm4a_dash'):
                    if fixup_policy == 'warn':
                        self.report_warning(
                            '%s: writing DASH m4a. '
                            'Only some players support this container.'
                    elif fixup_policy == 'detect_or_warn':
                        fixup_pp = FFmpegFixupM4aPP(self)
                        if fixup_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(fixup_pp)
                            self.report_warning(
                                '%s: writing DASH m4a. '
                                'Only some players support this container. %s'
                                % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                        assert fixup_policy in ('ignore', 'never')
                if (info_dict.get('protocol') == 'm3u8_native' or
                        info_dict.get('protocol') == 'm3u8' and
                        self.params.get('hls_prefer_native')):
                    if fixup_policy == 'warn':
                        self.report_warning('%s: malformated aac bitstream.' % (
                    elif fixup_policy == 'detect_or_warn':
                        fixup_pp = FFmpegFixupM3u8PP(self)
                        if fixup_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(fixup_pp)
                            self.report_warning(
                                '%s: malformated aac bitstream. %s'
                                % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                        assert fixup_policy in ('ignore', 'never')
                    self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error('postprocessing: %s' % str(err))
        self.record_download_archive(info_dict)
    def download(self, url_list):
        """Download a given list of URLs.

        Raises SameFileError when several URLs would be written to one
        fixed output file; returns the accumulated retcode.
        """
        outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
        if (len(url_list) > 1 and
                '%' not in outtmpl and
                self.params.get('max_downloads') != 1):
            raise SameFileError(outtmpl)
        for url in url_list:
                # It also downloads the videos
                res = self.extract_info(
                    url, force_generic_extractor=self.params.get('force_generic_extractor', False))
            except UnavailableVideoError:
                self.report_error('unable to download video')
            except MaxDownloadsReached:
                self.to_screen('[info] Maximum number of downloaded files reached.')
                if self.params.get('dump_single_json', False):
                    self.to_stdout(json.dumps(res))
        return self._download_retcode
    def download_with_info_file(self, info_filename):
        """Download from a previously dumped .info.json file, retrying via
        its webpage_url when the stored info fails."""
        with contextlib.closing(fileinput.FileInput(
                [info_filename], mode='r',
                openhook=fileinput.hook_encoded('utf-8'))) as f:
            # FileInput doesn't have a read method, we can't call json.load
            info = self.filter_requested_info(json.loads('\n'.join(f)))
            self.process_ie_result(info, download=True)
        except DownloadError:
            webpage_url = info.get('webpage_url')
            if webpage_url is not None:
                self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
                return self.download([webpage_url])
        return self._download_retcode
1920 def filter_requested_info(info_dict):
1922 (k, v) for k, v in info_dict.items()
1923 if k not in ['requested_formats', 'requested_subtitles'])
1925 def post_process(self, filename, ie_info):
1926 """Run all the postprocessors on the given file."""
1927 info = dict(ie_info)
1928 info['filepath'] = filename
1930 if ie_info.get('__postprocessors') is not None:
1931 pps_chain.extend(ie_info['__postprocessors'])
1932 pps_chain.extend(self._pps)
1933 for pp in pps_chain:
1934 files_to_delete = []
1936 files_to_delete, info = pp.run(info)
1937 except PostProcessingError as e:
1938 self.report_error(e.msg)
1939 if files_to_delete and not self.params.get('keepvideo', False):
1940 for old_filename in files_to_delete:
1941 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1943 os.remove(encodeFilename(old_filename))
1944 except (IOError, OSError):
1945 self.report_warning('Unable to remove downloaded original file')
1947 def _make_archive_id(self, info_dict):
1948 # Future-proof against any change in case
1949 # and backwards compatibility with prior versions
1950 extractor = info_dict.get('extractor_key')
1951 if extractor is None:
1952 if 'id' in info_dict:
1953 extractor = info_dict.get('ie_key') # key in a playlist
1954 if extractor is None:
1955 return None # Incomplete video information
1956 return extractor.lower() + ' ' + info_dict['id']
1958 def in_download_archive(self, info_dict):
1959 fn = self.params.get('download_archive')
1963 vid_id = self._make_archive_id(info_dict)
1965 return False # Incomplete video information
1968 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1969 for line in archive_file:
1970 if line.strip() == vid_id:
1972 except IOError as ioe:
1973 if ioe.errno != errno.ENOENT:
1977 def record_download_archive(self, info_dict):
1978 fn = self.params.get('download_archive')
1981 vid_id = self._make_archive_id(info_dict)
1983 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1984 archive_file.write(vid_id + '\n')
1987 def format_resolution(format, default='unknown'):
1988 if format.get('vcodec') == 'none':
1990 if format.get('resolution') is not None:
1991 return format['resolution']
1992 if format.get('height') is not None:
1993 if format.get('width') is not None:
1994 res = '%sx%s' % (format['width'], format['height'])
1996 res = '%sp' % format['height']
1997 elif format.get('width') is not None:
1998 res = '%dx?' % format['width']
2003 def _format_note(self, fdict):
2005 if fdict.get('ext') in ['f4f', 'f4m']:
2006 res += '(unsupported) '
2007 if fdict.get('language'):
2010 res += '[%s] ' % fdict['language']
2011 if fdict.get('format_note') is not None:
2012 res += fdict['format_note'] + ' '
2013 if fdict.get('tbr') is not None:
2014 res += '%4dk ' % fdict['tbr']
2015 if fdict.get('container') is not None:
2018 res += '%s container' % fdict['container']
2019 if (fdict.get('vcodec') is not None and
2020 fdict.get('vcodec') != 'none'):
2023 res += fdict['vcodec']
2024 if fdict.get('vbr') is not None:
2026 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2028 if fdict.get('vbr') is not None:
2029 res += '%4dk' % fdict['vbr']
2030 if fdict.get('fps') is not None:
2033 res += '%sfps' % fdict['fps']
2034 if fdict.get('acodec') is not None:
2037 if fdict['acodec'] == 'none':
2040 res += '%-5s' % fdict['acodec']
2041 elif fdict.get('abr') is not None:
2045 if fdict.get('abr') is not None:
2046 res += '@%3dk' % fdict['abr']
2047 if fdict.get('asr') is not None:
2048 res += ' (%5dHz)' % fdict['asr']
2049 if fdict.get('filesize') is not None:
2052 res += format_bytes(fdict['filesize'])
2053 elif fdict.get('filesize_approx') is not None:
2056 res += '~' + format_bytes(fdict['filesize_approx'])
2059 def list_formats(self, info_dict):
2060 formats = info_dict.get('formats', [info_dict])
2062 [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
2064 if f.get('preference') is None or f['preference'] >= -1000]
2065 if len(formats) > 1:
2066 table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
2068 header_line = ['format code', 'extension', 'resolution', 'note']
2070 '[info] Available formats for %s:\n%s' %
2071 (info_dict['id'], render_table(header_line, table)))
2073 def list_thumbnails(self, info_dict):
2074 thumbnails = info_dict.get('thumbnails')
2076 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2080 '[info] Thumbnails for %s:' % info_dict['id'])
2081 self.to_screen(render_table(
2082 ['ID', 'width', 'height', 'URL'],
2083 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
2085 def list_subtitles(self, video_id, subtitles, name='subtitles'):
2087 self.to_screen('%s has no %s' % (video_id, name))
2090 'Available %s for %s:' % (name, video_id))
2091 self.to_screen(render_table(
2092 ['Language', 'formats'],
2093 [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2094 for lang, formats in subtitles.items()]))
2096 def urlopen(self, req):
2097 """ Start an HTTP download """
2098 if isinstance(req, compat_basestring):
2099 req = sanitized_Request(req)
2100 return self._opener.open(req, timeout=self._socket_timeout)
2102 def print_debug_header(self):
2103 if not self.params.get('verbose'):
2106 if type('') is not compat_str:
2107 # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
2108 self.report_warning(
2109 'Your Python is broken! Update to a newer and supported version')
2111 stdout_encoding = getattr(
2112 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
2114 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
2115 locale.getpreferredencoding(),
2116 sys.getfilesystemencoding(),
2118 self.get_encoding()))
2119 write_string(encoding_str, encoding=None)
2121 self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
2123 self._write_string('[debug] Lazy loading extractors enabled' + '\n')
2125 sp = subprocess.Popen(
2126 ['git', 'rev-parse', '--short', 'HEAD'],
2127 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
2128 cwd=os.path.dirname(os.path.abspath(__file__)))
2129 out, err = sp.communicate()
2130 out = out.decode().strip()
2131 if re.match('[0-9a-f]+', out):
2132 self._write_string('[debug] Git HEAD: ' + out + '\n')
2138 self._write_string('[debug] Python version %s - %s\n' % (
2139 platform.python_version(), platform_name()))
2141 exe_versions = FFmpegPostProcessor.get_versions(self)
2142 exe_versions['rtmpdump'] = rtmpdump_version()
2143 exe_str = ', '.join(
2145 for exe, v in sorted(exe_versions.items())
2150 self._write_string('[debug] exe versions: %s\n' % exe_str)
2153 for handler in self._opener.handlers:
2154 if hasattr(handler, 'proxies'):
2155 proxy_map.update(handler.proxies)
2156 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
2158 if self.params.get('call_home', False):
2159 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
2160 self._write_string('[debug] Public IP address: %s\n' % ipaddr)
2161 latest_version = self.urlopen(
2162 'https://yt-dl.org/latest/version').read().decode('utf-8')
2163 if version_tuple(latest_version) > version_tuple(__version__):
2164 self.report_warning(
2165 'You are using an outdated version (newest version: %s)! '
2166 'See https://yt-dl.org/update if you need help updating.' %
2169 def _setup_opener(self):
2170 timeout_val = self.params.get('socket_timeout')
2171 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
2173 opts_cookiefile = self.params.get('cookiefile')
2174 opts_proxy = self.params.get('proxy')
2176 if opts_cookiefile is None:
2177 self.cookiejar = compat_cookiejar.CookieJar()
2179 opts_cookiefile = expand_path(opts_cookiefile)
2180 self.cookiejar = compat_cookiejar.MozillaCookieJar(
2182 if os.access(opts_cookiefile, os.R_OK):
2183 self.cookiejar.load()
2185 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
2186 if opts_proxy is not None:
2187 if opts_proxy == '':
2190 proxies = {'http': opts_proxy, 'https': opts_proxy}
2192 proxies = compat_urllib_request.getproxies()
2193 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
2194 if 'http' in proxies and 'https' not in proxies:
2195 proxies['https'] = proxies['http']
2196 proxy_handler = PerRequestProxyHandler(proxies)
2198 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
2199 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
2200 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
2201 data_handler = compat_urllib_request_DataHandler()
2203 # When passing our own FileHandler instance, build_opener won't add the
2204 # default FileHandler and allows us to disable the file protocol, which
2205 # can be used for malicious purposes (see
2206 # https://github.com/rg3/youtube-dl/issues/8227)
2207 file_handler = compat_urllib_request.FileHandler()
2209 def file_open(*args, **kwargs):
2210 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
2211 file_handler.file_open = file_open
2213 opener = compat_urllib_request.build_opener(
2214 proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)
2216 # Delete the default user-agent header, which would otherwise apply in
2217 # cases where our custom HTTP handler doesn't come into play
2218 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
2219 opener.addheaders = []
2220 self._opener = opener
2222 def encode(self, s):
2223 if isinstance(s, bytes):
2224 return s # Already encoded
2227 return s.encode(self.get_encoding())
2228 except UnicodeEncodeError as err:
2229 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2232 def get_encoding(self):
2233 encoding = self.params.get('encoding')
2234 if encoding is None:
2235 encoding = preferredencoding()
2238 def _write_thumbnails(self, info_dict, filename):
2239 if self.params.get('writethumbnail', False):
2240 thumbnails = info_dict.get('thumbnails')
2242 thumbnails = [thumbnails[-1]]
2243 elif self.params.get('write_all_thumbnails', False):
2244 thumbnails = info_dict.get('thumbnails')
2249 # No thumbnails present, so return immediately
2252 for t in thumbnails:
2253 thumb_ext = determine_ext(t['url'], 'jpg')
2254 suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2255 thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2256 t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
2258 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2259 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2260 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2262 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2263 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2265 uf = self.urlopen(t['url'])
2266 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2267 shutil.copyfileobj(uf, thumbf)
2268 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2269 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2270 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2271 self.report_warning('Unable to download thumbnail "%s": %s' %
2272 (t['url'], error_to_compat_str(err)))