Merge pull request #8819 from remitamine/simple-webpage-requests
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import datetime
9 import errno
10 import fileinput
11 import io
12 import itertools
13 import json
14 import locale
15 import operator
16 import os
17 import platform
18 import re
19 import shutil
20 import subprocess
21 import socket
22 import sys
23 import time
24 import tokenize
25 import traceback
26
27 from .compat import (
28     compat_basestring,
29     compat_cookiejar,
30     compat_expanduser,
31     compat_get_terminal_size,
32     compat_http_client,
33     compat_kwargs,
34     compat_os_name,
35     compat_str,
36     compat_tokenize_tokenize,
37     compat_urllib_error,
38     compat_urllib_request,
39     compat_urllib_request_DataHandler,
40 )
41 from .utils import (
42     ContentTooShortError,
43     date_from_str,
44     DateRange,
45     DEFAULT_OUTTMPL,
46     determine_ext,
47     determine_protocol,
48     DownloadError,
49     encode_compat_str,
50     encodeFilename,
51     error_to_compat_str,
52     ExtractorError,
53     format_bytes,
54     formatSeconds,
55     locked_file,
56     make_HTTPS_handler,
57     MaxDownloadsReached,
58     PagedList,
59     parse_filesize,
60     PerRequestProxyHandler,
61     PostProcessingError,
62     platform_name,
63     preferredencoding,
64     render_table,
65     SameFileError,
66     sanitize_filename,
67     sanitize_path,
68     sanitized_Request,
69     std_headers,
70     subtitles_filename,
71     UnavailableVideoError,
72     url_basename,
73     version_tuple,
74     write_json_file,
75     write_string,
76     YoutubeDLCookieProcessor,
77     YoutubeDLHandler,
78     prepend_extension,
79     replace_extension,
80     args_to_str,
81     age_restricted,
82 )
83 from .cache import Cache
84 from .extractor import get_info_extractor, gen_extractors
85 from .downloader import get_suitable_downloader
86 from .downloader.rtmp import rtmpdump_version
87 from .postprocessor import (
88     FFmpegFixupM3u8PP,
89     FFmpegFixupM4aPP,
90     FFmpegFixupStretchedPP,
91     FFmpegMergerPP,
92     FFmpegPostProcessor,
93     get_postprocessor,
94 )
95 from .version import __version__
96
97 if compat_os_name == 'nt':
98     import ctypes
99
100
101 class YoutubeDL(object):
102     """YoutubeDL class.
103
104     YoutubeDL objects are the ones responsible for downloading the
105     actual video file and writing it to disk if the user has requested
106     it, among some other tasks. In most cases there should be one per
107     program. As, given a video URL, the downloader doesn't know how to
108     extract all the needed information, a task that InfoExtractors do, it
109     has to pass the URL to one of them.
110
111     For this, YoutubeDL objects have a method that allows
112     InfoExtractors to be registered in a given order. When it is passed
113     a URL, the YoutubeDL object hands it to the first InfoExtractor it
114     finds that reports being able to handle it. The InfoExtractor extracts
115     all the information about the video or videos the URL refers to, and
116     YoutubeDL processes the extracted information, possibly using a File
117     Downloader to download the video.
118
119     YoutubeDL objects accept a lot of parameters. In order not to saturate
120     the object constructor with arguments, it receives a dictionary of
121     options instead. These options are available through the params
122     attribute for the InfoExtractors to use. The YoutubeDL also
123     registers itself as the downloader in charge for the InfoExtractors
124     that are added to it, so this is a "mutual registration".
125
126     Available options:
127
128     username:          Username for authentication purposes.
129     password:          Password for authentication purposes.
130     videopassword:     Password for accessing a video.
131     usenetrc:          Use netrc for authentication instead.
132     verbose:           Print additional info to stdout.
133     quiet:             Do not print messages to stdout.
134     no_warnings:       Do not print out anything for warnings.
135     forceurl:          Force printing final URL.
136     forcetitle:        Force printing title.
137     forceid:           Force printing ID.
138     forcethumbnail:    Force printing thumbnail URL.
139     forcedescription:  Force printing description.
140     forcefilename:     Force printing final filename.
141     forceduration:     Force printing duration.
142     forcejson:         Force printing info_dict as JSON.
143     dump_single_json:  Force printing the info_dict of the whole playlist
144                        (or video) as a single JSON line.
145     simulate:          Do not download the video files.
146     format:            Video format code. See options.py for more information.
147     outtmpl:           Template for output names.
148     restrictfilenames: Do not allow "&" and spaces in file names
149     ignoreerrors:      Do not stop on download errors.
150     force_generic_extractor: Force downloader to use the generic extractor
151     nooverwrites:      Prevent overwriting files.
152     playliststart:     Playlist item to start at.
153     playlistend:       Playlist item to end at.
154     playlist_items:    Specific indices of playlist to download.
155     playlistreverse:   Download playlist items in reverse order.
156     matchtitle:        Download only matching titles.
157     rejecttitle:       Reject downloads for matching titles.
158     logger:            Log messages to a logging.Logger instance.
159     logtostderr:       Log messages to stderr instead of stdout.
160     writedescription:  Write the video description to a .description file
161     writeinfojson:     Write the video description to a .info.json file
162     writeannotations:  Write the video annotations to a .annotations.xml file
163     writethumbnail:    Write the thumbnail image to a file
164     write_all_thumbnails:  Write all thumbnail formats to files
165     writesubtitles:    Write the video subtitles to a file
166     writeautomaticsub: Write the automatically generated subtitles to a file
167     allsubtitles:      Downloads all the subtitles of the video
168                        (requires writesubtitles or writeautomaticsub)
169     listsubtitles:     Lists all available subtitles for the video
170     subtitlesformat:   The format code for subtitles
171     subtitleslangs:    List of languages of the subtitles to download
172     keepvideo:         Keep the video file after post-processing
173     daterange:         A DateRange object, download only if the upload_date is in the range.
174     skip_download:     Skip the actual download of the video file
175     cachedir:          Location of the cache files in the filesystem.
176                        False to disable filesystem cache.
177     noplaylist:        Download single video instead of a playlist if in doubt.
178     age_limit:         An integer representing the user's age in years.
179                        Unsuitable videos for the given age are skipped.
180     min_views:         An integer representing the minimum view count the video
181                        must have in order to not be skipped.
182                        Videos without view count information are always
183                        downloaded. None for no limit.
184     max_views:         An integer representing the maximum view count.
185                        Videos that are more popular than that are not
186                        downloaded.
187                        Videos without view count information are always
188                        downloaded. None for no limit.
189     download_archive:  File name of a file where all downloads are recorded.
190                        Videos already present in the file are not downloaded
191                        again.
192     cookiefile:        File name where cookies should be read from and dumped to.
193     nocheckcertificate:Do not verify SSL certificates
194     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
195                        At the moment, this is only supported by YouTube.
196     proxy:             URL of the proxy server to use
197     cn_verification_proxy:  URL of the proxy to use for IP address verification
198                        on Chinese sites. (Experimental)
199     socket_timeout:    Time to wait for unresponsive hosts, in seconds
200     bidi_workaround:   Work around buggy terminals without bidirectional text
201                        support, using fribidi
202     debug_printtraffic:Print out sent and received HTTP traffic
203     include_ads:       Download ads as well
204     default_search:    Prepend this string if an input url is not valid.
205                        'auto' for elaborate guessing
206     encoding:          Use this encoding instead of the system-specified.
207     extract_flat:      Do not resolve URLs, return the immediate result.
208                        Pass in 'in_playlist' to only show this behavior for
209                        playlist items.
210     postprocessors:    A list of dictionaries, each with an entry
211                        * key:  The name of the postprocessor. See
212                                youtube_dl/postprocessor/__init__.py for a list.
213                        as well as any further keyword arguments for the
214                        postprocessor.
215     progress_hooks:    A list of functions that get called on download
216                        progress, with a dictionary with the entries
217                        * status: One of "downloading", "error", or "finished".
218                                  Check this first and ignore unknown values.
219
220                        If status is one of "downloading", or "finished", the
221                        following properties may also be present:
222                        * filename: The final filename (always present)
223                        * tmpfilename: The filename we're currently writing to
224                        * downloaded_bytes: Bytes on disk
225                        * total_bytes: Size of the whole file, None if unknown
226                        * total_bytes_estimate: Guess of the eventual file size,
227                                                None if unavailable.
228                        * elapsed: The number of seconds since download started.
229                        * eta: The estimated time in seconds, None if unknown
230                        * speed: The download speed in bytes/second, None if
231                                 unknown
232                        * fragment_index: The counter of the currently
233                                          downloaded video fragment.
234                        * fragment_count: The number of fragments (= individual
235                                          files that will be merged)
236
237                        Progress hooks are guaranteed to be called at least once
238                        (with status "finished") if the download is successful.
239     merge_output_format: Extension to use when merging formats.
240     fixup:             Automatically correct known faults of the file.
241                        One of:
242                        - "never": do nothing
243                        - "warn": only emit a warning
244                        - "detect_or_warn": check whether we can do anything
245                                            about it, warn otherwise (default)
246     source_address:    (Experimental) Client-side IP address to bind to.
247     call_home:         Boolean, true iff we are allowed to contact the
248                        youtube-dl servers for debugging.
249     sleep_interval:    Number of seconds to sleep before each download.
250     listformats:       Print an overview of available video formats and exit.
251     list_thumbnails:   Print a table of all thumbnails and exit.
252     match_filter:      A function that gets called with the info_dict of
253                        every video.
254                        If it returns a message, the video is ignored.
255                        If it returns None, the video is downloaded.
256                        match_filter_func in utils.py is one example for this.
257     no_color:          Do not emit color codes in output.
258
259     The following options determine which downloader is picked:
260     external_downloader: Executable of the external downloader to call.
261                        None or unset for standard (built-in) downloader.
262     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
263
264     The following parameters are not used by YoutubeDL itself, they are used by
265     the downloader (see youtube_dl/downloader/common.py):
266     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
267     noresizebuffer, retries, continuedl, noprogress, consoletitle,
268     xattr_set_filesize, external_downloader_args, hls_use_mpegts.
269
270     The following options are used by the post processors:
271     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
272                        otherwise prefer avconv.
273     postprocessor_args: A list of additional command-line arguments for the
274                         postprocessor.
275     """
276
    # Class-level defaults; each is re-bound per instance in __init__.
    params = None  # Dictionary of options (see the class docstring).
    _ies = []  # Registered InfoExtractor instances, in priority order.
    _pps = []  # Registered PostProcessor instances, applied in order.
    _download_retcode = None  # Process return code (0 ok, 1 after an error).
    _num_downloads = None  # Number of files downloaded in this session.
    _screen_file = None  # Stream for screen output (stdout, or stderr with logtostderr).
283
284     def __init__(self, params=None, auto_init=True):
285         """Create a FileDownloader object with the given options."""
286         if params is None:
287             params = {}
288         self._ies = []
289         self._ies_instances = {}
290         self._pps = []
291         self._progress_hooks = []
292         self._download_retcode = 0
293         self._num_downloads = 0
294         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
295         self._err_file = sys.stderr
296         self.params = {
297             # Default parameters
298             'nocheckcertificate': False,
299         }
300         self.params.update(params)
301         self.cache = Cache(self)
302
303         if params.get('bidi_workaround', False):
304             try:
305                 import pty
306                 master, slave = pty.openpty()
307                 width = compat_get_terminal_size().columns
308                 if width is None:
309                     width_args = []
310                 else:
311                     width_args = ['-w', str(width)]
312                 sp_kwargs = dict(
313                     stdin=subprocess.PIPE,
314                     stdout=slave,
315                     stderr=self._err_file)
316                 try:
317                     self._output_process = subprocess.Popen(
318                         ['bidiv'] + width_args, **sp_kwargs
319                     )
320                 except OSError:
321                     self._output_process = subprocess.Popen(
322                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
323                 self._output_channel = os.fdopen(master, 'rb')
324             except OSError as ose:
325                 if ose.errno == 2:
326                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
327                 else:
328                     raise
329
330         if (sys.version_info >= (3,) and sys.platform != 'win32' and
331                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
332                 not params.get('restrictfilenames', False)):
333             # On Python 3, the Unicode filesystem API will throw errors (#1474)
334             self.report_warning(
335                 'Assuming --restrict-filenames since file system encoding '
336                 'cannot encode all characters. '
337                 'Set the LC_ALL environment variable to fix this.')
338             self.params['restrictfilenames'] = True
339
340         if isinstance(params.get('outtmpl'), bytes):
341             self.report_warning(
342                 'Parameter outtmpl is bytes, but should be a unicode string. '
343                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
344
345         self._setup_opener()
346
347         if auto_init:
348             self.print_debug_header()
349             self.add_default_info_extractors()
350
351         for pp_def_raw in self.params.get('postprocessors', []):
352             pp_class = get_postprocessor(pp_def_raw['key'])
353             pp_def = dict(pp_def_raw)
354             del pp_def['key']
355             pp = pp_class(self, **compat_kwargs(pp_def))
356             self.add_post_processor(pp)
357
358         for ph in self.params.get('progress_hooks', []):
359             self.add_progress_hook(ph)
360
361     def warn_if_short_id(self, argv):
362         # short YouTube ID starting with dash?
363         idxs = [
364             i for i, a in enumerate(argv)
365             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
366         if idxs:
367             correct_argv = (
368                 ['youtube-dl'] +
369                 [a for i, a in enumerate(argv) if i not in idxs] +
370                 ['--'] + [argv[i] for i in idxs]
371             )
372             self.report_warning(
373                 'Long argument string detected. '
374                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
375                 args_to_str(correct_argv))
376
    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        self._ies.append(ie)
        # Cache the instance by key for get_info_extractor() lookups.
        self._ies_instances[ie.ie_key()] = ie
        # Mutual registration: the IE needs a reference back to us.
        ie.set_downloader(self)
382
383     def get_info_extractor(self, ie_key):
384         """
385         Get an instance of an IE with name ie_key, it will try to get one from
386         the _ies list, if there's no instance it will create a new one and add
387         it to the extractor list.
388         """
389         ie = self._ies_instances.get(ie_key)
390         if ie is None:
391             ie = get_info_extractor(ie_key)()
392             self.add_info_extractor(ie)
393         return ie
394
    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        # gen_extractors yields the extractors in priority order; register
        # them in that order so matching is deterministic.
        for ie in gen_extractors():
            self.add_info_extractor(ie)
401
    def add_post_processor(self, pp):
        """Add a PostProcessor object to the end of the chain."""
        self._pps.append(pp)
        # Mutual registration, mirroring add_info_extractor.
        pp.set_downloader(self)
406
    def add_progress_hook(self, ph):
        """Add the progress hook (currently only for the file downloader)"""
        # Hooks are called by the downloader with a status dictionary;
        # see the 'progress_hooks' entry in the class docstring.
        self._progress_hooks.append(ph)
410
411     def _bidi_workaround(self, message):
412         if not hasattr(self, '_output_channel'):
413             return message
414
415         assert hasattr(self, '_output_process')
416         assert isinstance(message, compat_str)
417         line_count = message.count('\n') + 1
418         self._output_process.stdin.write((message + '\n').encode('utf-8'))
419         self._output_process.stdin.flush()
420         res = ''.join(self._output_channel.readline().decode('utf-8')
421                       for _ in range(line_count))
422         return res[:-len('\n')]
423
424     def to_screen(self, message, skip_eol=False):
425         """Print message to stdout if not in quiet mode."""
426         return self.to_stdout(message, skip_eol, check_quiet=True)
427
    def _write_string(self, s, out=None):
        # Thin wrapper around utils.write_string that applies the
        # user-selected output encoding from params.
        write_string(s, out=out, encoding=self.params.get('encoding'))
430
431     def to_stdout(self, message, skip_eol=False, check_quiet=False):
432         """Print message to stdout if not in quiet mode."""
433         if self.params.get('logger'):
434             self.params['logger'].debug(message)
435         elif not check_quiet or not self.params.get('quiet', False):
436             message = self._bidi_workaround(message)
437             terminator = ['\n', ''][skip_eol]
438             output = message + terminator
439
440             self._write_string(output, self._screen_file)
441
442     def to_stderr(self, message):
443         """Print message to stderr."""
444         assert isinstance(message, compat_str)
445         if self.params.get('logger'):
446             self.params['logger'].error(message)
447         else:
448             message = self._bidi_workaround(message)
449             output = message + '\n'
450             self._write_string(output, self._err_file)
451
    def to_console_title(self, message):
        """Set the console/terminal window title to *message*.

        No-op unless the 'consoletitle' option is enabled.
        """
        if not self.params.get('consoletitle', False):
            return
        if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            # OSC 0 escape sequence sets the xterm window title.
            self._write_string('\033]0;%s\007' % message, self._screen_file)
461
462     def save_console_title(self):
463         if not self.params.get('consoletitle', False):
464             return
465         if 'TERM' in os.environ:
466             # Save the title on stack
467             self._write_string('\033[22;0t', self._screen_file)
468
469     def restore_console_title(self):
470         if not self.params.get('consoletitle', False):
471             return
472         if 'TERM' in os.environ:
473             # Restore the title from stack
474             self._write_string('\033[23;0t', self._screen_file)
475
    def __enter__(self):
        # Context-manager entry: remember the terminal title so that
        # __exit__ can restore it.
        self.save_console_title()
        return self
479
    def __exit__(self, *args):
        # Context-manager exit: restore the terminal title and persist
        # cookies if a cookie file was configured.
        self.restore_console_title()

        if self.params.get('cookiefile') is not None:
            self.cookiejar.save()
485
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        # The active exception carries a wrapped exc_info
                        # (e.g. from an extractor); include its traceback too.
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: show the current call stack.
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Prefer the wrapped exception's exc_info (when present) so the
            # raised DownloadError points at the original cause.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1
515
516     def report_warning(self, message):
517         '''
518         Print the message to stderr, it will be prefixed with 'WARNING:'
519         If stderr is a tty file the 'WARNING:' will be colored
520         '''
521         if self.params.get('logger') is not None:
522             self.params['logger'].warning(message)
523         else:
524             if self.params.get('no_warnings'):
525                 return
526             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
527                 _msg_header = '\033[0;33mWARNING:\033[0m'
528             else:
529                 _msg_header = 'WARNING:'
530             warning_message = '%s %s' % (_msg_header, message)
531             self.to_stderr(warning_message)
532
533     def report_error(self, message, tb=None):
534         '''
535         Do the same as trouble, but prefixes the message with 'ERROR:', colored
536         in red if stderr is a tty file.
537         '''
538         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
539             _msg_header = '\033[0;31mERROR:\033[0m'
540         else:
541             _msg_header = 'ERROR:'
542         error_message = '%s %s' % (_msg_header, message)
543         self.trouble(error_message, tb)
544
545     def report_file_already_downloaded(self, file_name):
546         """Report file has already been fully downloaded."""
547         try:
548             self.to_screen('[download] %s has already been downloaded' % file_name)
549         except UnicodeEncodeError:
550             self.to_screen('[download] The file has already been downloaded')
551
552     def prepare_filename(self, info_dict):
553         """Generate the output filename."""
554         try:
555             template_dict = dict(info_dict)
556
557             template_dict['epoch'] = int(time.time())
558             autonumber_size = self.params.get('autonumber_size')
559             if autonumber_size is None:
560                 autonumber_size = 5
561             autonumber_templ = '%0' + str(autonumber_size) + 'd'
562             template_dict['autonumber'] = autonumber_templ % self._num_downloads
563             if template_dict.get('playlist_index') is not None:
564                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
565             if template_dict.get('resolution') is None:
566                 if template_dict.get('width') and template_dict.get('height'):
567                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
568                 elif template_dict.get('height'):
569                     template_dict['resolution'] = '%sp' % template_dict['height']
570                 elif template_dict.get('width'):
571                     template_dict['resolution'] = '%dx?' % template_dict['width']
572
573             sanitize = lambda k, v: sanitize_filename(
574                 compat_str(v),
575                 restricted=self.params.get('restrictfilenames'),
576                 is_id=(k == 'id'))
577             template_dict = dict((k, sanitize(k, v))
578                                  for k, v in template_dict.items()
579                                  if v is not None)
580             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
581
582             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
583             tmpl = compat_expanduser(outtmpl)
584             filename = tmpl % template_dict
585             # Temporary fix for #4787
586             # 'Treat' all problem characters by passing filename through preferredencoding
587             # to workaround encoding issues with subprocess on python2 @ Windows
588             if sys.version_info < (3, 0) and sys.platform == 'win32':
589                 filename = encodeFilename(filename, True).decode(preferredencoding())
590             return sanitize_path(filename)
591         except ValueError as err:
592             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
593             return None
594
595     def _match_entry(self, info_dict, incomplete):
596         """ Returns None iff the file should be downloaded """
597
598         video_title = info_dict.get('title', info_dict.get('id', 'video'))
599         if 'title' in info_dict:
600             # This can happen when we're just evaluating the playlist
601             title = info_dict['title']
602             matchtitle = self.params.get('matchtitle', False)
603             if matchtitle:
604                 if not re.search(matchtitle, title, re.IGNORECASE):
605                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
606             rejecttitle = self.params.get('rejecttitle', False)
607             if rejecttitle:
608                 if re.search(rejecttitle, title, re.IGNORECASE):
609                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
610         date = info_dict.get('upload_date')
611         if date is not None:
612             dateRange = self.params.get('daterange', DateRange())
613             if date not in dateRange:
614                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
615         view_count = info_dict.get('view_count')
616         if view_count is not None:
617             min_views = self.params.get('min_views')
618             if min_views is not None and view_count < min_views:
619                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
620             max_views = self.params.get('max_views')
621             if max_views is not None and view_count > max_views:
622                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
623         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
624             return 'Skipping "%s" because it is age restricted' % video_title
625         if self.in_download_archive(info_dict):
626             return '%s has already been recorded in archive' % video_title
627
628         if not incomplete:
629             match_filter = self.params.get('match_filter')
630             if match_filter is not None:
631                 ret = match_filter(info_dict)
632                 if ret is not None:
633                     return ret
634
635         return None
636
637     @staticmethod
638     def add_extra_info(info_dict, extra_info):
639         '''Set the keys from extra_info in info dict if they are missing'''
640         for key, value in extra_info.items():
641             info_dict.setdefault(key, value)
642
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
                     process=True, force_generic_extractor=False):
        '''
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result

        ie_key: restrict extraction to the extractor with this key
            (skips the suitable() probe over all registered extractors).
        process: when False, return the raw extractor result without
            resolving nested URLs/playlists via process_ie_result().
        force_generic_extractor: use the 'Generic' extractor; only honored
            when no explicit ie_key is given.

        NOTE: the mutable default for extra_info is shared across calls,
        but it is only read here, never mutated.
        '''

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        # A specific key restricts the search to that single extractor;
        # otherwise every registered extractor is probed in order.
        if ie_key:
            ies = [self.get_info_extractor(ie_key)]
        else:
            ies = self._ies

        for ie in ies:
            if not ie.suitable(url):
                continue

            # A broken extractor is still attempted; the user is warned first.
            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            try:
                ie_result = ie.extract(url)
                if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                    break
                if isinstance(ie_result, list):
                    # Backwards compatibility: old IE result format
                    ie_result = {
                        '_type': 'compat_list',
                        'entries': ie_result,
                    }
                self.add_default_extra_info(ie_result, ie, url)
                if process:
                    return self.process_ie_result(ie_result, download, extra_info)
                else:
                    return ie_result
            except ExtractorError as e:  # An error we somewhat expected
                self.report_error(compat_str(e), e.format_traceback())
                break
            except MaxDownloadsReached:
                # Download-count limit must always propagate to the caller.
                raise
            except Exception as e:
                # Unexpected errors are swallowed (with a report) only when
                # the user asked to ignore errors; otherwise they propagate.
                if self.params.get('ignoreerrors', False):
                    self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
                    break
                else:
                    raise
        else:
            # for/else arm: no extractor accepted the URL at all.
            self.report_error('no suitable InfoExtractor for URL %s' % url)
695
696     def add_default_extra_info(self, ie_result, ie, url):
697         self.add_extra_info(ie_result, {
698             'extractor': ie.IE_NAME,
699             'webpage_url': url,
700             'webpage_url_basename': url_basename(url),
701             'extractor_key': ie.ie_key(),
702         })
703
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.

        Dispatches on ie_result['_type']:
          'video'                  -> finalized via process_video_result()
          'url'/'url_transparent'  -> re-extracted through extract_info()
          'playlist'/'multi_video' -> each entry processed recursively
          'compat_list'            -> legacy list-of-results format
        """
        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            # With flat extraction, nested URLs inside a playlist are not
            # resolved any further; the bare url result is returned as-is.
            extract_flat = self.params.get('extract_flat', False)
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                    extract_flat is True):
                if self.params.get('forcejson', False):
                    self.to_stdout(json.dumps(ie_result))
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # Non-None fields of the embedding result override the target's,
            # except the fields that describe the redirect itself.
            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url', 'ie_key'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            # The inner result must not chain another transparent redirect.
            assert new_result.get('_type') != 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist' or result_type == 'multi_video':
            # We process each entry in the playlist
            playlist = ie_result.get('title') or ie_result.get('id')
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # playliststart is 1-based in params, 0-based from here on.
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend')
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

            playlistitems_str = self.params.get('playlist_items')
            playlistitems = None
            if playlistitems_str is not None:
                # Expand a "1-3,7" style spec into individual 1-based indices.
                # NOTE: this is a generator and is consumed exactly once below.
                def iter_playlistitems(format):
                    for string_segment in format.split(','):
                        if '-' in string_segment:
                            start, end = string_segment.split('-')
                            for item in range(int(start), int(end) + 1):
                                yield int(item)
                        else:
                            yield int(string_segment)
                playlistitems = iter_playlistitems(playlistitems_str)

            ie_entries = ie_result['entries']
            if isinstance(ie_entries, list):
                n_all_entries = len(ie_entries)
                if playlistitems:
                    # Out-of-range indices are silently dropped here.
                    entries = [
                        ie_entries[i - 1] for i in playlistitems
                        if -n_all_entries <= i - 1 < n_all_entries]
                else:
                    entries = ie_entries[playliststart:playlistend]
                n_entries = len(entries)
                self.to_screen(
                    '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            elif isinstance(ie_entries, PagedList):
                # Lazily paged playlist: fetch only the requested slices.
                if playlistitems:
                    entries = []
                    for item in playlistitems:
                        entries.extend(ie_entries.getslice(
                            item - 1, item
                        ))
                else:
                    entries = ie_entries.getslice(
                        playliststart, playlistend)
                n_entries = len(entries)
                self.to_screen(
                    '[%s] playlist %s: Downloading %d videos' %
                    (ie_result['extractor'], playlist, n_entries))
            else:  # iterable
                if playlistitems:
                    # NOTE(review): this materializes the whole iterable and,
                    # unlike the list branch above, does no bounds checking —
                    # an out-of-range item raises IndexError.
                    entry_list = list(ie_entries)
                    entries = [entry_list[i - 1] for i in playlistitems]
                else:
                    entries = list(itertools.islice(
                        ie_entries, playliststart, playlistend))
                n_entries = len(entries)
                self.to_screen(
                    '[%s] playlist %s: Downloading %d videos' %
                    (ie_result['extractor'], playlist, n_entries))

            if self.params.get('playlistreverse', False):
                entries = entries[::-1]

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                # Playlist context inherited by each entry (via add_extra_info).
                extra = {
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_id': ie_result.get('id'),
                    'playlist_title': ie_result.get('title'),
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

                # Entries rejected by the match filters are skipped, not errors.
                reason = self._match_entry(entry, incomplete=True)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                    continue

                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            self.to_screen('[download] Finished downloading playlist: %s' % playlist)
            return ie_result
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            def _fixup(r):
                # Back-fill bookkeeping fields on each legacy entry in place.
                self.add_extra_info(
                    r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    }
                )
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
867
868     def _build_format_filter(self, filter_spec):
869         " Returns a function to filter the formats according to the filter_spec "
870
871         OPERATORS = {
872             '<': operator.lt,
873             '<=': operator.le,
874             '>': operator.gt,
875             '>=': operator.ge,
876             '=': operator.eq,
877             '!=': operator.ne,
878         }
879         operator_rex = re.compile(r'''(?x)\s*
880             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
881             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
882             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
883             $
884             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
885         m = operator_rex.search(filter_spec)
886         if m:
887             try:
888                 comparison_value = int(m.group('value'))
889             except ValueError:
890                 comparison_value = parse_filesize(m.group('value'))
891                 if comparison_value is None:
892                     comparison_value = parse_filesize(m.group('value') + 'B')
893                 if comparison_value is None:
894                     raise ValueError(
895                         'Invalid value %r in format specification %r' % (
896                             m.group('value'), filter_spec))
897             op = OPERATORS[m.group('op')]
898
899         if not m:
900             STR_OPERATORS = {
901                 '=': operator.eq,
902                 '!=': operator.ne,
903                 '^=': lambda attr, value: attr.startswith(value),
904                 '$=': lambda attr, value: attr.endswith(value),
905                 '*=': lambda attr, value: value in attr,
906             }
907             str_operator_rex = re.compile(r'''(?x)
908                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
909                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
910                 \s*(?P<value>[a-zA-Z0-9._-]+)
911                 \s*$
912                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
913             m = str_operator_rex.search(filter_spec)
914             if m:
915                 comparison_value = m.group('value')
916                 op = STR_OPERATORS[m.group('op')]
917
918         if not m:
919             raise ValueError('Invalid filter specification %r' % filter_spec)
920
921         def _filter(f):
922             actual_value = f.get(m.group('key'))
923             if actual_value is None:
924                 return m.group('none_inclusive')
925             return op(actual_value, comparison_value)
926         return _filter
927
    def build_format_selector(self, format_spec):
        """Compile format_spec (e.g. 'bestvideo+bestaudio/best[height<720]')
        into a function mapping a list of format dicts to an iterator over
        the selected formats.

        The spec is tokenized with the stdlib tokenizer, parsed into a tree
        of FormatSelector nodes, then compiled into nested closures.
        """
        def syntax_error(note, start):
            # Builds (does not raise) a SyntaxError pointing at column start[1].
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        # Node types of the parsed selector tree.
        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        def _parse_filter(tokens):
            # Collect the raw text of a [filter] expression up to the ']'.
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    # Any other token gets glued onto the pending NAME.
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Recursive-descent parse of a comma-separated selector list;
            # the inside_* flags mark which construct we are nested in.
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        # A bare '[filter]' implicitly filters 'best'.
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        video_selector = current_selector
                        audio_selector = _parse_format_selection(tokens, inside_merge=True)
                        if not video_selector or not audio_selector:
                            raise syntax_error('"+" must be between two format selectors', start)
                        current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _build_selector_function(selector):
            # Compile a selector node (or a list of alternatives) into a
            # function: formats -> iterable of chosen format dicts.
            if isinstance(selector, list):
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(formats):
                    for f in fs:
                        for format in f(formats):
                            yield format
                return selector_function
            elif selector.type == GROUP:
                selector_function = _build_selector_function(selector.selector)
            elif selector.type == PICKFIRST:
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(formats):
                    # First alternative that yields anything wins.
                    for f in fs:
                        picked_formats = list(f(formats))
                        if picked_formats:
                            return picked_formats
                    return []
            elif selector.type == SINGLE:
                format_spec = selector.selector

                def selector_function(formats):
                    # NOTE: 'best' picks [-1] and 'worst' picks [0] — assumes
                    # the formats list arrives sorted worst-to-best (the
                    # sorting happens elsewhere; confirm there).
                    formats = list(formats)
                    if not formats:
                        return
                    if format_spec == 'all':
                        for f in formats:
                            yield f
                    elif format_spec in ['best', 'worst', None]:
                        format_idx = 0 if format_spec == 'worst' else -1
                        audiovideo_formats = [
                            f for f in formats
                            if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                        if audiovideo_formats:
                            yield audiovideo_formats[format_idx]
                        # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
                        elif (all(f.get('acodec') != 'none' for f in formats) or
                              all(f.get('vcodec') != 'none' for f in formats)):
                            yield formats[format_idx]
                    elif format_spec == 'bestaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[-1]
                    elif format_spec == 'worstaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[0]
                    elif format_spec == 'bestvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[-1]
                    elif format_spec == 'worstvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[0]
                    else:
                        # Anything else is either a file extension or an
                        # explicit format_id; the last match wins.
                        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                        if format_spec in extensions:
                            filter_f = lambda f: f['ext'] == format_spec
                        else:
                            filter_f = lambda f: f['format_id'] == format_spec
                        matches = list(filter(filter_f, formats))
                        if matches:
                            yield matches[-1]
            elif selector.type == MERGE:
                def _merge(formats_info):
                    format_1, format_2 = [f['format_id'] for f in formats_info]
                    # The first format must contain the video and the
                    # second the audio
                    if formats_info[0].get('vcodec') == 'none':
                        self.report_error('The first format must '
                                          'contain the video, try using '
                                          '"-f %s+%s"' % (format_2, format_1))
                        return
                    # Formats must be opposite (video+audio)
                    if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
                        self.report_error(
                            'Both formats %s and %s are video-only, you must specify "-f video+audio"'
                            % (format_1, format_2))
                        return
                    output_ext = (
                        formats_info[0]['ext']
                        if self.params.get('merge_output_format') is None
                        else self.params['merge_output_format'])
                    # Merged entry: video attributes from [0], audio from [1].
                    return {
                        'requested_formats': formats_info,
                        'format': '%s+%s' % (formats_info[0].get('format'),
                                             formats_info[1].get('format')),
                        'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                formats_info[1].get('format_id')),
                        'width': formats_info[0].get('width'),
                        'height': formats_info[0].get('height'),
                        'resolution': formats_info[0].get('resolution'),
                        'fps': formats_info[0].get('fps'),
                        'vcodec': formats_info[0].get('vcodec'),
                        'vbr': formats_info[0].get('vbr'),
                        'stretched_ratio': formats_info[0].get('stretched_ratio'),
                        'acodec': formats_info[1].get('acodec'),
                        'abr': formats_info[1].get('abr'),
                        'ext': output_ext,
                    }
                video_selector, audio_selector = map(_build_selector_function, selector.selector)

                def selector_function(formats):
                    formats = list(formats)
                    for pair in itertools.product(video_selector(formats), audio_selector(formats)):
                        yield _merge(pair)

            # Wrap the node's selector with its attached [filters], if any.
            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(formats):
                for _filter in filters:
                    formats = list(filter(_filter, formats))
                return selector_function(formats)
            return final_selector

        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        # Minimal token stream with one-token pushback, needed by the
        # lookahead in _parse_format_selection (restore_last_token).
        class TokenIterator(object):
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            next = __next__  # Python 2 iterator protocol

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
1190
1191     def _calc_headers(self, info_dict):
1192         res = std_headers.copy()
1193
1194         add_headers = info_dict.get('http_headers')
1195         if add_headers:
1196             res.update(add_headers)
1197
1198         cookies = self._calc_cookies(info_dict)
1199         if cookies:
1200             res['Cookie'] = cookies
1201
1202         return res
1203
1204     def _calc_cookies(self, info_dict):
1205         pr = sanitized_Request(info_dict['url'])
1206         self.cookiejar.add_cookie_header(pr)
1207         return pr.get_header('Cookie')
1208
1209     def process_video_result(self, info_dict, download=True):
1210         assert info_dict.get('_type', 'video') == 'video'
1211
1212         if 'id' not in info_dict:
1213             raise ExtractorError('Missing "id" field in extractor result')
1214         if 'title' not in info_dict:
1215             raise ExtractorError('Missing "title" field in extractor result')
1216
1217         if 'playlist' not in info_dict:
1218             # It isn't part of a playlist
1219             info_dict['playlist'] = None
1220             info_dict['playlist_index'] = None
1221
1222         thumbnails = info_dict.get('thumbnails')
1223         if thumbnails is None:
1224             thumbnail = info_dict.get('thumbnail')
1225             if thumbnail:
1226                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1227         if thumbnails:
1228             thumbnails.sort(key=lambda t: (
1229                 t.get('preference'), t.get('width'), t.get('height'),
1230                 t.get('id'), t.get('url')))
1231             for i, t in enumerate(thumbnails):
1232                 if t.get('width') and t.get('height'):
1233                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1234                 if t.get('id') is None:
1235                     t['id'] = '%d' % i
1236
1237         if thumbnails and 'thumbnail' not in info_dict:
1238             info_dict['thumbnail'] = thumbnails[-1]['url']
1239
1240         if 'display_id' not in info_dict and 'id' in info_dict:
1241             info_dict['display_id'] = info_dict['id']
1242
1243         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1244             # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1245             # see http://bugs.python.org/issue1646728)
1246             try:
1247                 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1248                 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1249             except (ValueError, OverflowError, OSError):
1250                 pass
1251
1252         # Auto generate title fields corresponding to the *_number fields when missing
1253         # in order to always have clean titles. This is very common for TV series.
1254         for field in ('chapter', 'season', 'episode'):
1255             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1256                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
1257
1258         subtitles = info_dict.get('subtitles')
1259         if subtitles:
1260             for _, subtitle in subtitles.items():
1261                 for subtitle_format in subtitle:
1262                     if 'ext' not in subtitle_format:
1263                         subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1264
1265         if self.params.get('listsubtitles', False):
1266             if 'automatic_captions' in info_dict:
1267                 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
1268             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1269             return
1270         info_dict['requested_subtitles'] = self.process_subtitles(
1271             info_dict['id'], subtitles,
1272             info_dict.get('automatic_captions'))
1273
1274         # We now pick which formats have to be downloaded
1275         if info_dict.get('formats') is None:
1276             # There's only one format available
1277             formats = [info_dict]
1278         else:
1279             formats = info_dict['formats']
1280
1281         if not formats:
1282             raise ExtractorError('No video formats found!')
1283
1284         formats_dict = {}
1285
1286         # We check that all the formats have the format and format_id fields
1287         for i, format in enumerate(formats):
1288             if 'url' not in format:
1289                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1290
1291             if format.get('format_id') is None:
1292                 format['format_id'] = compat_str(i)
1293             else:
1294                 # Sanitize format_id from characters used in format selector expression
1295                 format['format_id'] = re.sub('[\s,/+\[\]()]', '_', format['format_id'])
1296             format_id = format['format_id']
1297             if format_id not in formats_dict:
1298                 formats_dict[format_id] = []
1299             formats_dict[format_id].append(format)
1300
1301         # Make sure all formats have unique format_id
1302         for format_id, ambiguous_formats in formats_dict.items():
1303             if len(ambiguous_formats) > 1:
1304                 for i, format in enumerate(ambiguous_formats):
1305                     format['format_id'] = '%s-%d' % (format_id, i)
1306
1307         for i, format in enumerate(formats):
1308             if format.get('format') is None:
1309                 format['format'] = '{id} - {res}{note}'.format(
1310                     id=format['format_id'],
1311                     res=self.format_resolution(format),
1312                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1313                 )
1314             # Automatically determine file extension if missing
1315             if 'ext' not in format:
1316                 format['ext'] = determine_ext(format['url']).lower()
1317             # Automatically determine protocol if missing (useful for format
1318             # selection purposes)
1319             if 'protocol' not in format:
1320                 format['protocol'] = determine_protocol(format)
1321             # Add HTTP headers, so that external programs can use them from the
1322             # json output
1323             full_format_info = info_dict.copy()
1324             full_format_info.update(format)
1325             format['http_headers'] = self._calc_headers(full_format_info)
1326
1327         # TODO Central sorting goes here
1328
1329         if formats[0] is not info_dict:
1330             # only set the 'formats' fields if the original info_dict list them
1331             # otherwise we end up with a circular reference, the first (and unique)
1332             # element in the 'formats' field in info_dict is info_dict itself,
1333             # which can't be exported to json
1334             info_dict['formats'] = formats
1335         if self.params.get('listformats'):
1336             self.list_formats(info_dict)
1337             return
1338         if self.params.get('list_thumbnails'):
1339             self.list_thumbnails(info_dict)
1340             return
1341
1342         req_format = self.params.get('format')
1343         if req_format is None:
1344             req_format_list = []
1345             if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
1346                     not info_dict.get('is_live')):
1347                 merger = FFmpegMergerPP(self)
1348                 if merger.available and merger.can_merge():
1349                     req_format_list.append('bestvideo+bestaudio')
1350             req_format_list.append('best')
1351             req_format = '/'.join(req_format_list)
1352         format_selector = self.build_format_selector(req_format)
1353         formats_to_download = list(format_selector(formats))
1354         if not formats_to_download:
1355             raise ExtractorError('requested format not available',
1356                                  expected=True)
1357
1358         if download:
1359             if len(formats_to_download) > 1:
1360                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1361             for format in formats_to_download:
1362                 new_info = dict(info_dict)
1363                 new_info.update(format)
1364                 self.process_info(new_info)
1365         # We update the info dict with the best quality format (backwards compatibility)
1366         info_dict.update(formats_to_download[-1])
1367         return info_dict
1368
1369     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1370         """Select the requested subtitles and their format"""
1371         available_subs = {}
1372         if normal_subtitles and self.params.get('writesubtitles'):
1373             available_subs.update(normal_subtitles)
1374         if automatic_captions and self.params.get('writeautomaticsub'):
1375             for lang, cap_info in automatic_captions.items():
1376                 if lang not in available_subs:
1377                     available_subs[lang] = cap_info
1378
1379         if (not self.params.get('writesubtitles') and not
1380                 self.params.get('writeautomaticsub') or not
1381                 available_subs):
1382             return None
1383
1384         if self.params.get('allsubtitles', False):
1385             requested_langs = available_subs.keys()
1386         else:
1387             if self.params.get('subtitleslangs', False):
1388                 requested_langs = self.params.get('subtitleslangs')
1389             elif 'en' in available_subs:
1390                 requested_langs = ['en']
1391             else:
1392                 requested_langs = [list(available_subs.keys())[0]]
1393
1394         formats_query = self.params.get('subtitlesformat', 'best')
1395         formats_preference = formats_query.split('/') if formats_query else []
1396         subs = {}
1397         for lang in requested_langs:
1398             formats = available_subs.get(lang)
1399             if formats is None:
1400                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1401                 continue
1402             for ext in formats_preference:
1403                 if ext == 'best':
1404                     f = formats[-1]
1405                     break
1406                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1407                 if matches:
1408                     f = matches[-1]
1409                     break
1410             else:
1411                 f = formats[-1]
1412                 self.report_warning(
1413                     'No subtitle format found matching "%s" for language %s, '
1414                     'using %s' % (formats_query, lang, f['ext']))
1415             subs[lang] = f
1416         return subs
1417
    def process_info(self, info_dict):
        """Process a single resolved IE result.

        Runs the full per-video pipeline: --max-downloads enforcement, title
        truncation, match filtering, forced printing (--get-* options),
        writing of description/annotations/subtitles/info-JSON/thumbnails,
        the actual download (including multi-format download and merge),
        post-download fixups and postprocessing, and finally recording the
        video in the download archive.
        """

        assert info_dict.get('_type', 'video') == 'video'

        # Abort early once --max-downloads is reached
        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads >= int(max_downloads):
                raise MaxDownloadsReached()

        # Keep the untruncated title available; cap 'title' for filename use
        info_dict['fulltitle'] = info_dict['title']
        if len(info_dict['title']) > 200:
            info_dict['title'] = info_dict['title'][:197] + '...'

        if 'format' not in info_dict:
            info_dict['format'] = info_dict['ext']

        # A non-None reason means the video is filtered out (match filters,
        # download archive, ...); report it and skip the download
        reason = self._match_entry(info_dict, incomplete=False)
        if reason is not None:
            self.to_screen('[download] ' + reason)
            return

        self._num_downloads += 1

        info_dict['_filename'] = filename = self.prepare_filename(info_dict)

        # Forced printings
        if self.params.get('forcetitle', False):
            self.to_stdout(info_dict['fulltitle'])
        if self.params.get('forceid', False):
            self.to_stdout(info_dict['id'])
        if self.params.get('forceurl', False):
            if info_dict.get('requested_formats') is not None:
                for f in info_dict['requested_formats']:
                    self.to_stdout(f['url'] + f.get('play_path', ''))
            else:
                # For RTMP URLs, also include the playpath
                self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
        if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
            self.to_stdout(info_dict['thumbnail'])
        if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
            self.to_stdout(info_dict['description'])
        if self.params.get('forcefilename', False) and filename is not None:
            self.to_stdout(filename)
        if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
            self.to_stdout(formatSeconds(info_dict['duration']))
        if self.params.get('forceformat', False):
            self.to_stdout(info_dict['format'])
        if self.params.get('forcejson', False):
            self.to_stdout(json.dumps(info_dict))

        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):
            return

        if filename is None:
            return

        # Create the destination directory if it does not exist yet
        try:
            dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
            if dn and not os.path.exists(dn):
                os.makedirs(dn)
        except (OSError, IOError) as err:
            self.report_error('unable to create directory ' + error_to_compat_str(err))
            return

        # Optional sidecar file: video description (--write-description)
        if self.params.get('writedescription', False):
            descfn = replace_extension(filename, 'description', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Video description is already present')
            elif info_dict.get('description') is None:
                self.report_warning('There\'s no description to write.')
            else:
                try:
                    self.to_screen('[info] Writing video description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(info_dict['description'])
                except (OSError, IOError):
                    self.report_error('Cannot write description file ' + descfn)
                    return

        # Optional sidecar file: annotations XML (--write-annotations)
        if self.params.get('writeannotations', False):
            annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
                self.to_screen('[info] Video annotations are already present')
            else:
                try:
                    self.to_screen('[info] Writing video annotations to: ' + annofn)
                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                        annofile.write(info_dict['annotations'])
                except (KeyError, TypeError):
                    # 'annotations' missing or not a string
                    self.report_warning('There are no annotations to write.')
                except (OSError, IOError):
                    self.report_error('Cannot write annotations file: ' + annofn)
                    return

        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub')])

        if subtitles_are_requested and info_dict.get('requested_subtitles'):
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            subtitles = info_dict['requested_subtitles']
            ie = self.get_info_extractor(info_dict['extractor_key'])
            for sub_lang, sub_info in subtitles.items():
                sub_format = sub_info['ext']
                if sub_info.get('data') is not None:
                    # Subtitle content was already fetched by the extractor
                    sub_data = sub_info['data']
                else:
                    try:
                        sub_data = ie._download_webpage(
                            sub_info['url'], info_dict['id'], note=False)
                    except ExtractorError as err:
                        self.report_warning('Unable to download subtitle for "%s": %s' %
                                            (sub_lang, error_to_compat_str(err.cause)))
                        continue
                try:
                    sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                    if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                        self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                    else:
                        self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                            subfile.write(sub_data)
                except (OSError, IOError):
                    self.report_error('Cannot write subtitles file ' + sub_filename)
                    return

        # Optional sidecar file: full metadata as JSON (--write-info-json)
        if self.params.get('writeinfojson', False):
            infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Video description metadata is already present')
            else:
                self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
                try:
                    write_json_file(self.filter_requested_info(info_dict), infofn)
                except (OSError, IOError):
                    self.report_error('Cannot write metadata to JSON file ' + infofn)
                    return

        self._write_thumbnails(info_dict, filename)

        if not self.params.get('skip_download', False):
            try:
                def dl(name, info):
                    # Pick the suitable FileDownloader for this format and run it
                    fd = get_suitable_downloader(info, self.params)(self, self.params)
                    for ph in self._progress_hooks:
                        fd.add_progress_hook(ph)
                    if self.params.get('verbose'):
                        self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                    return fd.download(name, info)

                if info_dict.get('requested_formats') is not None:
                    # Multiple formats requested (e.g. bestvideo+bestaudio):
                    # download each part, then merge with ffmpeg/avconv
                    downloaded = []
                    success = True
                    merger = FFmpegMergerPP(self)
                    if not merger.available:
                        postprocessors = []
                        self.report_warning('You have requested multiple '
                                            'formats but ffmpeg or avconv are not installed.'
                                            ' The formats won\'t be merged.')
                    else:
                        postprocessors = [merger]

                    def compatible_formats(formats):
                        # Whether the (video, audio) pair can share one container
                        video, audio = formats
                        # Check extension
                        video_ext, audio_ext = audio.get('ext'), video.get('ext')
                        if video_ext and audio_ext:
                            COMPATIBLE_EXTS = (
                                ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
                                # NOTE(review): ('webm') is a plain string, not a
                                # 1-tuple, so the membership tests below degrade to
                                # substring checks for this entry — confirm intent
                                ('webm')
                            )
                            for exts in COMPATIBLE_EXTS:
                                if video_ext in exts and audio_ext in exts:
                                    return True
                        # TODO: Check acodec/vcodec
                        return False

                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext == info_dict['ext']
                        else filename)
                    requested_formats = info_dict['requested_formats']
                    if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'Requested formats are incompatible for merge and will be merged into mkv.')
                    # Ensure filename always has a correct extension for successful merge
                    filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
                    if os.path.exists(encodeFilename(filename)):
                        self.to_screen(
                            '[download] %s has already been downloaded and '
                            'merged' % filename)
                    else:
                        # Download every requested part under an 'f<format_id>'
                        # prefixed name; the merger combines them afterwards
                        for f in requested_formats:
                            new_info = dict(info_dict)
                            new_info.update(f)
                            fname = self.prepare_filename(new_info)
                            fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
                            downloaded.append(fname)
                            partial_success = dl(fname, new_info)
                            success = success and partial_success
                        info_dict['__postprocessors'] = postprocessors
                        info_dict['__files_to_merge'] = downloaded
                else:
                    # Just a single file
                    success = dl(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error('unable to download video data: %s' % str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

            if success and filename != '-':
                # Fixup content
                fixup_policy = self.params.get('fixup')
                if fixup_policy is None:
                    fixup_policy = 'detect_or_warn'

                INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'

                # Fixup 1: non-uniform pixel (aspect) ratio
                stretched_ratio = info_dict.get('stretched_ratio')
                if stretched_ratio is not None and stretched_ratio != 1:
                    if fixup_policy == 'warn':
                        self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                            info_dict['id'], stretched_ratio))
                    elif fixup_policy == 'detect_or_warn':
                        stretched_pp = FFmpegFixupStretchedPP(self)
                        if stretched_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(stretched_pp)
                        else:
                            self.report_warning(
                                '%s: Non-uniform pixel ratio (%s). %s'
                                % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
                    else:
                        assert fixup_policy in ('ignore', 'never')

                # Fixup 2: single-file DASH m4a container compatibility
                if (info_dict.get('requested_formats') is None and
                        info_dict.get('container') == 'm4a_dash'):
                    if fixup_policy == 'warn':
                        self.report_warning(
                            '%s: writing DASH m4a. '
                            'Only some players support this container.'
                            % info_dict['id'])
                    elif fixup_policy == 'detect_or_warn':
                        fixup_pp = FFmpegFixupM4aPP(self)
                        if fixup_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(fixup_pp)
                        else:
                            self.report_warning(
                                '%s: writing DASH m4a. '
                                'Only some players support this container. %s'
                                % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                    else:
                        assert fixup_policy in ('ignore', 'never')

                # Fixup 3: AAC bitstream produced by (native) HLS downloads
                if (info_dict.get('protocol') == 'm3u8_native' or
                        info_dict.get('protocol') == 'm3u8' and
                        self.params.get('hls_prefer_native')):
                    if fixup_policy == 'warn':
                        self.report_warning('%s: malformated aac bitstream.' % (
                            info_dict['id']))
                    elif fixup_policy == 'detect_or_warn':
                        fixup_pp = FFmpegFixupM3u8PP(self)
                        if fixup_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(fixup_pp)
                        else:
                            self.report_warning(
                                '%s: malformated aac bitstream. %s'
                                % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                    else:
                        assert fixup_policy in ('ignore', 'never')

                try:
                    self.post_process(filename, info_dict)
                except (PostProcessingError) as err:
                    self.report_error('postprocessing: %s' % str(err))
                    return
                self.record_download_archive(info_dict)
1705
1706     def download(self, url_list):
1707         """Download a given list of URLs."""
1708         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1709         if (len(url_list) > 1 and
1710                 '%' not in outtmpl and
1711                 self.params.get('max_downloads') != 1):
1712             raise SameFileError(outtmpl)
1713
1714         for url in url_list:
1715             try:
1716                 # It also downloads the videos
1717                 res = self.extract_info(
1718                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
1719             except UnavailableVideoError:
1720                 self.report_error('unable to download video')
1721             except MaxDownloadsReached:
1722                 self.to_screen('[info] Maximum number of downloaded files reached.')
1723                 raise
1724             else:
1725                 if self.params.get('dump_single_json', False):
1726                     self.to_stdout(json.dumps(res))
1727
1728         return self._download_retcode
1729
1730     def download_with_info_file(self, info_filename):
1731         with contextlib.closing(fileinput.FileInput(
1732                 [info_filename], mode='r',
1733                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1734             # FileInput doesn't have a read method, we can't call json.load
1735             info = self.filter_requested_info(json.loads('\n'.join(f)))
1736         try:
1737             self.process_ie_result(info, download=True)
1738         except DownloadError:
1739             webpage_url = info.get('webpage_url')
1740             if webpage_url is not None:
1741                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1742                 return self.download([webpage_url])
1743             else:
1744                 raise
1745         return self._download_retcode
1746
1747     @staticmethod
1748     def filter_requested_info(info_dict):
1749         return dict(
1750             (k, v) for k, v in info_dict.items()
1751             if k not in ['requested_formats', 'requested_subtitles'])
1752
1753     def post_process(self, filename, ie_info):
1754         """Run all the postprocessors on the given file."""
1755         info = dict(ie_info)
1756         info['filepath'] = filename
1757         pps_chain = []
1758         if ie_info.get('__postprocessors') is not None:
1759             pps_chain.extend(ie_info['__postprocessors'])
1760         pps_chain.extend(self._pps)
1761         for pp in pps_chain:
1762             files_to_delete = []
1763             try:
1764                 files_to_delete, info = pp.run(info)
1765             except PostProcessingError as e:
1766                 self.report_error(e.msg)
1767             if files_to_delete and not self.params.get('keepvideo', False):
1768                 for old_filename in files_to_delete:
1769                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1770                     try:
1771                         os.remove(encodeFilename(old_filename))
1772                     except (IOError, OSError):
1773                         self.report_warning('Unable to remove downloaded original file')
1774
1775     def _make_archive_id(self, info_dict):
1776         # Future-proof against any change in case
1777         # and backwards compatibility with prior versions
1778         extractor = info_dict.get('extractor_key')
1779         if extractor is None:
1780             if 'id' in info_dict:
1781                 extractor = info_dict.get('ie_key')  # key in a playlist
1782         if extractor is None:
1783             return None  # Incomplete video information
1784         return extractor.lower() + ' ' + info_dict['id']
1785
1786     def in_download_archive(self, info_dict):
1787         fn = self.params.get('download_archive')
1788         if fn is None:
1789             return False
1790
1791         vid_id = self._make_archive_id(info_dict)
1792         if vid_id is None:
1793             return False  # Incomplete video information
1794
1795         try:
1796             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1797                 for line in archive_file:
1798                     if line.strip() == vid_id:
1799                         return True
1800         except IOError as ioe:
1801             if ioe.errno != errno.ENOENT:
1802                 raise
1803         return False
1804
1805     def record_download_archive(self, info_dict):
1806         fn = self.params.get('download_archive')
1807         if fn is None:
1808             return
1809         vid_id = self._make_archive_id(info_dict)
1810         assert vid_id
1811         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1812             archive_file.write(vid_id + '\n')
1813
1814     @staticmethod
1815     def format_resolution(format, default='unknown'):
1816         if format.get('vcodec') == 'none':
1817             return 'audio only'
1818         if format.get('resolution') is not None:
1819             return format['resolution']
1820         if format.get('height') is not None:
1821             if format.get('width') is not None:
1822                 res = '%sx%s' % (format['width'], format['height'])
1823             else:
1824                 res = '%sp' % format['height']
1825         elif format.get('width') is not None:
1826             res = '%dx?' % format['width']
1827         else:
1828             res = default
1829         return res
1830
1831     def _format_note(self, fdict):
1832         res = ''
1833         if fdict.get('ext') in ['f4f', 'f4m']:
1834             res += '(unsupported) '
1835         if fdict.get('language'):
1836             if res:
1837                 res += ' '
1838             res += '[%s]' % fdict['language']
1839         if fdict.get('format_note') is not None:
1840             res += fdict['format_note'] + ' '
1841         if fdict.get('tbr') is not None:
1842             res += '%4dk ' % fdict['tbr']
1843         if fdict.get('container') is not None:
1844             if res:
1845                 res += ', '
1846             res += '%s container' % fdict['container']
1847         if (fdict.get('vcodec') is not None and
1848                 fdict.get('vcodec') != 'none'):
1849             if res:
1850                 res += ', '
1851             res += fdict['vcodec']
1852             if fdict.get('vbr') is not None:
1853                 res += '@'
1854         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1855             res += 'video@'
1856         if fdict.get('vbr') is not None:
1857             res += '%4dk' % fdict['vbr']
1858         if fdict.get('fps') is not None:
1859             if res:
1860                 res += ', '
1861             res += '%sfps' % fdict['fps']
1862         if fdict.get('acodec') is not None:
1863             if res:
1864                 res += ', '
1865             if fdict['acodec'] == 'none':
1866                 res += 'video only'
1867             else:
1868                 res += '%-5s' % fdict['acodec']
1869         elif fdict.get('abr') is not None:
1870             if res:
1871                 res += ', '
1872             res += 'audio'
1873         if fdict.get('abr') is not None:
1874             res += '@%3dk' % fdict['abr']
1875         if fdict.get('asr') is not None:
1876             res += ' (%5dHz)' % fdict['asr']
1877         if fdict.get('filesize') is not None:
1878             if res:
1879                 res += ', '
1880             res += format_bytes(fdict['filesize'])
1881         elif fdict.get('filesize_approx') is not None:
1882             if res:
1883                 res += ', '
1884             res += '~' + format_bytes(fdict['filesize_approx'])
1885         return res
1886
1887     def list_formats(self, info_dict):
1888         formats = info_dict.get('formats', [info_dict])
1889         table = [
1890             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1891             for f in formats
1892             if f.get('preference') is None or f['preference'] >= -1000]
1893         if len(formats) > 1:
1894             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1895
1896         header_line = ['format code', 'extension', 'resolution', 'note']
1897         self.to_screen(
1898             '[info] Available formats for %s:\n%s' %
1899             (info_dict['id'], render_table(header_line, table)))
1900
1901     def list_thumbnails(self, info_dict):
1902         thumbnails = info_dict.get('thumbnails')
1903         if not thumbnails:
1904             tn_url = info_dict.get('thumbnail')
1905             if tn_url:
1906                 thumbnails = [{'id': '0', 'url': tn_url}]
1907             else:
1908                 self.to_screen(
1909                     '[info] No thumbnails present for %s' % info_dict['id'])
1910                 return
1911
1912         self.to_screen(
1913             '[info] Thumbnails for %s:' % info_dict['id'])
1914         self.to_screen(render_table(
1915             ['ID', 'width', 'height', 'URL'],
1916             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1917
1918     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1919         if not subtitles:
1920             self.to_screen('%s has no %s' % (video_id, name))
1921             return
1922         self.to_screen(
1923             'Available %s for %s:' % (name, video_id))
1924         self.to_screen(render_table(
1925             ['Language', 'formats'],
1926             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1927                 for lang, formats in subtitles.items()]))
1928
1929     def urlopen(self, req):
1930         """ Start an HTTP download """
1931         if isinstance(req, compat_basestring):
1932             req = sanitized_Request(req)
1933         return self._opener.open(req, timeout=self._socket_timeout)
1934
    def print_debug_header(self):
        """Write verbose debug information (encodings, versions, proxy map,
        optionally public IP / update check) to the debug output.
        No-op unless the 'verbose' option is set."""
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        # sys.stdout may have been replaced by an object without an
        # 'encoding' attribute; report its type name in that case.
        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
        try:
            # Best effort: when running from a git checkout, also report
            # the current commit hash.
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        except Exception:
            try:
                # Python 2 only: clear the stored exception state so it does
                # not leak into later tracebacks; absent on Python 3, hence
                # the inner guard.
                sys.exc_clear()
            except Exception:
                pass
        self._write_string('[debug] Python version %s - %s\n' % (
            platform.python_version(), platform_name()))

        # Versions of external helper programs (ffmpeg/avprobe etc. via the
        # postprocessor, plus rtmpdump); 'none' when nothing was detected.
        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Collect the effective proxy mapping from every opener handler
        # that carries one (see _setup_opener).
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            # Opt-in: report the public IP address and warn when a newer
            # release is available on yt-dl.org.
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
1999
    def _setup_opener(self):
        """Build the urllib opener used for all HTTP(S) requests and store
        it in self._opener; also sets self._socket_timeout and
        self.cookiejar.  Wires up cookies, per-request proxies, the custom
        HTTPS handler, data: URL support, and disables file:// access."""
        timeout_val = self.params.get('socket_timeout')
        # Default socket timeout: 600 seconds
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # In-memory cookie jar when no cookie file was configured
            self.cookiejar = compat_cookiejar.CookieJar()
        else:
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
                opts_cookiefile)
            # Load existing cookies only if the file is readable; a missing
            # file is fine (it will be created on save)
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load()

        cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
        if opts_proxy is not None:
            # Explicit --proxy: an empty string disables proxying entirely
            if opts_proxy == '':
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            # Fall back to the environment's proxy settings
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        data_handler = compat_urllib_request_DataHandler()

        # When passing our own FileHandler instance, build_opener won't add the
        # default FileHandler and allows us to disable the file protocol, which
        # can be used for malicious purposes (see
        # https://github.com/rg3/youtube-dl/issues/8227)
        file_handler = compat_urllib_request.FileHandler()

        def file_open(*args, **kwargs):
            raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
        file_handler.file_open = file_open

        opener = compat_urllib_request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)

        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
2051
2052     def encode(self, s):
2053         if isinstance(s, bytes):
2054             return s  # Already encoded
2055
2056         try:
2057             return s.encode(self.get_encoding())
2058         except UnicodeEncodeError as err:
2059             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2060             raise
2061
2062     def get_encoding(self):
2063         encoding = self.params.get('encoding')
2064         if encoding is None:
2065             encoding = preferredencoding()
2066         return encoding
2067
2068     def _write_thumbnails(self, info_dict, filename):
2069         if self.params.get('writethumbnail', False):
2070             thumbnails = info_dict.get('thumbnails')
2071             if thumbnails:
2072                 thumbnails = [thumbnails[-1]]
2073         elif self.params.get('write_all_thumbnails', False):
2074             thumbnails = info_dict.get('thumbnails')
2075         else:
2076             return
2077
2078         if not thumbnails:
2079             # No thumbnails present, so return immediately
2080             return
2081
2082         for t in thumbnails:
2083             thumb_ext = determine_ext(t['url'], 'jpg')
2084             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2085             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2086             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
2087
2088             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2089                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2090                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2091             else:
2092                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2093                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2094                 try:
2095                     uf = self.urlopen(t['url'])
2096                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2097                         shutil.copyfileobj(uf, thumbf)
2098                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2099                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2100                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2101                     self.report_warning('Unable to download thumbnail "%s": %s' %
2102                                         (t['url'], error_to_compat_str(err)))