[YoutubeDL] Sanitize single thumbnail URL
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import datetime
9 import errno
10 import fileinput
11 import io
12 import itertools
13 import json
14 import locale
15 import operator
16 import os
17 import platform
18 import re
19 import shutil
20 import subprocess
21 import socket
22 import sys
23 import time
24 import tokenize
25 import traceback
26
27 from .compat import (
28     compat_basestring,
29     compat_cookiejar,
30     compat_expanduser,
31     compat_get_terminal_size,
32     compat_http_client,
33     compat_kwargs,
34     compat_os_name,
35     compat_str,
36     compat_tokenize_tokenize,
37     compat_urllib_error,
38     compat_urllib_request,
39     compat_urllib_request_DataHandler,
40 )
41 from .utils import (
42     age_restricted,
43     args_to_str,
44     ContentTooShortError,
45     date_from_str,
46     DateRange,
47     DEFAULT_OUTTMPL,
48     determine_ext,
49     determine_protocol,
50     DownloadError,
51     encode_compat_str,
52     encodeFilename,
53     error_to_compat_str,
54     ExtractorError,
55     format_bytes,
56     formatSeconds,
57     locked_file,
58     make_HTTPS_handler,
59     MaxDownloadsReached,
60     PagedList,
61     parse_filesize,
62     PerRequestProxyHandler,
63     platform_name,
64     PostProcessingError,
65     preferredencoding,
66     prepend_extension,
67     render_table,
68     replace_extension,
69     SameFileError,
70     sanitize_filename,
71     sanitize_path,
72     sanitize_url,
73     sanitized_Request,
74     std_headers,
75     subtitles_filename,
76     UnavailableVideoError,
77     url_basename,
78     version_tuple,
79     write_json_file,
80     write_string,
81     YoutubeDLCookieProcessor,
82     YoutubeDLHandler,
83 )
84 from .cache import Cache
85 from .extractor import get_info_extractor, gen_extractors
86 from .downloader import get_suitable_downloader
87 from .downloader.rtmp import rtmpdump_version
88 from .postprocessor import (
89     FFmpegFixupM3u8PP,
90     FFmpegFixupM4aPP,
91     FFmpegFixupStretchedPP,
92     FFmpegMergerPP,
93     FFmpegPostProcessor,
94     get_postprocessor,
95 )
96 from .version import __version__
97
98 if compat_os_name == 'nt':
99     import ctypes
100
101
102 class YoutubeDL(object):
103     """YoutubeDL class.
104
105     YoutubeDL objects are the ones responsible of downloading the
106     actual video file and writing it to disk if the user has requested
107     it, among some other tasks. In most cases there should be one per
108     program. As, given a video URL, the downloader doesn't know how to
109     extract all the needed information, task that InfoExtractors do, it
110     has to pass the URL to one of them.
111
112     For this, YoutubeDL objects have a method that allows
113     InfoExtractors to be registered in a given order. When it is passed
114     a URL, the YoutubeDL object handles it to the first InfoExtractor it
115     finds that reports being able to handle it. The InfoExtractor extracts
116     all the information about the video or videos the URL refers to, and
117     YoutubeDL process the extracted information, possibly using a File
118     Downloader to download the video.
119
120     YoutubeDL objects accept a lot of parameters. In order not to saturate
121     the object constructor with arguments, it receives a dictionary of
122     options instead. These options are available through the params
123     attribute for the InfoExtractors to use. The YoutubeDL also
124     registers itself as the downloader in charge for the InfoExtractors
125     that are added to it, so this is a "mutual registration".
126
127     Available options:
128
129     username:          Username for authentication purposes.
130     password:          Password for authentication purposes.
131     videopassword:     Password for accessing a video.
132     usenetrc:          Use netrc for authentication instead.
133     verbose:           Print additional info to stdout.
134     quiet:             Do not print messages to stdout.
135     no_warnings:       Do not print out anything for warnings.
136     forceurl:          Force printing final URL.
137     forcetitle:        Force printing title.
138     forceid:           Force printing ID.
139     forcethumbnail:    Force printing thumbnail URL.
140     forcedescription:  Force printing description.
141     forcefilename:     Force printing final filename.
142     forceduration:     Force printing duration.
143     forcejson:         Force printing info_dict as JSON.
144     dump_single_json:  Force printing the info_dict of the whole playlist
145                        (or video) as a single JSON line.
146     simulate:          Do not download the video files.
147     format:            Video format code. See options.py for more information.
148     outtmpl:           Template for output names.
149     restrictfilenames: Do not allow "&" and spaces in file names
150     ignoreerrors:      Do not stop on download errors.
151     force_generic_extractor: Force downloader to use the generic extractor
152     nooverwrites:      Prevent overwriting files.
153     playliststart:     Playlist item to start at.
154     playlistend:       Playlist item to end at.
155     playlist_items:    Specific indices of playlist to download.
156     playlistreverse:   Download playlist items in reverse order.
157     matchtitle:        Download only matching titles.
158     rejecttitle:       Reject downloads for matching titles.
159     logger:            Log messages to a logging.Logger instance.
160     logtostderr:       Log messages to stderr instead of stdout.
161     writedescription:  Write the video description to a .description file
162     writeinfojson:     Write the video description to a .info.json file
163     writeannotations:  Write the video annotations to a .annotations.xml file
164     writethumbnail:    Write the thumbnail image to a file
165     write_all_thumbnails:  Write all thumbnail formats to files
166     writesubtitles:    Write the video subtitles to a file
167     writeautomaticsub: Write the automatically generated subtitles to a file
168     allsubtitles:      Downloads all the subtitles of the video
169                        (requires writesubtitles or writeautomaticsub)
170     listsubtitles:     Lists all available subtitles for the video
171     subtitlesformat:   The format code for subtitles
172     subtitleslangs:    List of languages of the subtitles to download
173     keepvideo:         Keep the video file after post-processing
174     daterange:         A DateRange object, download only if the upload_date is in the range.
175     skip_download:     Skip the actual download of the video file
176     cachedir:          Location of the cache files in the filesystem.
177                        False to disable filesystem cache.
178     noplaylist:        Download single video instead of a playlist if in doubt.
179     age_limit:         An integer representing the user's age in years.
180                        Unsuitable videos for the given age are skipped.
181     min_views:         An integer representing the minimum view count the video
182                        must have in order to not be skipped.
183                        Videos without view count information are always
184                        downloaded. None for no limit.
185     max_views:         An integer representing the maximum view count.
186                        Videos that are more popular than that are not
187                        downloaded.
188                        Videos without view count information are always
189                        downloaded. None for no limit.
190     download_archive:  File name of a file where all downloads are recorded.
191                        Videos already present in the file are not downloaded
192                        again.
193     cookiefile:        File name where cookies should be read from and dumped to.
194     nocheckcertificate:Do not verify SSL certificates
195     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
196                        At the moment, this is only supported by YouTube.
197     proxy:             URL of the proxy server to use
198     cn_verification_proxy:  URL of the proxy to use for IP address verification
199                        on Chinese sites. (Experimental)
200     socket_timeout:    Time to wait for unresponsive hosts, in seconds
201     bidi_workaround:   Work around buggy terminals without bidirectional text
202                        support, using fridibi
203     debug_printtraffic:Print out sent and received HTTP traffic
204     include_ads:       Download ads as well
205     default_search:    Prepend this string if an input url is not valid.
206                        'auto' for elaborate guessing
207     encoding:          Use this encoding instead of the system-specified.
208     extract_flat:      Do not resolve URLs, return the immediate result.
209                        Pass in 'in_playlist' to only show this behavior for
210                        playlist items.
211     postprocessors:    A list of dictionaries, each with an entry
212                        * key:  The name of the postprocessor. See
213                                youtube_dl/postprocessor/__init__.py for a list.
214                        as well as any further keyword arguments for the
215                        postprocessor.
216     progress_hooks:    A list of functions that get called on download
217                        progress, with a dictionary with the entries
218                        * status: One of "downloading", "error", or "finished".
219                                  Check this first and ignore unknown values.
220
221                        If status is one of "downloading", or "finished", the
222                        following properties may also be present:
223                        * filename: The final filename (always present)
224                        * tmpfilename: The filename we're currently writing to
225                        * downloaded_bytes: Bytes on disk
226                        * total_bytes: Size of the whole file, None if unknown
227                        * total_bytes_estimate: Guess of the eventual file size,
228                                                None if unavailable.
229                        * elapsed: The number of seconds since download started.
230                        * eta: The estimated time in seconds, None if unknown
231                        * speed: The download speed in bytes/second, None if
232                                 unknown
233                        * fragment_index: The counter of the currently
234                                          downloaded video fragment.
235                        * fragment_count: The number of fragments (= individual
236                                          files that will be merged)
237
238                        Progress hooks are guaranteed to be called at least once
239                        (with status "finished") if the download is successful.
240     merge_output_format: Extension to use when merging formats.
241     fixup:             Automatically correct known faults of the file.
242                        One of:
243                        - "never": do nothing
244                        - "warn": only emit a warning
245                        - "detect_or_warn": check whether we can do anything
246                                            about it, warn otherwise (default)
247     source_address:    (Experimental) Client-side IP address to bind to.
248     call_home:         Boolean, true iff we are allowed to contact the
249                        youtube-dl servers for debugging.
250     sleep_interval:    Number of seconds to sleep before each download.
251     listformats:       Print an overview of available video formats and exit.
252     list_thumbnails:   Print a table of all thumbnails and exit.
253     match_filter:      A function that gets called with the info_dict of
254                        every video.
255                        If it returns a message, the video is ignored.
256                        If it returns None, the video is downloaded.
257                        match_filter_func in utils.py is one example for this.
258     no_color:          Do not emit color codes in output.
259
260     The following options determine which downloader is picked:
261     external_downloader: Executable of the external downloader to call.
262                        None or unset for standard (built-in) downloader.
263     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
264
265     The following parameters are not used by YoutubeDL itself, they are used by
266     the downloader (see youtube_dl/downloader/common.py):
267     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
268     noresizebuffer, retries, continuedl, noprogress, consoletitle,
269     xattr_set_filesize, external_downloader_args, hls_use_mpegts.
270
271     The following options are used by the post processors:
272     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
273                        otherwise prefer avconv.
274     postprocessor_args: A list of additional command-line arguments for the
275                         postprocessor.
276     """
277
278     params = None
279     _ies = []
280     _pps = []
281     _download_retcode = None
282     _num_downloads = None
283     _screen_file = None
284
285     def __init__(self, params=None, auto_init=True):
286         """Create a FileDownloader object with the given options."""
287         if params is None:
288             params = {}
289         self._ies = []
290         self._ies_instances = {}
291         self._pps = []
292         self._progress_hooks = []
293         self._download_retcode = 0
294         self._num_downloads = 0
295         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
296         self._err_file = sys.stderr
297         self.params = {
298             # Default parameters
299             'nocheckcertificate': False,
300         }
301         self.params.update(params)
302         self.cache = Cache(self)
303
304         if params.get('bidi_workaround', False):
305             try:
306                 import pty
307                 master, slave = pty.openpty()
308                 width = compat_get_terminal_size().columns
309                 if width is None:
310                     width_args = []
311                 else:
312                     width_args = ['-w', str(width)]
313                 sp_kwargs = dict(
314                     stdin=subprocess.PIPE,
315                     stdout=slave,
316                     stderr=self._err_file)
317                 try:
318                     self._output_process = subprocess.Popen(
319                         ['bidiv'] + width_args, **sp_kwargs
320                     )
321                 except OSError:
322                     self._output_process = subprocess.Popen(
323                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
324                 self._output_channel = os.fdopen(master, 'rb')
325             except OSError as ose:
326                 if ose.errno == 2:
327                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
328                 else:
329                     raise
330
331         if (sys.version_info >= (3,) and sys.platform != 'win32' and
332                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
333                 not params.get('restrictfilenames', False)):
334             # On Python 3, the Unicode filesystem API will throw errors (#1474)
335             self.report_warning(
336                 'Assuming --restrict-filenames since file system encoding '
337                 'cannot encode all characters. '
338                 'Set the LC_ALL environment variable to fix this.')
339             self.params['restrictfilenames'] = True
340
341         if isinstance(params.get('outtmpl'), bytes):
342             self.report_warning(
343                 'Parameter outtmpl is bytes, but should be a unicode string. '
344                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
345
346         self._setup_opener()
347
348         if auto_init:
349             self.print_debug_header()
350             self.add_default_info_extractors()
351
352         for pp_def_raw in self.params.get('postprocessors', []):
353             pp_class = get_postprocessor(pp_def_raw['key'])
354             pp_def = dict(pp_def_raw)
355             del pp_def['key']
356             pp = pp_class(self, **compat_kwargs(pp_def))
357             self.add_post_processor(pp)
358
359         for ph in self.params.get('progress_hooks', []):
360             self.add_progress_hook(ph)
361
362     def warn_if_short_id(self, argv):
363         # short YouTube ID starting with dash?
364         idxs = [
365             i for i, a in enumerate(argv)
366             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
367         if idxs:
368             correct_argv = (
369                 ['youtube-dl'] +
370                 [a for i, a in enumerate(argv) if i not in idxs] +
371                 ['--'] + [argv[i] for i in idxs]
372             )
373             self.report_warning(
374                 'Long argument string detected. '
375                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
376                 args_to_str(correct_argv))
377
378     def add_info_extractor(self, ie):
379         """Add an InfoExtractor object to the end of the list."""
380         self._ies.append(ie)
381         self._ies_instances[ie.ie_key()] = ie
382         ie.set_downloader(self)
383
384     def get_info_extractor(self, ie_key):
385         """
386         Get an instance of an IE with name ie_key, it will try to get one from
387         the _ies list, if there's no instance it will create a new one and add
388         it to the extractor list.
389         """
390         ie = self._ies_instances.get(ie_key)
391         if ie is None:
392             ie = get_info_extractor(ie_key)()
393             self.add_info_extractor(ie)
394         return ie
395
396     def add_default_info_extractors(self):
397         """
398         Add the InfoExtractors returned by gen_extractors to the end of the list
399         """
400         for ie in gen_extractors():
401             self.add_info_extractor(ie)
402
403     def add_post_processor(self, pp):
404         """Add a PostProcessor object to the end of the chain."""
405         self._pps.append(pp)
406         pp.set_downloader(self)
407
408     def add_progress_hook(self, ph):
409         """Add the progress hook (currently only for the file downloader)"""
410         self._progress_hooks.append(ph)
411
412     def _bidi_workaround(self, message):
413         if not hasattr(self, '_output_channel'):
414             return message
415
416         assert hasattr(self, '_output_process')
417         assert isinstance(message, compat_str)
418         line_count = message.count('\n') + 1
419         self._output_process.stdin.write((message + '\n').encode('utf-8'))
420         self._output_process.stdin.flush()
421         res = ''.join(self._output_channel.readline().decode('utf-8')
422                       for _ in range(line_count))
423         return res[:-len('\n')]
424
425     def to_screen(self, message, skip_eol=False):
426         """Print message to stdout if not in quiet mode."""
427         return self.to_stdout(message, skip_eol, check_quiet=True)
428
429     def _write_string(self, s, out=None):
430         write_string(s, out=out, encoding=self.params.get('encoding'))
431
432     def to_stdout(self, message, skip_eol=False, check_quiet=False):
433         """Print message to stdout if not in quiet mode."""
434         if self.params.get('logger'):
435             self.params['logger'].debug(message)
436         elif not check_quiet or not self.params.get('quiet', False):
437             message = self._bidi_workaround(message)
438             terminator = ['\n', ''][skip_eol]
439             output = message + terminator
440
441             self._write_string(output, self._screen_file)
442
443     def to_stderr(self, message):
444         """Print message to stderr."""
445         assert isinstance(message, compat_str)
446         if self.params.get('logger'):
447             self.params['logger'].error(message)
448         else:
449             message = self._bidi_workaround(message)
450             output = message + '\n'
451             self._write_string(output, self._err_file)
452
    def to_console_title(self, message):
        # Changing the terminal title is opt-in via the 'consoletitle' option.
        if not self.params.get('consoletitle', False):
            return
        if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            # xterm-style escape: OSC 0 sets icon name and window title.
            self._write_string('\033]0;%s\007' % message, self._screen_file)
462
463     def save_console_title(self):
464         if not self.params.get('consoletitle', False):
465             return
466         if 'TERM' in os.environ:
467             # Save the title on stack
468             self._write_string('\033[22;0t', self._screen_file)
469
470     def restore_console_title(self):
471         if not self.params.get('consoletitle', False):
472             return
473         if 'TERM' in os.environ:
474             # Restore the title from stack
475             self._write_string('\033[23;0t', self._screen_file)
476
477     def __enter__(self):
478         self.save_console_title()
479         return self
480
481     def __exit__(self, *args):
482         self.restore_console_title()
483
484         if self.params.get('cookiefile') is not None:
485             self.cookiejar.save()
486
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # Prefer the wrapped exception's traceback when the active
                    # exception carries one on an exc_info attribute
                    # (presumably set by wrapper errors such as ExtractorError
                    # — TODO confirm against utils).
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    # No active exception: report the current call stack instead.
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Re-raise as DownloadError, carrying the innermost exc_info when
            # the active exception wraps one.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # ignoreerrors: record the failure in the process return code instead.
        self._download_retcode = 1
516
517     def report_warning(self, message):
518         '''
519         Print the message to stderr, it will be prefixed with 'WARNING:'
520         If stderr is a tty file the 'WARNING:' will be colored
521         '''
522         if self.params.get('logger') is not None:
523             self.params['logger'].warning(message)
524         else:
525             if self.params.get('no_warnings'):
526                 return
527             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
528                 _msg_header = '\033[0;33mWARNING:\033[0m'
529             else:
530                 _msg_header = 'WARNING:'
531             warning_message = '%s %s' % (_msg_header, message)
532             self.to_stderr(warning_message)
533
534     def report_error(self, message, tb=None):
535         '''
536         Do the same as trouble, but prefixes the message with 'ERROR:', colored
537         in red if stderr is a tty file.
538         '''
539         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
540             _msg_header = '\033[0;31mERROR:\033[0m'
541         else:
542             _msg_header = 'ERROR:'
543         error_message = '%s %s' % (_msg_header, message)
544         self.trouble(error_message, tb)
545
    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            # file_name may contain characters the output encoding cannot
            # represent; fall back to a message without the name.
            self.to_screen('[download] The file has already been downloaded')
552
    def prepare_filename(self, info_dict):
        """Generate the output filename.

        Expands the user's outtmpl against a sanitized copy of info_dict
        and returns the resulting path, or None on a template error.
        """
        try:
            template_dict = dict(info_dict)

            template_dict['epoch'] = int(time.time())
            # %(autonumber)s width defaults to 5 digits unless overridden.
            autonumber_size = self.params.get('autonumber_size')
            if autonumber_size is None:
                autonumber_size = 5
            autonumber_templ = '%0' + str(autonumber_size) + 'd'
            template_dict['autonumber'] = autonumber_templ % self._num_downloads
            if template_dict.get('playlist_index') is not None:
                # Zero-pad the index to the width of the playlist length.
                template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
            if template_dict.get('resolution') is None:
                # Derive a resolution string from width/height when possible.
                if template_dict.get('width') and template_dict.get('height'):
                    template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
                elif template_dict.get('height'):
                    template_dict['resolution'] = '%sp' % template_dict['height']
                elif template_dict.get('width'):
                    template_dict['resolution'] = '%dx?' % template_dict['width']

            # Make every value filesystem-safe; the 'id' field gets the
            # is_id treatment inside sanitize_filename.
            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id'))
            template_dict = dict((k, sanitize(k, v))
                                 for k, v in template_dict.items()
                                 if v is not None)
            # Fields missing from the dict render as the literal string 'NA'.
            template_dict = collections.defaultdict(lambda: 'NA', template_dict)

            outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
            tmpl = compat_expanduser(outtmpl)
            filename = tmpl % template_dict
            # Temporary fix for #4787
            # 'Treat' all problem characters by passing filename through preferredencoding
            # to workaround encoding issues with subprocess on python2 @ Windows
            if sys.version_info < (3, 0) and sys.platform == 'win32':
                filename = encodeFilename(filename, True).decode(preferredencoding())
            return sanitize_path(filename)
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None
595
596     def _match_entry(self, info_dict, incomplete):
597         """ Returns None iff the file should be downloaded """
598
599         video_title = info_dict.get('title', info_dict.get('id', 'video'))
600         if 'title' in info_dict:
601             # This can happen when we're just evaluating the playlist
602             title = info_dict['title']
603             matchtitle = self.params.get('matchtitle', False)
604             if matchtitle:
605                 if not re.search(matchtitle, title, re.IGNORECASE):
606                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
607             rejecttitle = self.params.get('rejecttitle', False)
608             if rejecttitle:
609                 if re.search(rejecttitle, title, re.IGNORECASE):
610                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
611         date = info_dict.get('upload_date')
612         if date is not None:
613             dateRange = self.params.get('daterange', DateRange())
614             if date not in dateRange:
615                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
616         view_count = info_dict.get('view_count')
617         if view_count is not None:
618             min_views = self.params.get('min_views')
619             if min_views is not None and view_count < min_views:
620                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
621             max_views = self.params.get('max_views')
622             if max_views is not None and view_count > max_views:
623                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
624         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
625             return 'Skipping "%s" because it is age restricted' % video_title
626         if self.in_download_archive(info_dict):
627             return '%s has already been recorded in archive' % video_title
628
629         if not incomplete:
630             match_filter = self.params.get('match_filter')
631             if match_filter is not None:
632                 ret = match_filter(info_dict)
633                 if ret is not None:
634                     return ret
635
636         return None
637
638     @staticmethod
639     def add_extra_info(info_dict, extra_info):
640         '''Set the keys from extra_info in info dict if they are missing'''
641         for key, value in extra_info.items():
642             info_dict.setdefault(key, value)
643
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
                     process=True, force_generic_extractor=False):
        '''
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result
        '''

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        if ie_key:
            # Restrict the search to the single requested extractor.
            ies = [self.get_info_extractor(ie_key)]
        else:
            ies = self._ies

        # Try extractors in registration order; the first suitable one wins.
        for ie in ies:
            if not ie.suitable(url):
                continue

            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            try:
                ie_result = ie.extract(url)
                if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                    break
                if isinstance(ie_result, list):
                    # Backwards compatibility: old IE result format
                    ie_result = {
                        '_type': 'compat_list',
                        'entries': ie_result,
                    }
                self.add_default_extra_info(ie_result, ie, url)
                if process:
                    return self.process_ie_result(ie_result, download, extra_info)
                else:
                    return ie_result
            except ExtractorError as e:  # An error we somewhat expected
                self.report_error(compat_str(e), e.format_traceback())
                break
            except MaxDownloadsReached:
                # Propagate so the caller can stop the whole run.
                raise
            except Exception as e:
                if self.params.get('ignoreerrors', False):
                    # Report but keep going when the user asked to ignore errors.
                    self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
                    break
                else:
                    raise
        else:
            # Loop completed without break: no extractor accepted the URL.
            self.report_error('no suitable InfoExtractor for URL %s' % url)
696
697     def add_default_extra_info(self, ie_result, ie, url):
698         self.add_extra_info(ie_result, {
699             'extractor': ie.IE_NAME,
700             'webpage_url': url,
701             'webpage_url_basename': url_basename(url),
702             'extractor_key': ie.ie_key(),
703         })
704
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.

        Dispatches on ie_result['_type']: 'video' (terminal), 'url' and
        'url_transparent' (re-extract), 'playlist'/'multi_video' (recurse per
        entry) and the legacy 'compat_list'.
        """
        # NOTE(review): extra_info uses a mutable default ({}). It is never
        # mutated in this method (a fresh 'extra' dict is built per playlist
        # entry), so this is currently safe, but callers must not rely on the
        # default object's identity.
        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            extract_flat = self.params.get('extract_flat', False)
            # With --flat-playlist ('in_playlist'), url results found inside a
            # playlist are returned unresolved; extract_flat=True flattens all.
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                    extract_flat is True):
                if self.params.get('forcejson', False):
                    self.to_stdout(json.dumps(ie_result))
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # Non-None fields from the embedding result override the embedded
            # one, except for the reference fields deleted below.
            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url', 'ie_key'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            assert new_result.get('_type') != 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist' or result_type == 'multi_video':
            # We process each entry in the playlist
            playlist = ie_result.get('title') or ie_result.get('id')
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # --playlist-start is 1-based; convert to a 0-based slice index
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend')
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

            playlistitems_str = self.params.get('playlist_items')
            playlistitems = None
            if playlistitems_str is not None:
                def iter_playlistitems(format):
                    # Expand a spec like '1-3,7' into 1, 2, 3, 7 (1-based)
                    for string_segment in format.split(','):
                        if '-' in string_segment:
                            start, end = string_segment.split('-')
                            for item in range(int(start), int(end) + 1):
                                yield int(item)
                        else:
                            yield int(string_segment)
                playlistitems = iter_playlistitems(playlistitems_str)

            ie_entries = ie_result['entries']
            if isinstance(ie_entries, list):
                n_all_entries = len(ie_entries)
                if playlistitems:
                    # Keep only requested 1-based indices that are in range
                    entries = [
                        ie_entries[i - 1] for i in playlistitems
                        if -n_all_entries <= i - 1 < n_all_entries]
                else:
                    entries = ie_entries[playliststart:playlistend]
                n_entries = len(entries)
                self.to_screen(
                    '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            elif isinstance(ie_entries, PagedList):
                # PagedList fetches entries lazily; slice page by page
                if playlistitems:
                    entries = []
                    for item in playlistitems:
                        entries.extend(ie_entries.getslice(
                            item - 1, item
                        ))
                else:
                    entries = ie_entries.getslice(
                        playliststart, playlistend)
                n_entries = len(entries)
                self.to_screen(
                    '[%s] playlist %s: Downloading %d videos' %
                    (ie_result['extractor'], playlist, n_entries))
            else:  # iterable
                if playlistitems:
                    # Arbitrary iterable: must materialize to index into it
                    entry_list = list(ie_entries)
                    entries = [entry_list[i - 1] for i in playlistitems]
                else:
                    entries = list(itertools.islice(
                        ie_entries, playliststart, playlistend))
                n_entries = len(entries)
                self.to_screen(
                    '[%s] playlist %s: Downloading %d videos' %
                    (ie_result['extractor'], playlist, n_entries))

            if self.params.get('playlistreverse', False):
                entries = entries[::-1]

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                # Provenance/context propagated into every playlist entry
                extra = {
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_id': ie_result.get('id'),
                    'playlist_title': ie_result.get('title'),
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

                # incomplete=True: the entry's metadata may be partial here
                reason = self._match_entry(entry, incomplete=True)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                    continue

                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            self.to_screen('[download] Finished downloading playlist: %s' % playlist)
            return ie_result
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            def _fixup(r):
                # Stamp each legacy entry with the parent's provenance fields
                self.add_extra_info(
                    r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    }
                )
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
868
869     def _build_format_filter(self, filter_spec):
870         " Returns a function to filter the formats according to the filter_spec "
871
872         OPERATORS = {
873             '<': operator.lt,
874             '<=': operator.le,
875             '>': operator.gt,
876             '>=': operator.ge,
877             '=': operator.eq,
878             '!=': operator.ne,
879         }
880         operator_rex = re.compile(r'''(?x)\s*
881             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
882             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
883             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
884             $
885             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
886         m = operator_rex.search(filter_spec)
887         if m:
888             try:
889                 comparison_value = int(m.group('value'))
890             except ValueError:
891                 comparison_value = parse_filesize(m.group('value'))
892                 if comparison_value is None:
893                     comparison_value = parse_filesize(m.group('value') + 'B')
894                 if comparison_value is None:
895                     raise ValueError(
896                         'Invalid value %r in format specification %r' % (
897                             m.group('value'), filter_spec))
898             op = OPERATORS[m.group('op')]
899
900         if not m:
901             STR_OPERATORS = {
902                 '=': operator.eq,
903                 '!=': operator.ne,
904                 '^=': lambda attr, value: attr.startswith(value),
905                 '$=': lambda attr, value: attr.endswith(value),
906                 '*=': lambda attr, value: value in attr,
907             }
908             str_operator_rex = re.compile(r'''(?x)
909                 \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
910                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
911                 \s*(?P<value>[a-zA-Z0-9._-]+)
912                 \s*$
913                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
914             m = str_operator_rex.search(filter_spec)
915             if m:
916                 comparison_value = m.group('value')
917                 op = STR_OPERATORS[m.group('op')]
918
919         if not m:
920             raise ValueError('Invalid filter specification %r' % filter_spec)
921
922         def _filter(f):
923             actual_value = f.get(m.group('key'))
924             if actual_value is None:
925                 return m.group('none_inclusive')
926             return op(actual_value, comparison_value)
927         return _filter
928
    def build_format_selector(self, format_spec):
        """Compile a format specification string (e.g.
        'bestvideo[height<=720]+bestaudio/best') into a selector function that
        takes a list of format dicts and yields the chosen format(s).

        Pipeline: tokenize the spec with the Python tokenizer, normalize the
        token stream (_remove_unused_ops), parse it into a tree of
        FormatSelector nodes (_parse_format_selection) and compile that tree
        into nested generator closures (_build_selector_function).
        """
        def syntax_error(note, start):
            # Build (not raise) a SyntaxError pointing a caret at column
            # start[1] of the original spec.
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        # Node types of the parsed selector tree
        PICKFIRST = 'PICKFIRST'  # a/b: first alternative yielding formats
        MERGE = 'MERGE'          # v+a: merge a video and an audio format
        SINGLE = 'SINGLE'        # a single named selector, e.g. 'best', 'mp4'
        GROUP = 'GROUP'          # parenthesized sub-expression
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        def _parse_filter(tokens):
            # Consume tokens up to the matching ']' and return the raw filter
            # string (compiled later by _build_format_filter).
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    if last_string:
                        # Flush any accumulated name before the bracket
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    # Accumulate adjacent names/numbers/other ops into one NAME
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Recursive-descent parser over the token stream; the inside_*
            # flags mark which construct the current recursion level parses.
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        # Merge operand ends here; let the caller handle it
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        if not current_selector:
                            # A bare filter like '[height<=720]' applies to 'best'
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        video_selector = current_selector
                        audio_selector = _parse_format_selection(tokens, inside_merge=True)
                        if not video_selector or not audio_selector:
                            raise syntax_error('"+" must be between two format selectors', start)
                        current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _build_selector_function(selector):
            # Compile a selector node (or list of them) into a function:
            # list of format dicts -> iterator of selected format dicts.
            if isinstance(selector, list):
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(formats):
                    # Chain the results of all comma-separated selectors
                    for f in fs:
                        for format in f(formats):
                            yield format
                return selector_function
            elif selector.type == GROUP:
                selector_function = _build_selector_function(selector.selector)
            elif selector.type == PICKFIRST:
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(formats):
                    # First alternative that yields anything wins
                    for f in fs:
                        picked_formats = list(f(formats))
                        if picked_formats:
                            return picked_formats
                    return []
            elif selector.type == SINGLE:
                format_spec = selector.selector

                def selector_function(formats):
                    formats = list(formats)
                    if not formats:
                        return
                    if format_spec == 'all':
                        for f in formats:
                            yield f
                    elif format_spec in ['best', 'worst', None]:
                        # formats are sorted worst-to-best, so -1 is best
                        format_idx = 0 if format_spec == 'worst' else -1
                        audiovideo_formats = [
                            f for f in formats
                            if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                        if audiovideo_formats:
                            yield audiovideo_formats[format_idx]
                        # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
                        elif (all(f.get('acodec') != 'none' for f in formats) or
                              all(f.get('vcodec') != 'none' for f in formats)):
                            yield formats[format_idx]
                    elif format_spec == 'bestaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[-1]
                    elif format_spec == 'worstaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[0]
                    elif format_spec == 'bestvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[-1]
                    elif format_spec == 'worstvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[0]
                    else:
                        # Either an extension or a literal format_id
                        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                        if format_spec in extensions:
                            filter_f = lambda f: f['ext'] == format_spec
                        else:
                            filter_f = lambda f: f['format_id'] == format_spec
                        matches = list(filter(filter_f, formats))
                        if matches:
                            yield matches[-1]
            elif selector.type == MERGE:
                def _merge(formats_info):
                    format_1, format_2 = [f['format_id'] for f in formats_info]
                    # The first format must contain the video and the
                    # second the audio
                    if formats_info[0].get('vcodec') == 'none':
                        self.report_error('The first format must '
                                          'contain the video, try using '
                                          '"-f %s+%s"' % (format_2, format_1))
                        return
                    # Formats must be opposite (video+audio)
                    if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
                        self.report_error(
                            'Both formats %s and %s are video-only, you must specify "-f video+audio"'
                            % (format_1, format_2))
                        return
                    output_ext = (
                        formats_info[0]['ext']
                        if self.params.get('merge_output_format') is None
                        else self.params['merge_output_format'])
                    # Synthesize a combined format dict: video attributes from
                    # the first format, audio attributes from the second
                    return {
                        'requested_formats': formats_info,
                        'format': '%s+%s' % (formats_info[0].get('format'),
                                             formats_info[1].get('format')),
                        'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                formats_info[1].get('format_id')),
                        'width': formats_info[0].get('width'),
                        'height': formats_info[0].get('height'),
                        'resolution': formats_info[0].get('resolution'),
                        'fps': formats_info[0].get('fps'),
                        'vcodec': formats_info[0].get('vcodec'),
                        'vbr': formats_info[0].get('vbr'),
                        'stretched_ratio': formats_info[0].get('stretched_ratio'),
                        'acodec': formats_info[1].get('acodec'),
                        'abr': formats_info[1].get('abr'),
                        'ext': output_ext,
                    }
                video_selector, audio_selector = map(_build_selector_function, selector.selector)

                def selector_function(formats):
                    formats = list(formats)
                    # Every video pick is merged with every audio pick
                    for pair in itertools.product(video_selector(formats), audio_selector(formats)):
                        yield _merge(pair)

            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(formats):
                # Apply the node's [..] filters before selecting
                for _filter in filters:
                    formats = list(filter(_filter, formats))
                return selector_function(formats)
            return final_selector

        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator(object):
            # Token iterator with one-token pushback (restore_last_token),
            # as required by the recursive-descent parser above.
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            # Python 2 iterator protocol compatibility
            next = __next__

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
1191
1192     def _calc_headers(self, info_dict):
1193         res = std_headers.copy()
1194
1195         add_headers = info_dict.get('http_headers')
1196         if add_headers:
1197             res.update(add_headers)
1198
1199         cookies = self._calc_cookies(info_dict)
1200         if cookies:
1201             res['Cookie'] = cookies
1202
1203         return res
1204
1205     def _calc_cookies(self, info_dict):
1206         pr = sanitized_Request(info_dict['url'])
1207         self.cookiejar.add_cookie_header(pr)
1208         return pr.get_header('Cookie')
1209
1210     def process_video_result(self, info_dict, download=True):
1211         assert info_dict.get('_type', 'video') == 'video'
1212
1213         if 'id' not in info_dict:
1214             raise ExtractorError('Missing "id" field in extractor result')
1215         if 'title' not in info_dict:
1216             raise ExtractorError('Missing "title" field in extractor result')
1217
1218         if 'playlist' not in info_dict:
1219             # It isn't part of a playlist
1220             info_dict['playlist'] = None
1221             info_dict['playlist_index'] = None
1222
1223         thumbnails = info_dict.get('thumbnails')
1224         if thumbnails is None:
1225             thumbnail = info_dict.get('thumbnail')
1226             if thumbnail:
1227                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1228         if thumbnails:
1229             thumbnails.sort(key=lambda t: (
1230                 t.get('preference'), t.get('width'), t.get('height'),
1231                 t.get('id'), t.get('url')))
1232             for i, t in enumerate(thumbnails):
1233                 t['url'] = sanitize_url(t['url'])
1234                 if t.get('width') and t.get('height'):
1235                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1236                 if t.get('id') is None:
1237                     t['id'] = '%d' % i
1238
1239         if self.params.get('list_thumbnails'):
1240             self.list_thumbnails(info_dict)
1241             return
1242
1243         thumbnail = info_dict.get('thumbnail')
1244         if thumbnail:
1245             info_dict['thumbnail'] = sanitize_url(thumbnail)
1246         elif thumbnails:
1247             info_dict['thumbnail'] = thumbnails[-1]['url']
1248
1249         if 'display_id' not in info_dict and 'id' in info_dict:
1250             info_dict['display_id'] = info_dict['id']
1251
1252         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1253             # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1254             # see http://bugs.python.org/issue1646728)
1255             try:
1256                 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1257                 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1258             except (ValueError, OverflowError, OSError):
1259                 pass
1260
1261         # Auto generate title fields corresponding to the *_number fields when missing
1262         # in order to always have clean titles. This is very common for TV series.
1263         for field in ('chapter', 'season', 'episode'):
1264             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1265                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
1266
1267         subtitles = info_dict.get('subtitles')
1268         if subtitles:
1269             for _, subtitle in subtitles.items():
1270                 for subtitle_format in subtitle:
1271                     if subtitle_format.get('url'):
1272                         subtitle_format['url'] = sanitize_url(subtitle_format['url'])
1273                     if 'ext' not in subtitle_format:
1274                         subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1275
1276         if self.params.get('listsubtitles', False):
1277             if 'automatic_captions' in info_dict:
1278                 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
1279             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1280             return
1281         info_dict['requested_subtitles'] = self.process_subtitles(
1282             info_dict['id'], subtitles,
1283             info_dict.get('automatic_captions'))
1284
1285         # We now pick which formats have to be downloaded
1286         if info_dict.get('formats') is None:
1287             # There's only one format available
1288             formats = [info_dict]
1289         else:
1290             formats = info_dict['formats']
1291
1292         if not formats:
1293             raise ExtractorError('No video formats found!')
1294
1295         formats_dict = {}
1296
1297         # We check that all the formats have the format and format_id fields
1298         for i, format in enumerate(formats):
1299             if 'url' not in format:
1300                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1301
1302             format['url'] = sanitize_url(format['url'])
1303
1304             if format.get('format_id') is None:
1305                 format['format_id'] = compat_str(i)
1306             else:
1307                 # Sanitize format_id from characters used in format selector expression
1308                 format['format_id'] = re.sub('[\s,/+\[\]()]', '_', format['format_id'])
1309             format_id = format['format_id']
1310             if format_id not in formats_dict:
1311                 formats_dict[format_id] = []
1312             formats_dict[format_id].append(format)
1313
1314         # Make sure all formats have unique format_id
1315         for format_id, ambiguous_formats in formats_dict.items():
1316             if len(ambiguous_formats) > 1:
1317                 for i, format in enumerate(ambiguous_formats):
1318                     format['format_id'] = '%s-%d' % (format_id, i)
1319
1320         for i, format in enumerate(formats):
1321             if format.get('format') is None:
1322                 format['format'] = '{id} - {res}{note}'.format(
1323                     id=format['format_id'],
1324                     res=self.format_resolution(format),
1325                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1326                 )
1327             # Automatically determine file extension if missing
1328             if 'ext' not in format:
1329                 format['ext'] = determine_ext(format['url']).lower()
1330             # Automatically determine protocol if missing (useful for format
1331             # selection purposes)
1332             if 'protocol' not in format:
1333                 format['protocol'] = determine_protocol(format)
1334             # Add HTTP headers, so that external programs can use them from the
1335             # json output
1336             full_format_info = info_dict.copy()
1337             full_format_info.update(format)
1338             format['http_headers'] = self._calc_headers(full_format_info)
1339
1340         # TODO Central sorting goes here
1341
1342         if formats[0] is not info_dict:
1343             # only set the 'formats' fields if the original info_dict list them
1344             # otherwise we end up with a circular reference, the first (and unique)
1345             # element in the 'formats' field in info_dict is info_dict itself,
1346             # which can't be exported to json
1347             info_dict['formats'] = formats
1348         if self.params.get('listformats'):
1349             self.list_formats(info_dict)
1350             return
1351
1352         req_format = self.params.get('format')
1353         if req_format is None:
1354             req_format_list = []
1355             if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
1356                     not info_dict.get('is_live')):
1357                 merger = FFmpegMergerPP(self)
1358                 if merger.available and merger.can_merge():
1359                     req_format_list.append('bestvideo+bestaudio')
1360             req_format_list.append('best')
1361             req_format = '/'.join(req_format_list)
1362         format_selector = self.build_format_selector(req_format)
1363         formats_to_download = list(format_selector(formats))
1364         if not formats_to_download:
1365             raise ExtractorError('requested format not available',
1366                                  expected=True)
1367
1368         if download:
1369             if len(formats_to_download) > 1:
1370                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1371             for format in formats_to_download:
1372                 new_info = dict(info_dict)
1373                 new_info.update(format)
1374                 self.process_info(new_info)
1375         # We update the info dict with the best quality format (backwards compatibility)
1376         info_dict.update(formats_to_download[-1])
1377         return info_dict
1378
1379     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1380         """Select the requested subtitles and their format"""
1381         available_subs = {}
1382         if normal_subtitles and self.params.get('writesubtitles'):
1383             available_subs.update(normal_subtitles)
1384         if automatic_captions and self.params.get('writeautomaticsub'):
1385             for lang, cap_info in automatic_captions.items():
1386                 if lang not in available_subs:
1387                     available_subs[lang] = cap_info
1388
1389         if (not self.params.get('writesubtitles') and not
1390                 self.params.get('writeautomaticsub') or not
1391                 available_subs):
1392             return None
1393
1394         if self.params.get('allsubtitles', False):
1395             requested_langs = available_subs.keys()
1396         else:
1397             if self.params.get('subtitleslangs', False):
1398                 requested_langs = self.params.get('subtitleslangs')
1399             elif 'en' in available_subs:
1400                 requested_langs = ['en']
1401             else:
1402                 requested_langs = [list(available_subs.keys())[0]]
1403
1404         formats_query = self.params.get('subtitlesformat', 'best')
1405         formats_preference = formats_query.split('/') if formats_query else []
1406         subs = {}
1407         for lang in requested_langs:
1408             formats = available_subs.get(lang)
1409             if formats is None:
1410                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1411                 continue
1412             for ext in formats_preference:
1413                 if ext == 'best':
1414                     f = formats[-1]
1415                     break
1416                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1417                 if matches:
1418                     f = matches[-1]
1419                     break
1420             else:
1421                 f = formats[-1]
1422                 self.report_warning(
1423                     'No subtitle format found matching "%s" for language %s, '
1424                     'using %s' % (formats_query, lang, f['ext']))
1425             subs[lang] = f
1426         return subs
1427
1428     def process_info(self, info_dict):
1429         """Process a single resolved IE result."""
1430
1431         assert info_dict.get('_type', 'video') == 'video'
1432
1433         max_downloads = self.params.get('max_downloads')
1434         if max_downloads is not None:
1435             if self._num_downloads >= int(max_downloads):
1436                 raise MaxDownloadsReached()
1437
1438         info_dict['fulltitle'] = info_dict['title']
1439         if len(info_dict['title']) > 200:
1440             info_dict['title'] = info_dict['title'][:197] + '...'
1441
1442         if 'format' not in info_dict:
1443             info_dict['format'] = info_dict['ext']
1444
1445         reason = self._match_entry(info_dict, incomplete=False)
1446         if reason is not None:
1447             self.to_screen('[download] ' + reason)
1448             return
1449
1450         self._num_downloads += 1
1451
1452         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1453
1454         # Forced printings
1455         if self.params.get('forcetitle', False):
1456             self.to_stdout(info_dict['fulltitle'])
1457         if self.params.get('forceid', False):
1458             self.to_stdout(info_dict['id'])
1459         if self.params.get('forceurl', False):
1460             if info_dict.get('requested_formats') is not None:
1461                 for f in info_dict['requested_formats']:
1462                     self.to_stdout(f['url'] + f.get('play_path', ''))
1463             else:
1464                 # For RTMP URLs, also include the playpath
1465                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1466         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1467             self.to_stdout(info_dict['thumbnail'])
1468         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1469             self.to_stdout(info_dict['description'])
1470         if self.params.get('forcefilename', False) and filename is not None:
1471             self.to_stdout(filename)
1472         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1473             self.to_stdout(formatSeconds(info_dict['duration']))
1474         if self.params.get('forceformat', False):
1475             self.to_stdout(info_dict['format'])
1476         if self.params.get('forcejson', False):
1477             self.to_stdout(json.dumps(info_dict))
1478
1479         # Do nothing else if in simulate mode
1480         if self.params.get('simulate', False):
1481             return
1482
1483         if filename is None:
1484             return
1485
1486         try:
1487             dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1488             if dn and not os.path.exists(dn):
1489                 os.makedirs(dn)
1490         except (OSError, IOError) as err:
1491             self.report_error('unable to create directory ' + error_to_compat_str(err))
1492             return
1493
1494         if self.params.get('writedescription', False):
1495             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1496             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1497                 self.to_screen('[info] Video description is already present')
1498             elif info_dict.get('description') is None:
1499                 self.report_warning('There\'s no description to write.')
1500             else:
1501                 try:
1502                     self.to_screen('[info] Writing video description to: ' + descfn)
1503                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1504                         descfile.write(info_dict['description'])
1505                 except (OSError, IOError):
1506                     self.report_error('Cannot write description file ' + descfn)
1507                     return
1508
1509         if self.params.get('writeannotations', False):
1510             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1511             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1512                 self.to_screen('[info] Video annotations are already present')
1513             else:
1514                 try:
1515                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1516                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1517                         annofile.write(info_dict['annotations'])
1518                 except (KeyError, TypeError):
1519                     self.report_warning('There are no annotations to write.')
1520                 except (OSError, IOError):
1521                     self.report_error('Cannot write annotations file: ' + annofn)
1522                     return
1523
1524         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1525                                        self.params.get('writeautomaticsub')])
1526
1527         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1528             # subtitles download errors are already managed as troubles in relevant IE
1529             # that way it will silently go on when used with unsupporting IE
1530             subtitles = info_dict['requested_subtitles']
1531             ie = self.get_info_extractor(info_dict['extractor_key'])
1532             for sub_lang, sub_info in subtitles.items():
1533                 sub_format = sub_info['ext']
1534                 if sub_info.get('data') is not None:
1535                     sub_data = sub_info['data']
1536                 else:
1537                     try:
1538                         sub_data = ie._download_webpage(
1539                             sub_info['url'], info_dict['id'], note=False)
1540                     except ExtractorError as err:
1541                         self.report_warning('Unable to download subtitle for "%s": %s' %
1542                                             (sub_lang, error_to_compat_str(err.cause)))
1543                         continue
1544                 try:
1545                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1546                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1547                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1548                     else:
1549                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1550                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1551                             subfile.write(sub_data)
1552                 except (OSError, IOError):
1553                     self.report_error('Cannot write subtitles file ' + sub_filename)
1554                     return
1555
1556         if self.params.get('writeinfojson', False):
1557             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1558             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1559                 self.to_screen('[info] Video description metadata is already present')
1560             else:
1561                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1562                 try:
1563                     write_json_file(self.filter_requested_info(info_dict), infofn)
1564                 except (OSError, IOError):
1565                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1566                     return
1567
1568         self._write_thumbnails(info_dict, filename)
1569
1570         if not self.params.get('skip_download', False):
1571             try:
1572                 def dl(name, info):
1573                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1574                     for ph in self._progress_hooks:
1575                         fd.add_progress_hook(ph)
1576                     if self.params.get('verbose'):
1577                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1578                     return fd.download(name, info)
1579
1580                 if info_dict.get('requested_formats') is not None:
1581                     downloaded = []
1582                     success = True
1583                     merger = FFmpegMergerPP(self)
1584                     if not merger.available:
1585                         postprocessors = []
1586                         self.report_warning('You have requested multiple '
1587                                             'formats but ffmpeg or avconv are not installed.'
1588                                             ' The formats won\'t be merged.')
1589                     else:
1590                         postprocessors = [merger]
1591
1592                     def compatible_formats(formats):
1593                         video, audio = formats
1594                         # Check extension
1595                         video_ext, audio_ext = audio.get('ext'), video.get('ext')
1596                         if video_ext and audio_ext:
1597                             COMPATIBLE_EXTS = (
1598                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
1599                                 ('webm')
1600                             )
1601                             for exts in COMPATIBLE_EXTS:
1602                                 if video_ext in exts and audio_ext in exts:
1603                                     return True
1604                         # TODO: Check acodec/vcodec
1605                         return False
1606
1607                     filename_real_ext = os.path.splitext(filename)[1][1:]
1608                     filename_wo_ext = (
1609                         os.path.splitext(filename)[0]
1610                         if filename_real_ext == info_dict['ext']
1611                         else filename)
1612                     requested_formats = info_dict['requested_formats']
1613                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1614                         info_dict['ext'] = 'mkv'
1615                         self.report_warning(
1616                             'Requested formats are incompatible for merge and will be merged into mkv.')
1617                     # Ensure filename always has a correct extension for successful merge
1618                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1619                     if os.path.exists(encodeFilename(filename)):
1620                         self.to_screen(
1621                             '[download] %s has already been downloaded and '
1622                             'merged' % filename)
1623                     else:
1624                         for f in requested_formats:
1625                             new_info = dict(info_dict)
1626                             new_info.update(f)
1627                             fname = self.prepare_filename(new_info)
1628                             fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
1629                             downloaded.append(fname)
1630                             partial_success = dl(fname, new_info)
1631                             success = success and partial_success
1632                         info_dict['__postprocessors'] = postprocessors
1633                         info_dict['__files_to_merge'] = downloaded
1634                 else:
1635                     # Just a single file
1636                     success = dl(filename, info_dict)
1637             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1638                 self.report_error('unable to download video data: %s' % str(err))
1639                 return
1640             except (OSError, IOError) as err:
1641                 raise UnavailableVideoError(err)
1642             except (ContentTooShortError, ) as err:
1643                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1644                 return
1645
1646             if success and filename != '-':
1647                 # Fixup content
1648                 fixup_policy = self.params.get('fixup')
1649                 if fixup_policy is None:
1650                     fixup_policy = 'detect_or_warn'
1651
1652                 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
1653
1654                 stretched_ratio = info_dict.get('stretched_ratio')
1655                 if stretched_ratio is not None and stretched_ratio != 1:
1656                     if fixup_policy == 'warn':
1657                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1658                             info_dict['id'], stretched_ratio))
1659                     elif fixup_policy == 'detect_or_warn':
1660                         stretched_pp = FFmpegFixupStretchedPP(self)
1661                         if stretched_pp.available:
1662                             info_dict.setdefault('__postprocessors', [])
1663                             info_dict['__postprocessors'].append(stretched_pp)
1664                         else:
1665                             self.report_warning(
1666                                 '%s: Non-uniform pixel ratio (%s). %s'
1667                                 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
1668                     else:
1669                         assert fixup_policy in ('ignore', 'never')
1670
1671                 if (info_dict.get('requested_formats') is None and
1672                         info_dict.get('container') == 'm4a_dash'):
1673                     if fixup_policy == 'warn':
1674                         self.report_warning(
1675                             '%s: writing DASH m4a. '
1676                             'Only some players support this container.'
1677                             % info_dict['id'])
1678                     elif fixup_policy == 'detect_or_warn':
1679                         fixup_pp = FFmpegFixupM4aPP(self)
1680                         if fixup_pp.available:
1681                             info_dict.setdefault('__postprocessors', [])
1682                             info_dict['__postprocessors'].append(fixup_pp)
1683                         else:
1684                             self.report_warning(
1685                                 '%s: writing DASH m4a. '
1686                                 'Only some players support this container. %s'
1687                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
1688                     else:
1689                         assert fixup_policy in ('ignore', 'never')
1690
1691                 if (info_dict.get('protocol') == 'm3u8_native' or
1692                         info_dict.get('protocol') == 'm3u8' and
1693                         self.params.get('hls_prefer_native')):
1694                     if fixup_policy == 'warn':
1695                         self.report_warning('%s: malformated aac bitstream.' % (
1696                             info_dict['id']))
1697                     elif fixup_policy == 'detect_or_warn':
1698                         fixup_pp = FFmpegFixupM3u8PP(self)
1699                         if fixup_pp.available:
1700                             info_dict.setdefault('__postprocessors', [])
1701                             info_dict['__postprocessors'].append(fixup_pp)
1702                         else:
1703                             self.report_warning(
1704                                 '%s: malformated aac bitstream. %s'
1705                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
1706                     else:
1707                         assert fixup_policy in ('ignore', 'never')
1708
1709                 try:
1710                     self.post_process(filename, info_dict)
1711                 except (PostProcessingError) as err:
1712                     self.report_error('postprocessing: %s' % str(err))
1713                     return
1714                 self.record_download_archive(info_dict)
1715
1716     def download(self, url_list):
1717         """Download a given list of URLs."""
1718         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1719         if (len(url_list) > 1 and
1720                 '%' not in outtmpl and
1721                 self.params.get('max_downloads') != 1):
1722             raise SameFileError(outtmpl)
1723
1724         for url in url_list:
1725             try:
1726                 # It also downloads the videos
1727                 res = self.extract_info(
1728                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
1729             except UnavailableVideoError:
1730                 self.report_error('unable to download video')
1731             except MaxDownloadsReached:
1732                 self.to_screen('[info] Maximum number of downloaded files reached.')
1733                 raise
1734             else:
1735                 if self.params.get('dump_single_json', False):
1736                     self.to_stdout(json.dumps(res))
1737
1738         return self._download_retcode
1739
1740     def download_with_info_file(self, info_filename):
1741         with contextlib.closing(fileinput.FileInput(
1742                 [info_filename], mode='r',
1743                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1744             # FileInput doesn't have a read method, we can't call json.load
1745             info = self.filter_requested_info(json.loads('\n'.join(f)))
1746         try:
1747             self.process_ie_result(info, download=True)
1748         except DownloadError:
1749             webpage_url = info.get('webpage_url')
1750             if webpage_url is not None:
1751                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1752                 return self.download([webpage_url])
1753             else:
1754                 raise
1755         return self._download_retcode
1756
1757     @staticmethod
1758     def filter_requested_info(info_dict):
1759         return dict(
1760             (k, v) for k, v in info_dict.items()
1761             if k not in ['requested_formats', 'requested_subtitles'])
1762
1763     def post_process(self, filename, ie_info):
1764         """Run all the postprocessors on the given file."""
1765         info = dict(ie_info)
1766         info['filepath'] = filename
1767         pps_chain = []
1768         if ie_info.get('__postprocessors') is not None:
1769             pps_chain.extend(ie_info['__postprocessors'])
1770         pps_chain.extend(self._pps)
1771         for pp in pps_chain:
1772             files_to_delete = []
1773             try:
1774                 files_to_delete, info = pp.run(info)
1775             except PostProcessingError as e:
1776                 self.report_error(e.msg)
1777             if files_to_delete and not self.params.get('keepvideo', False):
1778                 for old_filename in files_to_delete:
1779                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1780                     try:
1781                         os.remove(encodeFilename(old_filename))
1782                     except (IOError, OSError):
1783                         self.report_warning('Unable to remove downloaded original file')
1784
1785     def _make_archive_id(self, info_dict):
1786         # Future-proof against any change in case
1787         # and backwards compatibility with prior versions
1788         extractor = info_dict.get('extractor_key')
1789         if extractor is None:
1790             if 'id' in info_dict:
1791                 extractor = info_dict.get('ie_key')  # key in a playlist
1792         if extractor is None:
1793             return None  # Incomplete video information
1794         return extractor.lower() + ' ' + info_dict['id']
1795
1796     def in_download_archive(self, info_dict):
1797         fn = self.params.get('download_archive')
1798         if fn is None:
1799             return False
1800
1801         vid_id = self._make_archive_id(info_dict)
1802         if vid_id is None:
1803             return False  # Incomplete video information
1804
1805         try:
1806             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1807                 for line in archive_file:
1808                     if line.strip() == vid_id:
1809                         return True
1810         except IOError as ioe:
1811             if ioe.errno != errno.ENOENT:
1812                 raise
1813         return False
1814
1815     def record_download_archive(self, info_dict):
1816         fn = self.params.get('download_archive')
1817         if fn is None:
1818             return
1819         vid_id = self._make_archive_id(info_dict)
1820         assert vid_id
1821         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1822             archive_file.write(vid_id + '\n')
1823
1824     @staticmethod
1825     def format_resolution(format, default='unknown'):
1826         if format.get('vcodec') == 'none':
1827             return 'audio only'
1828         if format.get('resolution') is not None:
1829             return format['resolution']
1830         if format.get('height') is not None:
1831             if format.get('width') is not None:
1832                 res = '%sx%s' % (format['width'], format['height'])
1833             else:
1834                 res = '%sp' % format['height']
1835         elif format.get('width') is not None:
1836             res = '%dx?' % format['width']
1837         else:
1838             res = default
1839         return res
1840
1841     def _format_note(self, fdict):
1842         res = ''
1843         if fdict.get('ext') in ['f4f', 'f4m']:
1844             res += '(unsupported) '
1845         if fdict.get('language'):
1846             if res:
1847                 res += ' '
1848             res += '[%s] ' % fdict['language']
1849         if fdict.get('format_note') is not None:
1850             res += fdict['format_note'] + ' '
1851         if fdict.get('tbr') is not None:
1852             res += '%4dk ' % fdict['tbr']
1853         if fdict.get('container') is not None:
1854             if res:
1855                 res += ', '
1856             res += '%s container' % fdict['container']
1857         if (fdict.get('vcodec') is not None and
1858                 fdict.get('vcodec') != 'none'):
1859             if res:
1860                 res += ', '
1861             res += fdict['vcodec']
1862             if fdict.get('vbr') is not None:
1863                 res += '@'
1864         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1865             res += 'video@'
1866         if fdict.get('vbr') is not None:
1867             res += '%4dk' % fdict['vbr']
1868         if fdict.get('fps') is not None:
1869             if res:
1870                 res += ', '
1871             res += '%sfps' % fdict['fps']
1872         if fdict.get('acodec') is not None:
1873             if res:
1874                 res += ', '
1875             if fdict['acodec'] == 'none':
1876                 res += 'video only'
1877             else:
1878                 res += '%-5s' % fdict['acodec']
1879         elif fdict.get('abr') is not None:
1880             if res:
1881                 res += ', '
1882             res += 'audio'
1883         if fdict.get('abr') is not None:
1884             res += '@%3dk' % fdict['abr']
1885         if fdict.get('asr') is not None:
1886             res += ' (%5dHz)' % fdict['asr']
1887         if fdict.get('filesize') is not None:
1888             if res:
1889                 res += ', '
1890             res += format_bytes(fdict['filesize'])
1891         elif fdict.get('filesize_approx') is not None:
1892             if res:
1893                 res += ', '
1894             res += '~' + format_bytes(fdict['filesize_approx'])
1895         return res
1896
1897     def list_formats(self, info_dict):
1898         formats = info_dict.get('formats', [info_dict])
1899         table = [
1900             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1901             for f in formats
1902             if f.get('preference') is None or f['preference'] >= -1000]
1903         if len(formats) > 1:
1904             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1905
1906         header_line = ['format code', 'extension', 'resolution', 'note']
1907         self.to_screen(
1908             '[info] Available formats for %s:\n%s' %
1909             (info_dict['id'], render_table(header_line, table)))
1910
1911     def list_thumbnails(self, info_dict):
1912         thumbnails = info_dict.get('thumbnails')
1913         if not thumbnails:
1914             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
1915             return
1916
1917         self.to_screen(
1918             '[info] Thumbnails for %s:' % info_dict['id'])
1919         self.to_screen(render_table(
1920             ['ID', 'width', 'height', 'URL'],
1921             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1922
1923     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1924         if not subtitles:
1925             self.to_screen('%s has no %s' % (video_id, name))
1926             return
1927         self.to_screen(
1928             'Available %s for %s:' % (name, video_id))
1929         self.to_screen(render_table(
1930             ['Language', 'formats'],
1931             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1932                 for lang, formats in subtitles.items()]))
1933
1934     def urlopen(self, req):
1935         """ Start an HTTP download """
1936         if isinstance(req, compat_basestring):
1937             req = sanitized_Request(req)
1938         return self._opener.open(req, timeout=self._socket_timeout)
1939
    def print_debug_header(self):
        """Write verbose diagnostic information (encodings, versions, exe
        versions, proxy map, optionally public IP) to the debug output.

        Does nothing unless the 'verbose' option is set.
        """
        if not self.params.get('verbose'):
            return

        # On a broken build, native str literals are not compat_str
        # (unicode); warn the user rather than fail obscurely later.
        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        # sys.stdout may have been replaced by a wrapper without an
        # 'encoding' attribute; fall back to naming the wrapper's type.
        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
        # Best effort: report the git commit when running from a checkout.
        # Any failure (no git, not a repo, decode error) is silently ignored.
        try:
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        except Exception:
            try:
                # Python 2 only: clear the current exception state so it
                # does not leak; the call itself fails on Python 3.
                sys.exc_clear()
            except Exception:
                pass
        self._write_string('[debug] Python version %s - %s\n' % (
            platform.python_version(), platform_name()))

        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        # Only external programs that were actually detected (truthy
        # version string) are listed, sorted by name.
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Collect the effective proxy configuration from every opener
        # handler that exposes one (see _setup_opener).
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        # Opt-in network check: report the public IP and warn when a newer
        # release is available.
        if self.params.get('call_home', False):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
2004
2005     def _setup_opener(self):
2006         timeout_val = self.params.get('socket_timeout')
2007         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
2008
2009         opts_cookiefile = self.params.get('cookiefile')
2010         opts_proxy = self.params.get('proxy')
2011
2012         if opts_cookiefile is None:
2013             self.cookiejar = compat_cookiejar.CookieJar()
2014         else:
2015             self.cookiejar = compat_cookiejar.MozillaCookieJar(
2016                 opts_cookiefile)
2017             if os.access(opts_cookiefile, os.R_OK):
2018                 self.cookiejar.load()
2019
2020         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
2021         if opts_proxy is not None:
2022             if opts_proxy == '':
2023                 proxies = {}
2024             else:
2025                 proxies = {'http': opts_proxy, 'https': opts_proxy}
2026         else:
2027             proxies = compat_urllib_request.getproxies()
2028             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
2029             if 'http' in proxies and 'https' not in proxies:
2030                 proxies['https'] = proxies['http']
2031         proxy_handler = PerRequestProxyHandler(proxies)
2032
2033         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
2034         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
2035         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
2036         data_handler = compat_urllib_request_DataHandler()
2037
2038         # When passing our own FileHandler instance, build_opener won't add the
2039         # default FileHandler and allows us to disable the file protocol, which
2040         # can be used for malicious purposes (see
2041         # https://github.com/rg3/youtube-dl/issues/8227)
2042         file_handler = compat_urllib_request.FileHandler()
2043
2044         def file_open(*args, **kwargs):
2045             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
2046         file_handler.file_open = file_open
2047
2048         opener = compat_urllib_request.build_opener(
2049             proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)
2050
2051         # Delete the default user-agent header, which would otherwise apply in
2052         # cases where our custom HTTP handler doesn't come into play
2053         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
2054         opener.addheaders = []
2055         self._opener = opener
2056
2057     def encode(self, s):
2058         if isinstance(s, bytes):
2059             return s  # Already encoded
2060
2061         try:
2062             return s.encode(self.get_encoding())
2063         except UnicodeEncodeError as err:
2064             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2065             raise
2066
2067     def get_encoding(self):
2068         encoding = self.params.get('encoding')
2069         if encoding is None:
2070             encoding = preferredencoding()
2071         return encoding
2072
2073     def _write_thumbnails(self, info_dict, filename):
2074         if self.params.get('writethumbnail', False):
2075             thumbnails = info_dict.get('thumbnails')
2076             if thumbnails:
2077                 thumbnails = [thumbnails[-1]]
2078         elif self.params.get('write_all_thumbnails', False):
2079             thumbnails = info_dict.get('thumbnails')
2080         else:
2081             return
2082
2083         if not thumbnails:
2084             # No thumbnails present, so return immediately
2085             return
2086
2087         for t in thumbnails:
2088             thumb_ext = determine_ext(t['url'], 'jpg')
2089             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2090             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2091             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
2092
2093             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2094                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2095                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2096             else:
2097                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2098                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2099                 try:
2100                     uf = self.urlopen(t['url'])
2101                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2102                         shutil.copyfileobj(uf, thumbf)
2103                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2104                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2105                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2106                     self.report_warning('Unable to download thumbnail "%s": %s' %
2107                                         (t['url'], error_to_compat_str(err)))