[options] Add --no-color
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import datetime
8 import errno
9 import io
10 import itertools
11 import json
12 import locale
13 import operator
14 import os
15 import platform
16 import re
17 import shutil
18 import subprocess
19 import socket
20 import sys
21 import time
22 import traceback
23
24 if os.name == 'nt':
25     import ctypes
26
27 from .compat import (
28     compat_basestring,
29     compat_cookiejar,
30     compat_expanduser,
31     compat_http_client,
32     compat_kwargs,
33     compat_str,
34     compat_urllib_error,
35     compat_urllib_request,
36 )
37 from .utils import (
38     escape_url,
39     ContentTooShortError,
40     date_from_str,
41     DateRange,
42     DEFAULT_OUTTMPL,
43     determine_ext,
44     DownloadError,
45     encodeFilename,
46     ExtractorError,
47     format_bytes,
48     formatSeconds,
49     get_term_width,
50     locked_file,
51     make_HTTPS_handler,
52     MaxDownloadsReached,
53     PagedList,
54     parse_filesize,
55     PostProcessingError,
56     platform_name,
57     preferredencoding,
58     render_table,
59     SameFileError,
60     sanitize_filename,
61     std_headers,
62     subtitles_filename,
63     takewhile_inclusive,
64     UnavailableVideoError,
65     url_basename,
66     version_tuple,
67     write_json_file,
68     write_string,
69     YoutubeDLHandler,
70     prepend_extension,
71     args_to_str,
72     age_restricted,
73 )
74 from .cache import Cache
75 from .extractor import get_info_extractor, gen_extractors
76 from .downloader import get_suitable_downloader
77 from .downloader.rtmp import rtmpdump_version
78 from .postprocessor import (
79     FFmpegFixupM4aPP,
80     FFmpegFixupStretchedPP,
81     FFmpegMergerPP,
82     FFmpegPostProcessor,
83     get_postprocessor,
84 )
85 from .version import __version__
86
87
88 class YoutubeDL(object):
89     """YoutubeDL class.
90
91     YoutubeDL objects are the ones responsible of downloading the
92     actual video file and writing it to disk if the user has requested
93     it, among some other tasks. In most cases there should be one per
    program. Since, given a video URL, the downloader doesn't know how to
    extract all the needed information (a task that InfoExtractors do),
    it has to pass the URL to one of them.
97
98     For this, YoutubeDL objects have a method that allows
99     InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
101     finds that reports being able to handle it. The InfoExtractor extracts
102     all the information about the video or videos the URL refers to, and
103     YoutubeDL process the extracted information, possibly using a File
104     Downloader to download the video.
105
106     YoutubeDL objects accept a lot of parameters. In order not to saturate
107     the object constructor with arguments, it receives a dictionary of
108     options instead. These options are available through the params
109     attribute for the InfoExtractors to use. The YoutubeDL also
110     registers itself as the downloader in charge for the InfoExtractors
111     that are added to it, so this is a "mutual registration".
112
113     Available options:
114
115     username:          Username for authentication purposes.
116     password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
118     usenetrc:          Use netrc for authentication instead.
119     verbose:           Print additional info to stdout.
120     quiet:             Do not print messages to stdout.
121     no_warnings:       Do not print out anything for warnings.
122     forceurl:          Force printing final URL.
123     forcetitle:        Force printing title.
124     forceid:           Force printing ID.
125     forcethumbnail:    Force printing thumbnail URL.
126     forcedescription:  Force printing description.
127     forcefilename:     Force printing final filename.
128     forceduration:     Force printing duration.
129     forcejson:         Force printing info_dict as JSON.
130     dump_single_json:  Force printing the info_dict of the whole playlist
131                        (or video) as a single JSON line.
132     simulate:          Do not download the video files.
133     format:            Video format code. See options.py for more information.
134     format_limit:      Highest quality format to try.
135     outtmpl:           Template for output names.
136     restrictfilenames: Do not allow "&" and spaces in file names
137     ignoreerrors:      Do not stop on download errors.
138     nooverwrites:      Prevent overwriting files.
139     playliststart:     Playlist item to start at.
140     playlistend:       Playlist item to end at.
141     playlist_items:    Specific indices of playlist to download.
142     playlistreverse:   Download playlist items in reverse order.
143     matchtitle:        Download only matching titles.
144     rejecttitle:       Reject downloads for matching titles.
145     logger:            Log messages to a logging.Logger instance.
146     logtostderr:       Log messages to stderr instead of stdout.
147     writedescription:  Write the video description to a .description file
148     writeinfojson:     Write the video description to a .info.json file
149     writeannotations:  Write the video annotations to a .annotations.xml file
150     writethumbnail:    Write the thumbnail image to a file
151     write_all_thumbnails:  Write all thumbnail formats to files
152     writesubtitles:    Write the video subtitles to a file
153     writeautomaticsub: Write the automatic subtitles to a file
154     allsubtitles:      Downloads all the subtitles of the video
155                        (requires writesubtitles or writeautomaticsub)
156     listsubtitles:     Lists all available subtitles for the video
157     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
158     subtitleslangs:    List of languages of the subtitles to download
159     keepvideo:         Keep the video file after post-processing
160     daterange:         A DateRange object, download only if the upload_date is in the range.
161     skip_download:     Skip the actual download of the video file
162     cachedir:          Location of the cache files in the filesystem.
163                        False to disable filesystem cache.
164     noplaylist:        Download single video instead of a playlist if in doubt.
165     age_limit:         An integer representing the user's age in years.
166                        Unsuitable videos for the given age are skipped.
167     min_views:         An integer representing the minimum view count the video
168                        must have in order to not be skipped.
169                        Videos without view count information are always
170                        downloaded. None for no limit.
171     max_views:         An integer representing the maximum view count.
172                        Videos that are more popular than that are not
173                        downloaded.
174                        Videos without view count information are always
175                        downloaded. None for no limit.
176     download_archive:  File name of a file where all downloads are recorded.
177                        Videos already present in the file are not downloaded
178                        again.
179     cookiefile:        File name where cookies should be read from and dumped to.
180     nocheckcertificate:Do not verify SSL certificates
181     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
182                        At the moment, this is only supported by YouTube.
183     proxy:             URL of the proxy server to use
184     socket_timeout:    Time to wait for unresponsive hosts, in seconds
185     bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
187     debug_printtraffic:Print out sent and received HTTP traffic
188     include_ads:       Download ads as well
189     default_search:    Prepend this string if an input url is not valid.
190                        'auto' for elaborate guessing
191     encoding:          Use this encoding instead of the system-specified.
192     extract_flat:      Do not resolve URLs, return the immediate result.
193                        Pass in 'in_playlist' to only show this behavior for
194                        playlist items.
195     postprocessors:    A list of dictionaries, each with an entry
196                        * key:  The name of the postprocessor. See
197                                youtube_dl/postprocessor/__init__.py for a list.
198                        as well as any further keyword arguments for the
199                        postprocessor.
200     progress_hooks:    A list of functions that get called on download
201                        progress, with a dictionary with the entries
202                        * status: One of "downloading" and "finished".
203                                  Check this first and ignore unknown values.
204
205                        If status is one of "downloading" or "finished", the
206                        following properties may also be present:
207                        * filename: The final filename (always present)
208                        * downloaded_bytes: Bytes on disk
209                        * total_bytes: Size of the whole file, None if unknown
210                        * tmpfilename: The filename we're currently writing to
211                        * eta: The estimated time in seconds, None if unknown
212                        * speed: The download speed in bytes/second, None if
213                                 unknown
214
215                        Progress hooks are guaranteed to be called at least once
216                        (with status "finished") if the download is successful.
217     merge_output_format: Extension to use when merging formats.
218     fixup:             Automatically correct known faults of the file.
219                        One of:
220                        - "never": do nothing
221                        - "warn": only emit a warning
222                        - "detect_or_warn": check whether we can do anything
223                                            about it, warn otherwise (default)
224     source_address:    (Experimental) Client-side IP address to bind to.
225     call_home:         Boolean, true iff we are allowed to contact the
226                        youtube-dl servers for debugging.
227     sleep_interval:    Number of seconds to sleep before each download.
228     external_downloader:  Executable of the external downloader to call.
229     listformats:       Print an overview of available video formats and exit.
230     list_thumbnails:   Print a table of all thumbnails and exit.
231     match_filter:      A function that gets called with the info_dict of
232                        every video.
233                        If it returns a message, the video is ignored.
234                        If it returns None, the video is downloaded.
235                        match_filter_func in utils.py is one example for this.
236     no_color:          Do not emit color codes in output.
237
238
239     The following parameters are not used by YoutubeDL itself, they are used by
240     the FileDownloader:
241     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
242     noresizebuffer, retries, continuedl, noprogress, consoletitle,
243     xattr_set_filesize.
244
245     The following options are used by the post processors:
246     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
247                        otherwise prefer avconv.
248     exec_cmd:          Arbitrary command to run after downloading
249     """
250
    # Class-level defaults; each is re-assigned per instance in __init__,
    # so the mutable [] defaults are never actually shared between objects.
    params = None  # Dictionary of options (see the class docstring)
    _ies = []  # Registered InfoExtractor instances, in priority order
    _pps = []  # Post-processor chain
    _download_retcode = None  # Process exit code (0 ok; trouble() sets 1)
    _num_downloads = None  # Number of files downloaded so far
    _screen_file = None  # Stream for normal output (stdout or stderr)
257
258     def __init__(self, params=None, auto_init=True):
259         """Create a FileDownloader object with the given options."""
260         if params is None:
261             params = {}
262         self._ies = []
263         self._ies_instances = {}
264         self._pps = []
265         self._progress_hooks = []
266         self._download_retcode = 0
267         self._num_downloads = 0
268         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
269         self._err_file = sys.stderr
270         self.params = params
271         self.cache = Cache(self)
272
273         if params.get('bidi_workaround', False):
274             try:
275                 import pty
276                 master, slave = pty.openpty()
277                 width = get_term_width()
278                 if width is None:
279                     width_args = []
280                 else:
281                     width_args = ['-w', str(width)]
282                 sp_kwargs = dict(
283                     stdin=subprocess.PIPE,
284                     stdout=slave,
285                     stderr=self._err_file)
286                 try:
287                     self._output_process = subprocess.Popen(
288                         ['bidiv'] + width_args, **sp_kwargs
289                     )
290                 except OSError:
291                     self._output_process = subprocess.Popen(
292                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
293                 self._output_channel = os.fdopen(master, 'rb')
294             except OSError as ose:
295                 if ose.errno == 2:
296                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
297                 else:
298                     raise
299
300         if (sys.version_info >= (3,) and sys.platform != 'win32' and
301                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
302                 and not params.get('restrictfilenames', False)):
303             # On Python 3, the Unicode filesystem API will throw errors (#1474)
304             self.report_warning(
305                 'Assuming --restrict-filenames since file system encoding '
306                 'cannot encode all characters. '
307                 'Set the LC_ALL environment variable to fix this.')
308             self.params['restrictfilenames'] = True
309
310         if '%(stitle)s' in self.params.get('outtmpl', ''):
311             self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
312
313         self._setup_opener()
314
315         if auto_init:
316             self.print_debug_header()
317             self.add_default_info_extractors()
318
319         for pp_def_raw in self.params.get('postprocessors', []):
320             pp_class = get_postprocessor(pp_def_raw['key'])
321             pp_def = dict(pp_def_raw)
322             del pp_def['key']
323             pp = pp_class(self, **compat_kwargs(pp_def))
324             self.add_post_processor(pp)
325
326         for ph in self.params.get('progress_hooks', []):
327             self.add_progress_hook(ph)
328
329     def warn_if_short_id(self, argv):
330         # short YouTube ID starting with dash?
331         idxs = [
332             i for i, a in enumerate(argv)
333             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
334         if idxs:
335             correct_argv = (
336                 ['youtube-dl'] +
337                 [a for i, a in enumerate(argv) if i not in idxs] +
338                 ['--'] + [argv[i] for i in idxs]
339             )
340             self.report_warning(
341                 'Long argument string detected. '
342                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
343                 args_to_str(correct_argv))
344
345     def add_info_extractor(self, ie):
346         """Add an InfoExtractor object to the end of the list."""
347         self._ies.append(ie)
348         self._ies_instances[ie.ie_key()] = ie
349         ie.set_downloader(self)
350
351     def get_info_extractor(self, ie_key):
352         """
353         Get an instance of an IE with name ie_key, it will try to get one from
354         the _ies list, if there's no instance it will create a new one and add
355         it to the extractor list.
356         """
357         ie = self._ies_instances.get(ie_key)
358         if ie is None:
359             ie = get_info_extractor(ie_key)()
360             self.add_info_extractor(ie)
361         return ie
362
363     def add_default_info_extractors(self):
364         """
365         Add the InfoExtractors returned by gen_extractors to the end of the list
366         """
367         for ie in gen_extractors():
368             self.add_info_extractor(ie)
369
370     def add_post_processor(self, pp):
371         """Add a PostProcessor object to the end of the chain."""
372         self._pps.append(pp)
373         pp.set_downloader(self)
374
375     def add_progress_hook(self, ph):
376         """Add the progress hook (currently only for the file downloader)"""
377         self._progress_hooks.append(ph)
378
379     def _bidi_workaround(self, message):
380         if not hasattr(self, '_output_channel'):
381             return message
382
383         assert hasattr(self, '_output_process')
384         assert isinstance(message, compat_str)
385         line_count = message.count('\n') + 1
386         self._output_process.stdin.write((message + '\n').encode('utf-8'))
387         self._output_process.stdin.flush()
388         res = ''.join(self._output_channel.readline().decode('utf-8')
389                       for _ in range(line_count))
390         return res[:-len('\n')]
391
392     def to_screen(self, message, skip_eol=False):
393         """Print message to stdout if not in quiet mode."""
394         return self.to_stdout(message, skip_eol, check_quiet=True)
395
396     def _write_string(self, s, out=None):
397         write_string(s, out=out, encoding=self.params.get('encoding'))
398
399     def to_stdout(self, message, skip_eol=False, check_quiet=False):
400         """Print message to stdout if not in quiet mode."""
401         if self.params.get('logger'):
402             self.params['logger'].debug(message)
403         elif not check_quiet or not self.params.get('quiet', False):
404             message = self._bidi_workaround(message)
405             terminator = ['\n', ''][skip_eol]
406             output = message + terminator
407
408             self._write_string(output, self._screen_file)
409
410     def to_stderr(self, message):
411         """Print message to stderr."""
412         assert isinstance(message, compat_str)
413         if self.params.get('logger'):
414             self.params['logger'].error(message)
415         else:
416             message = self._bidi_workaround(message)
417             output = message + '\n'
418             self._write_string(output, self._err_file)
419
420     def to_console_title(self, message):
421         if not self.params.get('consoletitle', False):
422             return
423         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
424             # c_wchar_p() might not be necessary if `message` is
425             # already of type unicode()
426             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
427         elif 'TERM' in os.environ:
428             self._write_string('\033]0;%s\007' % message, self._screen_file)
429
430     def save_console_title(self):
431         if not self.params.get('consoletitle', False):
432             return
433         if 'TERM' in os.environ:
434             # Save the title on stack
435             self._write_string('\033[22;0t', self._screen_file)
436
437     def restore_console_title(self):
438         if not self.params.get('consoletitle', False):
439             return
440         if 'TERM' in os.environ:
441             # Restore the title from stack
442             self._write_string('\033[23;0t', self._screen_file)
443
444     def __enter__(self):
445         self.save_console_title()
446         return self
447
448     def __exit__(self, *args):
449         self.restore_console_title()
450
451         if self.params.get('cookiefile') is not None:
452             self.cookiejar.save()
453
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # Prefer the original exc_info if the current exception
                    # carries one (presumably set by wrapper exceptions such
                    # as ExtractorError — verify against the exception class)
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: show where we were called from
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Raise with the most specific exc_info available so the
            # DownloadError points at the root cause
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # Errors are ignored: record a non-zero exit code and carry on
        self._download_retcode = 1
483
484     def report_warning(self, message):
485         '''
486         Print the message to stderr, it will be prefixed with 'WARNING:'
487         If stderr is a tty file the 'WARNING:' will be colored
488         '''
489         if self.params.get('logger') is not None:
490             self.params['logger'].warning(message)
491         else:
492             if self.params.get('no_warnings'):
493                 return
494             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
495                 _msg_header = '\033[0;33mWARNING:\033[0m'
496             else:
497                 _msg_header = 'WARNING:'
498             warning_message = '%s %s' % (_msg_header, message)
499             self.to_stderr(warning_message)
500
501     def report_error(self, message, tb=None):
502         '''
503         Do the same as trouble, but prefixes the message with 'ERROR:', colored
504         in red if stderr is a tty file.
505         '''
506         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
507             _msg_header = '\033[0;31mERROR:\033[0m'
508         else:
509             _msg_header = 'ERROR:'
510         error_message = '%s %s' % (_msg_header, message)
511         self.trouble(error_message, tb)
512
513     def report_file_already_downloaded(self, file_name):
514         """Report file has already been fully downloaded."""
515         try:
516             self.to_screen('[download] %s has already been downloaded' % file_name)
517         except UnicodeEncodeError:
518             self.to_screen('[download] The file has already been downloaded')
519
520     def prepare_filename(self, info_dict):
521         """Generate the output filename."""
522         try:
523             template_dict = dict(info_dict)
524
525             template_dict['epoch'] = int(time.time())
526             autonumber_size = self.params.get('autonumber_size')
527             if autonumber_size is None:
528                 autonumber_size = 5
529             autonumber_templ = '%0' + str(autonumber_size) + 'd'
530             template_dict['autonumber'] = autonumber_templ % self._num_downloads
531             if template_dict.get('playlist_index') is not None:
532                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
533             if template_dict.get('resolution') is None:
534                 if template_dict.get('width') and template_dict.get('height'):
535                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
536                 elif template_dict.get('height'):
537                     template_dict['resolution'] = '%sp' % template_dict['height']
538                 elif template_dict.get('width'):
539                     template_dict['resolution'] = '?x%d' % template_dict['width']
540
541             sanitize = lambda k, v: sanitize_filename(
542                 compat_str(v),
543                 restricted=self.params.get('restrictfilenames'),
544                 is_id=(k == 'id'))
545             template_dict = dict((k, sanitize(k, v))
546                                  for k, v in template_dict.items()
547                                  if v is not None)
548             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
549
550             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
551             tmpl = compat_expanduser(outtmpl)
552             filename = tmpl % template_dict
553             # Temporary fix for #4787
554             # 'Treat' all problem characters by passing filename through preferredencoding
555             # to workaround encoding issues with subprocess on python2 @ Windows
556             if sys.version_info < (3, 0) and sys.platform == 'win32':
557                 filename = encodeFilename(filename, True).decode(preferredencoding())
558             return filename
559         except ValueError as err:
560             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
561             return None
562
563     def _match_entry(self, info_dict):
564         """ Returns None iff the file should be downloaded """
565
566         video_title = info_dict.get('title', info_dict.get('id', 'video'))
567         if 'title' in info_dict:
568             # This can happen when we're just evaluating the playlist
569             title = info_dict['title']
570             matchtitle = self.params.get('matchtitle', False)
571             if matchtitle:
572                 if not re.search(matchtitle, title, re.IGNORECASE):
573                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
574             rejecttitle = self.params.get('rejecttitle', False)
575             if rejecttitle:
576                 if re.search(rejecttitle, title, re.IGNORECASE):
577                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
578         date = info_dict.get('upload_date', None)
579         if date is not None:
580             dateRange = self.params.get('daterange', DateRange())
581             if date not in dateRange:
582                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
583         view_count = info_dict.get('view_count', None)
584         if view_count is not None:
585             min_views = self.params.get('min_views')
586             if min_views is not None and view_count < min_views:
587                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
588             max_views = self.params.get('max_views')
589             if max_views is not None and view_count > max_views:
590                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
591         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
592             return 'Skipping "%s" because it is age restricted' % video_title
593         if self.in_download_archive(info_dict):
594             return '%s has already been recorded in archive' % video_title
595
596         match_filter = self.params.get('match_filter')
597         if match_filter is not None:
598             ret = match_filter(info_dict)
599             if ret is not None:
600                 return ret
601
602         return None
603
604     @staticmethod
605     def add_extra_info(info_dict, extra_info):
606         '''Set the keys from extra_info in info dict if they are missing'''
607         for key, value in extra_info.items():
608             info_dict.setdefault(key, value)
609
610     def extract_info(self, url, download=True, ie_key=None, extra_info={},
611                      process=True):
612         '''
613         Returns a list with a dictionary for each video we find.
614         If 'download', also downloads the videos.
615         extra_info is a dict containing the extra values to add to each result
616          '''
617
618         if ie_key:
619             ies = [self.get_info_extractor(ie_key)]
620         else:
621             ies = self._ies
622
623         for ie in ies:
624             if not ie.suitable(url):
625                 continue
626
627             if not ie.working():
628                 self.report_warning('The program functionality for this site has been marked as broken, '
629                                     'and will probably not work.')
630
631             try:
632                 ie_result = ie.extract(url)
633                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
634                     break
635                 if isinstance(ie_result, list):
636                     # Backwards compatibility: old IE result format
637                     ie_result = {
638                         '_type': 'compat_list',
639                         'entries': ie_result,
640                     }
641                 self.add_default_extra_info(ie_result, ie, url)
642                 if process:
643                     return self.process_ie_result(ie_result, download, extra_info)
644                 else:
645                     return ie_result
646             except ExtractorError as de:  # An error we somewhat expected
647                 self.report_error(compat_str(de), de.format_traceback())
648                 break
649             except MaxDownloadsReached:
650                 raise
651             except Exception as e:
652                 if self.params.get('ignoreerrors', False):
653                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
654                     break
655                 else:
656                     raise
657         else:
658             self.report_error('no suitable InfoExtractor for URL %s' % url)
659
660     def add_default_extra_info(self, ie_result, ie, url):
661         self.add_extra_info(ie_result, {
662             'extractor': ie.IE_NAME,
663             'webpage_url': url,
664             'webpage_url_basename': url_basename(url),
665             'extractor_key': ie.ie_key(),
666         })
667
668     def process_ie_result(self, ie_result, download=True, extra_info={}):
669         """
670         Take the result of the ie(may be modified) and resolve all unresolved
671         references (URLs, playlist items).
672
673         It will also download the videos if 'download'.
674         Returns the resolved ie_result.
675         """
676
677         result_type = ie_result.get('_type', 'video')
678
679         if result_type in ('url', 'url_transparent'):
680             extract_flat = self.params.get('extract_flat', False)
681             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
682                     extract_flat is True):
683                 if self.params.get('forcejson', False):
684                     self.to_stdout(json.dumps(ie_result))
685                 return ie_result
686
687         if result_type == 'video':
688             self.add_extra_info(ie_result, extra_info)
689             return self.process_video_result(ie_result, download=download)
690         elif result_type == 'url':
691             # We have to add extra_info to the results because it may be
692             # contained in a playlist
693             return self.extract_info(ie_result['url'],
694                                      download,
695                                      ie_key=ie_result.get('ie_key'),
696                                      extra_info=extra_info)
697         elif result_type == 'url_transparent':
698             # Use the information from the embedding page
699             info = self.extract_info(
700                 ie_result['url'], ie_key=ie_result.get('ie_key'),
701                 extra_info=extra_info, download=False, process=False)
702
703             force_properties = dict(
704                 (k, v) for k, v in ie_result.items() if v is not None)
705             for f in ('_type', 'url'):
706                 if f in force_properties:
707                     del force_properties[f]
708             new_result = info.copy()
709             new_result.update(force_properties)
710
711             assert new_result.get('_type') != 'url_transparent'
712
713             return self.process_ie_result(
714                 new_result, download=download, extra_info=extra_info)
715         elif result_type == 'playlist' or result_type == 'multi_video':
716             # We process each entry in the playlist
717             playlist = ie_result.get('title', None) or ie_result.get('id', None)
718             self.to_screen('[download] Downloading playlist: %s' % playlist)
719
720             playlist_results = []
721
722             playliststart = self.params.get('playliststart', 1) - 1
723             playlistend = self.params.get('playlistend', None)
724             # For backwards compatibility, interpret -1 as whole list
725             if playlistend == -1:
726                 playlistend = None
727
728             playlistitems_str = self.params.get('playlist_items', None)
729             playlistitems = None
730             if playlistitems_str is not None:
731                 def iter_playlistitems(format):
732                     for string_segment in format.split(','):
733                         if '-' in string_segment:
734                             start, end = string_segment.split('-')
735                             for item in range(int(start), int(end) + 1):
736                                 yield int(item)
737                         else:
738                             yield int(string_segment)
739                 playlistitems = iter_playlistitems(playlistitems_str)
740
741             ie_entries = ie_result['entries']
742             if isinstance(ie_entries, list):
743                 n_all_entries = len(ie_entries)
744                 if playlistitems:
745                     entries = [ie_entries[i - 1] for i in playlistitems]
746                 else:
747                     entries = ie_entries[playliststart:playlistend]
748                 n_entries = len(entries)
749                 self.to_screen(
750                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
751                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
752             elif isinstance(ie_entries, PagedList):
753                 if playlistitems:
754                     entries = []
755                     for item in playlistitems:
756                         entries.extend(ie_entries.getslice(
757                             item - 1, item
758                         ))
759                 else:
760                     entries = ie_entries.getslice(
761                         playliststart, playlistend)
762                 n_entries = len(entries)
763                 self.to_screen(
764                     "[%s] playlist %s: Downloading %d videos" %
765                     (ie_result['extractor'], playlist, n_entries))
766             else:  # iterable
767                 if playlistitems:
768                     entry_list = list(ie_entries)
769                     entries = [entry_list[i - 1] for i in playlistitems]
770                 else:
771                     entries = list(itertools.islice(
772                         ie_entries, playliststart, playlistend))
773                 n_entries = len(entries)
774                 self.to_screen(
775                     "[%s] playlist %s: Downloading %d videos" %
776                     (ie_result['extractor'], playlist, n_entries))
777
778             if self.params.get('playlistreverse', False):
779                 entries = entries[::-1]
780
781             for i, entry in enumerate(entries, 1):
782                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
783                 extra = {
784                     'n_entries': n_entries,
785                     'playlist': playlist,
786                     'playlist_id': ie_result.get('id'),
787                     'playlist_title': ie_result.get('title'),
788                     'playlist_index': i + playliststart,
789                     'extractor': ie_result['extractor'],
790                     'webpage_url': ie_result['webpage_url'],
791                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
792                     'extractor_key': ie_result['extractor_key'],
793                 }
794
795                 reason = self._match_entry(entry)
796                 if reason is not None:
797                     self.to_screen('[download] ' + reason)
798                     continue
799
800                 entry_result = self.process_ie_result(entry,
801                                                       download=download,
802                                                       extra_info=extra)
803                 playlist_results.append(entry_result)
804             ie_result['entries'] = playlist_results
805             return ie_result
806         elif result_type == 'compat_list':
807             self.report_warning(
808                 'Extractor %s returned a compat_list result. '
809                 'It needs to be updated.' % ie_result.get('extractor'))
810
811             def _fixup(r):
812                 self.add_extra_info(
813                     r,
814                     {
815                         'extractor': ie_result['extractor'],
816                         'webpage_url': ie_result['webpage_url'],
817                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
818                         'extractor_key': ie_result['extractor_key'],
819                     }
820                 )
821                 return r
822             ie_result['entries'] = [
823                 self.process_ie_result(_fixup(r), download, extra_info)
824                 for r in ie_result['entries']
825             ]
826             return ie_result
827         else:
828             raise Exception('Invalid result type: %s' % result_type)
829
830     def _apply_format_filter(self, format_spec, available_formats):
831         " Returns a tuple of the remaining format_spec and filtered formats "
832
833         OPERATORS = {
834             '<': operator.lt,
835             '<=': operator.le,
836             '>': operator.gt,
837             '>=': operator.ge,
838             '=': operator.eq,
839             '!=': operator.ne,
840         }
841         operator_rex = re.compile(r'''(?x)\s*\[
842             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
843             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
844             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
845             \]$
846             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
847         m = operator_rex.search(format_spec)
848         if m:
849             try:
850                 comparison_value = int(m.group('value'))
851             except ValueError:
852                 comparison_value = parse_filesize(m.group('value'))
853                 if comparison_value is None:
854                     comparison_value = parse_filesize(m.group('value') + 'B')
855                 if comparison_value is None:
856                     raise ValueError(
857                         'Invalid value %r in format specification %r' % (
858                             m.group('value'), format_spec))
859             op = OPERATORS[m.group('op')]
860
861         if not m:
862             STR_OPERATORS = {
863                 '=': operator.eq,
864                 '!=': operator.ne,
865             }
866             str_operator_rex = re.compile(r'''(?x)\s*\[
867                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
868                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
869                 \s*(?P<value>[a-zA-Z0-9_-]+)
870                 \s*\]$
871                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
872             m = str_operator_rex.search(format_spec)
873             if m:
874                 comparison_value = m.group('value')
875                 op = STR_OPERATORS[m.group('op')]
876
877         if not m:
878             raise ValueError('Invalid format specification %r' % format_spec)
879
880         def _filter(f):
881             actual_value = f.get(m.group('key'))
882             if actual_value is None:
883                 return m.group('none_inclusive')
884             return op(actual_value, comparison_value)
885         new_formats = [f for f in available_formats if _filter(f)]
886
887         new_format_spec = format_spec[:-len(m.group(0))]
888         if not new_format_spec:
889             new_format_spec = 'best'
890
891         return (new_format_spec, new_formats)
892
893     def select_format(self, format_spec, available_formats):
894         while format_spec.endswith(']'):
895             format_spec, available_formats = self._apply_format_filter(
896                 format_spec, available_formats)
897         if not available_formats:
898             return None
899
900         if format_spec == 'best' or format_spec is None:
901             return available_formats[-1]
902         elif format_spec == 'worst':
903             return available_formats[0]
904         elif format_spec == 'bestaudio':
905             audio_formats = [
906                 f for f in available_formats
907                 if f.get('vcodec') == 'none']
908             if audio_formats:
909                 return audio_formats[-1]
910         elif format_spec == 'worstaudio':
911             audio_formats = [
912                 f for f in available_formats
913                 if f.get('vcodec') == 'none']
914             if audio_formats:
915                 return audio_formats[0]
916         elif format_spec == 'bestvideo':
917             video_formats = [
918                 f for f in available_formats
919                 if f.get('acodec') == 'none']
920             if video_formats:
921                 return video_formats[-1]
922         elif format_spec == 'worstvideo':
923             video_formats = [
924                 f for f in available_formats
925                 if f.get('acodec') == 'none']
926             if video_formats:
927                 return video_formats[0]
928         else:
929             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
930             if format_spec in extensions:
931                 filter_f = lambda f: f['ext'] == format_spec
932             else:
933                 filter_f = lambda f: f['format_id'] == format_spec
934             matches = list(filter(filter_f, available_formats))
935             if matches:
936                 return matches[-1]
937         return None
938
939     def _calc_headers(self, info_dict):
940         res = std_headers.copy()
941
942         add_headers = info_dict.get('http_headers')
943         if add_headers:
944             res.update(add_headers)
945
946         cookies = self._calc_cookies(info_dict)
947         if cookies:
948             res['Cookie'] = cookies
949
950         return res
951
952     def _calc_cookies(self, info_dict):
953         class _PseudoRequest(object):
954             def __init__(self, url):
955                 self.url = url
956                 self.headers = {}
957                 self.unverifiable = False
958
959             def add_unredirected_header(self, k, v):
960                 self.headers[k] = v
961
962             def get_full_url(self):
963                 return self.url
964
965             def is_unverifiable(self):
966                 return self.unverifiable
967
968             def has_header(self, h):
969                 return h in self.headers
970
971             def get_header(self, h, default=None):
972                 return self.headers.get(h, default)
973
974         pr = _PseudoRequest(info_dict['url'])
975         self.cookiejar.add_cookie_header(pr)
976         return pr.headers.get('Cookie')
977
    def process_video_result(self, info_dict, download=True):
        """Normalize a single resolved video result (thumbnails, dates,
        format fields), pick the requested format(s) and, when 'download' is
        true, hand each chosen format to process_info().  Returns the
        (mutated) info_dict."""
        assert info_dict.get('_type', 'video') == 'video'

        # 'id' and 'title' are the only hard requirements on extractor results.
        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        # Normalize a lone 'thumbnail' into the 'thumbnails' list and sort
        # thumbnails worst-to-best by (preference, size, id, url).
        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if thumbnails:
            thumbnails.sort(key=lambda t: (
                t.get('preference'), t.get('width'), t.get('height'),
                t.get('id'), t.get('url')))
            for i, t in enumerate(thumbnails):
                if 'width' in t and 'height' in t:
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                if t.get('id') is None:
                    t['id'] = '%d' % i

        if thumbnails and 'thumbnail' not in info_dict:
            # Backwards compatibility: expose the best thumbnail separately.
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        # Derive upload_date from a numeric timestamp when possible.
        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Working around negative timestamps in Windows
            # (see http://bugs.python.org/issue1646728)
            if info_dict['timestamp'] < 0 and os.name == 'nt':
                info_dict['timestamp'] = 0
            upload_date = datetime.datetime.utcfromtimestamp(
                info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

        # These extractors handle format selection themselves
        if info_dict['extractor'] in ['Youku']:
            if download:
                self.process_info(info_dict)
            return info_dict

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            raise ExtractorError('No video formats found!')

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)

            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url']).lower()
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)

        # --format-limit: keep formats up to and including the limit id.
        format_limit = self.params.get('format_limit', None)
        if format_limit:
            formats = list(takewhile_inclusive(
                lambda f: f['format_id'] != format_limit, formats
            ))

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' field if the original info_dict lists them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats'):
            self.list_formats(info_dict)
            return
        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            req_format = 'best'
        formats_to_download = []
        # The -1 is for supporting YoutubeIE
        if req_format in ('-1', 'all'):
            formats_to_download = formats
        else:
            for rfstr in req_format.split(','):
                # We can accept formats requested in the format: 34/5/best, we pick
                # the first that is available, starting from left
                req_formats = rfstr.split('/')
                for rf in req_formats:
                    if re.match(r'.+?\+.+?', rf) is not None:
                        # Two formats have been requested like '137+139'
                        format_1, format_2 = rf.split('+')
                        formats_info = (self.select_format(format_1, formats),
                                        self.select_format(format_2, formats))
                        if all(formats_info):
                            # The first format must contain the video and the
                            # second the audio
                            if formats_info[0].get('vcodec') == 'none':
                                self.report_error('The first format must '
                                                  'contain the video, try using '
                                                  '"-f %s+%s"' % (format_2, format_1))
                                return
                            # Merged file extension: the video's, unless
                            # --merge-output-format was given.
                            output_ext = (
                                formats_info[0]['ext']
                                if self.params.get('merge_output_format') is None
                                else self.params['merge_output_format'])
                            # Synthesize a combined format: video properties
                            # from the first, audio properties from the second.
                            selected_format = {
                                'requested_formats': formats_info,
                                'format': '%s+%s' % (formats_info[0].get('format'),
                                                     formats_info[1].get('format')),
                                'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                        formats_info[1].get('format_id')),
                                'width': formats_info[0].get('width'),
                                'height': formats_info[0].get('height'),
                                'resolution': formats_info[0].get('resolution'),
                                'fps': formats_info[0].get('fps'),
                                'vcodec': formats_info[0].get('vcodec'),
                                'vbr': formats_info[0].get('vbr'),
                                'stretched_ratio': formats_info[0].get('stretched_ratio'),
                                'acodec': formats_info[1].get('acodec'),
                                'abr': formats_info[1].get('abr'),
                                'ext': output_ext,
                            }
                        else:
                            selected_format = None
                    else:
                        selected_format = self.select_format(rf, formats)
                    if selected_format is not None:
                        formats_to_download.append(selected_format)
                        break
        if not formats_to_download:
            raise ExtractorError('requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
1148
1149     def process_info(self, info_dict):
1150         """Process a single resolved IE result."""
1151
1152         assert info_dict.get('_type', 'video') == 'video'
1153
1154         max_downloads = self.params.get('max_downloads')
1155         if max_downloads is not None:
1156             if self._num_downloads >= int(max_downloads):
1157                 raise MaxDownloadsReached()
1158
1159         info_dict['fulltitle'] = info_dict['title']
1160         if len(info_dict['title']) > 200:
1161             info_dict['title'] = info_dict['title'][:197] + '...'
1162
1163         # Keep for backwards compatibility
1164         info_dict['stitle'] = info_dict['title']
1165
1166         if 'format' not in info_dict:
1167             info_dict['format'] = info_dict['ext']
1168
1169         reason = self._match_entry(info_dict)
1170         if reason is not None:
1171             self.to_screen('[download] ' + reason)
1172             return
1173
1174         self._num_downloads += 1
1175
1176         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1177
1178         # Forced printings
1179         if self.params.get('forcetitle', False):
1180             self.to_stdout(info_dict['fulltitle'])
1181         if self.params.get('forceid', False):
1182             self.to_stdout(info_dict['id'])
1183         if self.params.get('forceurl', False):
1184             if info_dict.get('requested_formats') is not None:
1185                 for f in info_dict['requested_formats']:
1186                     self.to_stdout(f['url'] + f.get('play_path', ''))
1187             else:
1188                 # For RTMP URLs, also include the playpath
1189                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1190         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1191             self.to_stdout(info_dict['thumbnail'])
1192         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1193             self.to_stdout(info_dict['description'])
1194         if self.params.get('forcefilename', False) and filename is not None:
1195             self.to_stdout(filename)
1196         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1197             self.to_stdout(formatSeconds(info_dict['duration']))
1198         if self.params.get('forceformat', False):
1199             self.to_stdout(info_dict['format'])
1200         if self.params.get('forcejson', False):
1201             self.to_stdout(json.dumps(info_dict))
1202
1203         # Do nothing else if in simulate mode
1204         if self.params.get('simulate', False):
1205             return
1206
1207         if filename is None:
1208             return
1209
1210         try:
1211             dn = os.path.dirname(encodeFilename(filename))
1212             if dn and not os.path.exists(dn):
1213                 os.makedirs(dn)
1214         except (OSError, IOError) as err:
1215             self.report_error('unable to create directory ' + compat_str(err))
1216             return
1217
1218         if self.params.get('writedescription', False):
1219             descfn = filename + '.description'
1220             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1221                 self.to_screen('[info] Video description is already present')
1222             elif info_dict.get('description') is None:
1223                 self.report_warning('There\'s no description to write.')
1224             else:
1225                 try:
1226                     self.to_screen('[info] Writing video description to: ' + descfn)
1227                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1228                         descfile.write(info_dict['description'])
1229                 except (OSError, IOError):
1230                     self.report_error('Cannot write description file ' + descfn)
1231                     return
1232
1233         if self.params.get('writeannotations', False):
1234             annofn = filename + '.annotations.xml'
1235             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1236                 self.to_screen('[info] Video annotations are already present')
1237             else:
1238                 try:
1239                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1240                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1241                         annofile.write(info_dict['annotations'])
1242                 except (KeyError, TypeError):
1243                     self.report_warning('There are no annotations to write.')
1244                 except (OSError, IOError):
1245                     self.report_error('Cannot write annotations file: ' + annofn)
1246                     return
1247
1248         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1249                                        self.params.get('writeautomaticsub')])
1250
1251         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
1252             # subtitles download errors are already managed as troubles in relevant IE
1253             # that way it will silently go on when used with unsupporting IE
1254             subtitles = info_dict['subtitles']
1255             sub_format = self.params.get('subtitlesformat', 'srt')
1256             for sub_lang in subtitles.keys():
1257                 sub = subtitles[sub_lang]
1258                 if sub is None:
1259                     continue
1260                 try:
1261                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1262                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1263                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1264                     else:
1265                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1266                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1267                             subfile.write(sub)
1268                 except (OSError, IOError):
1269                     self.report_error('Cannot write subtitles file ' + sub_filename)
1270                     return
1271
1272         if self.params.get('writeinfojson', False):
1273             infofn = os.path.splitext(filename)[0] + '.info.json'
1274             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1275                 self.to_screen('[info] Video description metadata is already present')
1276             else:
1277                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1278                 try:
1279                     write_json_file(info_dict, infofn)
1280                 except (OSError, IOError):
1281                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1282                     return
1283
1284         self._write_thumbnails(info_dict, filename)
1285
1286         if not self.params.get('skip_download', False):
1287             try:
1288                 def dl(name, info):
1289                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1290                     for ph in self._progress_hooks:
1291                         fd.add_progress_hook(ph)
1292                     if self.params.get('verbose'):
1293                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1294                     return fd.download(name, info)
1295
1296                 if info_dict.get('requested_formats') is not None:
1297                     downloaded = []
1298                     success = True
1299                     merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1300                     if not merger._executable:
1301                         postprocessors = []
1302                         self.report_warning('You have requested multiple '
1303                                             'formats but ffmpeg or avconv are not installed.'
1304                                             ' The formats won\'t be merged')
1305                     else:
1306                         postprocessors = [merger]
1307                     for f in info_dict['requested_formats']:
1308                         new_info = dict(info_dict)
1309                         new_info.update(f)
1310                         fname = self.prepare_filename(new_info)
1311                         fname = prepend_extension(fname, 'f%s' % f['format_id'])
1312                         downloaded.append(fname)
1313                         partial_success = dl(fname, new_info)
1314                         success = success and partial_success
1315                     info_dict['__postprocessors'] = postprocessors
1316                     info_dict['__files_to_merge'] = downloaded
1317                 else:
1318                     # Just a single file
1319                     success = dl(filename, info_dict)
1320             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1321                 self.report_error('unable to download video data: %s' % str(err))
1322                 return
1323             except (OSError, IOError) as err:
1324                 raise UnavailableVideoError(err)
1325             except (ContentTooShortError, ) as err:
1326                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1327                 return
1328
1329             if success:
1330                 # Fixup content
1331                 fixup_policy = self.params.get('fixup')
1332                 if fixup_policy is None:
1333                     fixup_policy = 'detect_or_warn'
1334
1335                 stretched_ratio = info_dict.get('stretched_ratio')
1336                 if stretched_ratio is not None and stretched_ratio != 1:
1337                     if fixup_policy == 'warn':
1338                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1339                             info_dict['id'], stretched_ratio))
1340                     elif fixup_policy == 'detect_or_warn':
1341                         stretched_pp = FFmpegFixupStretchedPP(self)
1342                         if stretched_pp.available:
1343                             info_dict.setdefault('__postprocessors', [])
1344                             info_dict['__postprocessors'].append(stretched_pp)
1345                         else:
1346                             self.report_warning(
1347                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1348                                     info_dict['id'], stretched_ratio))
1349                     else:
1350                         assert fixup_policy in ('ignore', 'never')
1351
1352                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1353                     if fixup_policy == 'warn':
1354                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1355                             info_dict['id']))
1356                     elif fixup_policy == 'detect_or_warn':
1357                         fixup_pp = FFmpegFixupM4aPP(self)
1358                         if fixup_pp.available:
1359                             info_dict.setdefault('__postprocessors', [])
1360                             info_dict['__postprocessors'].append(fixup_pp)
1361                         else:
1362                             self.report_warning(
1363                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1364                                     info_dict['id']))
1365                     else:
1366                         assert fixup_policy in ('ignore', 'never')
1367
1368                 try:
1369                     self.post_process(filename, info_dict)
1370                 except (PostProcessingError) as err:
1371                     self.report_error('postprocessing: %s' % str(err))
1372                     return
1373                 self.record_download_archive(info_dict)
1374
1375     def download(self, url_list):
1376         """Download a given list of URLs."""
1377         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1378         if (len(url_list) > 1 and
1379                 '%' not in outtmpl
1380                 and self.params.get('max_downloads') != 1):
1381             raise SameFileError(outtmpl)
1382
1383         for url in url_list:
1384             try:
1385                 # It also downloads the videos
1386                 res = self.extract_info(url)
1387             except UnavailableVideoError:
1388                 self.report_error('unable to download video')
1389             except MaxDownloadsReached:
1390                 self.to_screen('[info] Maximum number of downloaded files reached.')
1391                 raise
1392             else:
1393                 if self.params.get('dump_single_json', False):
1394                     self.to_stdout(json.dumps(res))
1395
1396         return self._download_retcode
1397
1398     def download_with_info_file(self, info_filename):
1399         with io.open(info_filename, 'r', encoding='utf-8') as f:
1400             info = json.load(f)
1401         try:
1402             self.process_ie_result(info, download=True)
1403         except DownloadError:
1404             webpage_url = info.get('webpage_url')
1405             if webpage_url is not None:
1406                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1407                 return self.download([webpage_url])
1408             else:
1409                 raise
1410         return self._download_retcode
1411
1412     def post_process(self, filename, ie_info):
1413         """Run all the postprocessors on the given file."""
1414         info = dict(ie_info)
1415         info['filepath'] = filename
1416         pps_chain = []
1417         if ie_info.get('__postprocessors') is not None:
1418             pps_chain.extend(ie_info['__postprocessors'])
1419         pps_chain.extend(self._pps)
1420         for pp in pps_chain:
1421             keep_video = None
1422             old_filename = info['filepath']
1423             try:
1424                 keep_video_wish, info = pp.run(info)
1425                 if keep_video_wish is not None:
1426                     if keep_video_wish:
1427                         keep_video = keep_video_wish
1428                     elif keep_video is None:
1429                         # No clear decision yet, let IE decide
1430                         keep_video = keep_video_wish
1431             except PostProcessingError as e:
1432                 self.report_error(e.msg)
1433             if keep_video is False and not self.params.get('keepvideo', False):
1434                 try:
1435                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1436                     os.remove(encodeFilename(old_filename))
1437                 except (IOError, OSError):
1438                     self.report_warning('Unable to remove downloaded video file')
1439
1440     def _make_archive_id(self, info_dict):
1441         # Future-proof against any change in case
1442         # and backwards compatibility with prior versions
1443         extractor = info_dict.get('extractor_key')
1444         if extractor is None:
1445             if 'id' in info_dict:
1446                 extractor = info_dict.get('ie_key')  # key in a playlist
1447         if extractor is None:
1448             return None  # Incomplete video information
1449         return extractor.lower() + ' ' + info_dict['id']
1450
1451     def in_download_archive(self, info_dict):
1452         fn = self.params.get('download_archive')
1453         if fn is None:
1454             return False
1455
1456         vid_id = self._make_archive_id(info_dict)
1457         if vid_id is None:
1458             return False  # Incomplete video information
1459
1460         try:
1461             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1462                 for line in archive_file:
1463                     if line.strip() == vid_id:
1464                         return True
1465         except IOError as ioe:
1466             if ioe.errno != errno.ENOENT:
1467                 raise
1468         return False
1469
1470     def record_download_archive(self, info_dict):
1471         fn = self.params.get('download_archive')
1472         if fn is None:
1473             return
1474         vid_id = self._make_archive_id(info_dict)
1475         assert vid_id
1476         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1477             archive_file.write(vid_id + '\n')
1478
1479     @staticmethod
1480     def format_resolution(format, default='unknown'):
1481         if format.get('vcodec') == 'none':
1482             return 'audio only'
1483         if format.get('resolution') is not None:
1484             return format['resolution']
1485         if format.get('height') is not None:
1486             if format.get('width') is not None:
1487                 res = '%sx%s' % (format['width'], format['height'])
1488             else:
1489                 res = '%sp' % format['height']
1490         elif format.get('width') is not None:
1491             res = '?x%d' % format['width']
1492         else:
1493             res = default
1494         return res
1495
1496     def _format_note(self, fdict):
1497         res = ''
1498         if fdict.get('ext') in ['f4f', 'f4m']:
1499             res += '(unsupported) '
1500         if fdict.get('format_note') is not None:
1501             res += fdict['format_note'] + ' '
1502         if fdict.get('tbr') is not None:
1503             res += '%4dk ' % fdict['tbr']
1504         if fdict.get('container') is not None:
1505             if res:
1506                 res += ', '
1507             res += '%s container' % fdict['container']
1508         if (fdict.get('vcodec') is not None and
1509                 fdict.get('vcodec') != 'none'):
1510             if res:
1511                 res += ', '
1512             res += fdict['vcodec']
1513             if fdict.get('vbr') is not None:
1514                 res += '@'
1515         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1516             res += 'video@'
1517         if fdict.get('vbr') is not None:
1518             res += '%4dk' % fdict['vbr']
1519         if fdict.get('fps') is not None:
1520             res += ', %sfps' % fdict['fps']
1521         if fdict.get('acodec') is not None:
1522             if res:
1523                 res += ', '
1524             if fdict['acodec'] == 'none':
1525                 res += 'video only'
1526             else:
1527                 res += '%-5s' % fdict['acodec']
1528         elif fdict.get('abr') is not None:
1529             if res:
1530                 res += ', '
1531             res += 'audio'
1532         if fdict.get('abr') is not None:
1533             res += '@%3dk' % fdict['abr']
1534         if fdict.get('asr') is not None:
1535             res += ' (%5dHz)' % fdict['asr']
1536         if fdict.get('filesize') is not None:
1537             if res:
1538                 res += ', '
1539             res += format_bytes(fdict['filesize'])
1540         elif fdict.get('filesize_approx') is not None:
1541             if res:
1542                 res += ', '
1543             res += '~' + format_bytes(fdict['filesize_approx'])
1544         return res
1545
1546     def list_formats(self, info_dict):
1547         def line(format, idlen=20):
1548             return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1549                 format['format_id'],
1550                 format['ext'],
1551                 self.format_resolution(format),
1552                 self._format_note(format),
1553             ))
1554
1555         formats = info_dict.get('formats', [info_dict])
1556         idlen = max(len('format code'),
1557                     max(len(f['format_id']) for f in formats))
1558         formats_s = [
1559             line(f, idlen) for f in formats
1560             if f.get('preference') is None or f['preference'] >= -1000]
1561         if len(formats) > 1:
1562             formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1563
1564         header_line = line({
1565             'format_id': 'format code', 'ext': 'extension',
1566             'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1567         self.to_screen(
1568             '[info] Available formats for %s:\n%s\n%s' %
1569             (info_dict['id'], header_line, '\n'.join(formats_s)))
1570
1571     def list_thumbnails(self, info_dict):
1572         thumbnails = info_dict.get('thumbnails')
1573         if not thumbnails:
1574             tn_url = info_dict.get('thumbnail')
1575             if tn_url:
1576                 thumbnails = [{'id': '0', 'url': tn_url}]
1577             else:
1578                 self.to_screen(
1579                     '[info] No thumbnails present for %s' % info_dict['id'])
1580                 return
1581
1582         self.to_screen(
1583             '[info] Thumbnails for %s:' % info_dict['id'])
1584         self.to_screen(render_table(
1585             ['ID', 'width', 'height', 'URL'],
1586             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1587
1588     def urlopen(self, req):
1589         """ Start an HTTP download """
1590
1591         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1592         # always respected by websites, some tend to give out URLs with non percent-encoded
1593         # non-ASCII characters (see telemb.py, ard.py [#3412])
1594         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1595         # To work around aforementioned issue we will replace request's original URL with
1596         # percent-encoded one
1597         req_is_string = isinstance(req, compat_basestring)
1598         url = req if req_is_string else req.get_full_url()
1599         url_escaped = escape_url(url)
1600
1601         # Substitute URL if any change after escaping
1602         if url != url_escaped:
1603             if req_is_string:
1604                 req = url_escaped
1605             else:
1606                 req = compat_urllib_request.Request(
1607                     url_escaped, data=req.data, headers=req.headers,
1608                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1609
1610         return self._opener.open(req, timeout=self._socket_timeout)
1611
    def print_debug_header(self):
        """Print the '[debug] ...' header shown in --verbose mode: encodings,
        youtube-dl version, git revision, Python/platform info, helper tool
        versions, proxy map and (with --call-home) the public IP and an
        update check."""
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        # sys.stdout may have been replaced by an object without an
        # 'encoding' attribute; report the replacement's type instead.
        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
        try:
            # Best effort: when running from a git checkout, also report
            # the current commit hash.
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        except:
            # Deliberately broad: a missing git binary or any other failure
            # only disables this debug line, it must never abort the header.
            try:
                # Python 2 only: discard the swallowed exception so it does
                # not linger in sys.exc_info(); absent on Python 3.
                sys.exc_clear()
            except:
                pass
        self._write_string('[debug] Python version %s - %s\n' % (
            platform.python_version(), platform_name()))

        # Versions of the external helpers (ffmpeg/avconv/rtmpdump).
        exe_versions = FFmpegPostProcessor.get_versions()
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Collect the effective proxies from every handler that exposes them.
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
1676
1677     def _setup_opener(self):
1678         timeout_val = self.params.get('socket_timeout')
1679         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1680
1681         opts_cookiefile = self.params.get('cookiefile')
1682         opts_proxy = self.params.get('proxy')
1683
1684         if opts_cookiefile is None:
1685             self.cookiejar = compat_cookiejar.CookieJar()
1686         else:
1687             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1688                 opts_cookiefile)
1689             if os.access(opts_cookiefile, os.R_OK):
1690                 self.cookiejar.load()
1691
1692         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1693             self.cookiejar)
1694         if opts_proxy is not None:
1695             if opts_proxy == '':
1696                 proxies = {}
1697             else:
1698                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1699         else:
1700             proxies = compat_urllib_request.getproxies()
1701             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1702             if 'http' in proxies and 'https' not in proxies:
1703                 proxies['https'] = proxies['http']
1704         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1705
1706         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1707         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1708         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1709         opener = compat_urllib_request.build_opener(
1710             https_handler, proxy_handler, cookie_processor, ydlh)
1711         # Delete the default user-agent header, which would otherwise apply in
1712         # cases where our custom HTTP handler doesn't come into play
1713         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1714         opener.addheaders = []
1715         self._opener = opener
1716
1717     def encode(self, s):
1718         if isinstance(s, bytes):
1719             return s  # Already encoded
1720
1721         try:
1722             return s.encode(self.get_encoding())
1723         except UnicodeEncodeError as err:
1724             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1725             raise
1726
1727     def get_encoding(self):
1728         encoding = self.params.get('encoding')
1729         if encoding is None:
1730             encoding = preferredencoding()
1731         return encoding
1732
1733     def _write_thumbnails(self, info_dict, filename):
1734         if self.params.get('writethumbnail', False):
1735             thumbnails = info_dict.get('thumbnails')
1736             if thumbnails:
1737                 thumbnails = [thumbnails[-1]]
1738         elif self.params.get('write_all_thumbnails', False):
1739             thumbnails = info_dict.get('thumbnails')
1740         else:
1741             return
1742
1743         if not thumbnails:
1744             # No thumbnails present, so return immediately
1745             return
1746
1747         for t in thumbnails:
1748             thumb_ext = determine_ext(t['url'], 'jpg')
1749             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
1750             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
1751             thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
1752
1753             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1754                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
1755                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1756             else:
1757                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
1758                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1759                 try:
1760                     uf = self.urlopen(t['url'])
1761                     with open(thumb_filename, 'wb') as thumbf:
1762                         shutil.copyfileobj(uf, thumbf)
1763                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
1764                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
1765                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1766                     self.report_warning('Unable to download thumbnail "%s": %s' %
1767                                         (t['url'], compat_str(err)))