3658332ecd8b60adabb318d9fb97e0288e0b369e
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import datetime
8 import errno
9 import io
10 import itertools
11 import json
12 import locale
13 import operator
14 import os
15 import platform
16 import re
17 import shutil
18 import subprocess
19 import socket
20 import sys
21 import time
22 import traceback
23
24 if os.name == 'nt':
25     import ctypes
26
27 from .compat import (
28     compat_basestring,
29     compat_cookiejar,
30     compat_expanduser,
31     compat_http_client,
32     compat_kwargs,
33     compat_str,
34     compat_urllib_error,
35     compat_urllib_request,
36 )
37 from .utils import (
38     escape_url,
39     ContentTooShortError,
40     date_from_str,
41     DateRange,
42     DEFAULT_OUTTMPL,
43     determine_ext,
44     DownloadError,
45     encodeFilename,
46     ExtractorError,
47     format_bytes,
48     formatSeconds,
49     get_term_width,
50     locked_file,
51     make_HTTPS_handler,
52     MaxDownloadsReached,
53     PagedList,
54     parse_filesize,
55     PostProcessingError,
56     platform_name,
57     preferredencoding,
58     render_table,
59     SameFileError,
60     sanitize_filename,
61     std_headers,
62     subtitles_filename,
63     takewhile_inclusive,
64     UnavailableVideoError,
65     url_basename,
66     version_tuple,
67     write_json_file,
68     write_string,
69     YoutubeDLHandler,
70     prepend_extension,
71     args_to_str,
72     age_restricted,
73 )
74 from .cache import Cache
75 from .extractor import get_info_extractor, gen_extractors
76 from .downloader import get_suitable_downloader
77 from .downloader.rtmp import rtmpdump_version
78 from .postprocessor import (
79     FFmpegFixupM4aPP,
80     FFmpegFixupStretchedPP,
81     FFmpegMergerPP,
82     FFmpegPostProcessor,
83     get_postprocessor,
84 )
85 from .version import __version__
86
87
88 class YoutubeDL(object):
89     """YoutubeDL class.
90
91     YoutubeDL objects are the ones responsible for downloading the
92     actual video file and writing it to disk if the user has requested
93     it, among some other tasks. In most cases there should be one per
94     program. As, given a video URL, the downloader doesn't know how to
95     extract all the needed information, task that InfoExtractors do, it
96     has to pass the URL to one of them.
97
98     For this, YoutubeDL objects have a method that allows
99     InfoExtractors to be registered in a given order. When it is passed
100     a URL, the YoutubeDL object hands it to the first InfoExtractor it
101     finds that reports being able to handle it. The InfoExtractor extracts
102     all the information about the video or videos the URL refers to, and
103     YoutubeDL process the extracted information, possibly using a File
104     Downloader to download the video.
105
106     YoutubeDL objects accept a lot of parameters. In order not to saturate
107     the object constructor with arguments, it receives a dictionary of
108     options instead. These options are available through the params
109     attribute for the InfoExtractors to use. The YoutubeDL also
110     registers itself as the downloader in charge for the InfoExtractors
111     that are added to it, so this is a "mutual registration".
112
113     Available options:
114
115     username:          Username for authentication purposes.
116     password:          Password for authentication purposes.
117     videopassword:     Password for accessing a video.
118     usenetrc:          Use netrc for authentication instead.
119     verbose:           Print additional info to stdout.
120     quiet:             Do not print messages to stdout.
121     no_warnings:       Do not print out anything for warnings.
122     forceurl:          Force printing final URL.
123     forcetitle:        Force printing title.
124     forceid:           Force printing ID.
125     forcethumbnail:    Force printing thumbnail URL.
126     forcedescription:  Force printing description.
127     forcefilename:     Force printing final filename.
128     forceduration:     Force printing duration.
129     forcejson:         Force printing info_dict as JSON.
130     dump_single_json:  Force printing the info_dict of the whole playlist
131                        (or video) as a single JSON line.
132     simulate:          Do not download the video files.
133     format:            Video format code. See options.py for more information.
134     format_limit:      Highest quality format to try.
135     outtmpl:           Template for output names.
136     restrictfilenames: Do not allow "&" and spaces in file names
137     ignoreerrors:      Do not stop on download errors.
138     nooverwrites:      Prevent overwriting files.
139     playliststart:     Playlist item to start at.
140     playlistend:       Playlist item to end at.
141     playlist_items:    Specific indices of playlist to download.
142     playlistreverse:   Download playlist items in reverse order.
143     matchtitle:        Download only matching titles.
144     rejecttitle:       Reject downloads for matching titles.
145     logger:            Log messages to a logging.Logger instance.
146     logtostderr:       Log messages to stderr instead of stdout.
147     writedescription:  Write the video description to a .description file
148     writeinfojson:     Write the video description to a .info.json file
149     writeannotations:  Write the video annotations to a .annotations.xml file
150     writethumbnail:    Write the thumbnail image to a file
151     write_all_thumbnails:  Write all thumbnail formats to files
152     writesubtitles:    Write the video subtitles to a file
153     writeautomaticsub: Write the automatic subtitles to a file
154     allsubtitles:      Downloads all the subtitles of the video
155                        (requires writesubtitles or writeautomaticsub)
156     listsubtitles:     Lists all available subtitles for the video
157     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
158     subtitleslangs:    List of languages of the subtitles to download
159     keepvideo:         Keep the video file after post-processing
160     daterange:         A DateRange object, download only if the upload_date is in the range.
161     skip_download:     Skip the actual download of the video file
162     cachedir:          Location of the cache files in the filesystem.
163                        False to disable filesystem cache.
164     noplaylist:        Download single video instead of a playlist if in doubt.
165     age_limit:         An integer representing the user's age in years.
166                        Unsuitable videos for the given age are skipped.
167     min_views:         An integer representing the minimum view count the video
168                        must have in order to not be skipped.
169                        Videos without view count information are always
170                        downloaded. None for no limit.
171     max_views:         An integer representing the maximum view count.
172                        Videos that are more popular than that are not
173                        downloaded.
174                        Videos without view count information are always
175                        downloaded. None for no limit.
176     download_archive:  File name of a file where all downloads are recorded.
177                        Videos already present in the file are not downloaded
178                        again.
179     cookiefile:        File name where cookies should be read from and dumped to.
180     nocheckcertificate:Do not verify SSL certificates
181     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
182                        At the moment, this is only supported by YouTube.
183     proxy:             URL of the proxy server to use
184     socket_timeout:    Time to wait for unresponsive hosts, in seconds
185     bidi_workaround:   Work around buggy terminals without bidirectional text
186                        support, using fribidi
187     debug_printtraffic:Print out sent and received HTTP traffic
188     include_ads:       Download ads as well
189     default_search:    Prepend this string if an input url is not valid.
190                        'auto' for elaborate guessing
191     encoding:          Use this encoding instead of the system-specified.
192     extract_flat:      Do not resolve URLs, return the immediate result.
193                        Pass in 'in_playlist' to only show this behavior for
194                        playlist items.
195     postprocessors:    A list of dictionaries, each with an entry
196                        * key:  The name of the postprocessor. See
197                                youtube_dl/postprocessor/__init__.py for a list.
198                        as well as any further keyword arguments for the
199                        postprocessor.
200     progress_hooks:    A list of functions that get called on download
201                        progress, with a dictionary with the entries
202                        * status: One of "downloading" and "finished".
203                                  Check this first and ignore unknown values.
204
205                        If status is one of "downloading" or "finished", the
206                        following properties may also be present:
207                        * filename: The final filename (always present)
208                        * downloaded_bytes: Bytes on disk
209                        * total_bytes: Size of the whole file, None if unknown
210                        * tmpfilename: The filename we're currently writing to
211                        * eta: The estimated time in seconds, None if unknown
212                        * speed: The download speed in bytes/second, None if
213                                 unknown
214
215                        Progress hooks are guaranteed to be called at least once
216                        (with status "finished") if the download is successful.
217     merge_output_format: Extension to use when merging formats.
218     fixup:             Automatically correct known faults of the file.
219                        One of:
220                        - "never": do nothing
221                        - "warn": only emit a warning
222                        - "detect_or_warn": check whether we can do anything
223                                            about it, warn otherwise (default)
224     source_address:    (Experimental) Client-side IP address to bind to.
225     call_home:         Boolean, true iff we are allowed to contact the
226                        youtube-dl servers for debugging.
227     sleep_interval:    Number of seconds to sleep before each download.
228     listformats:       Print an overview of available video formats and exit.
229     list_thumbnails:   Print a table of all thumbnails and exit.
230     match_filter:      A function that gets called with the info_dict of
231                        every video.
232                        If it returns a message, the video is ignored.
233                        If it returns None, the video is downloaded.
234                        match_filter_func in utils.py is one example for this.
235     no_color:          Do not emit color codes in output.
236
237     The following options determine which downloader is picked:
238     external_downloader: Executable of the external downloader to call.
239                        None or unset for standard (built-in) downloader.
240     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
241
242     The following parameters are not used by YoutubeDL itself, they are used by
243     the FileDownloader:
244     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
245     noresizebuffer, retries, continuedl, noprogress, consoletitle,
246     xattr_set_filesize.
247
248     The following options are used by the post processors:
249     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
250                        otherwise prefer avconv.
251     exec_cmd:          Arbitrary command to run after downloading
252     """
253
    # Class-level defaults; every instance rebinds these in __init__.
    params = None              # Option dictionary (see the class docstring)
    _ies = []                  # Registered InfoExtractor instances, in priority order
    _pps = []                  # Registered PostProcessor chain
    _download_retcode = None   # Overall return code accumulated across downloads
    _num_downloads = None      # Number of files downloaded by this instance
    _screen_file = None        # Screen output stream (stdout, or stderr with logtostderr)
260
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.

        params:    dictionary of options (see the class docstring); an empty
                   dict is used when omitted.
        auto_init: when True, print the debug header and register all default
                   info extractors immediately.
        """
        if params is None:
            params = {}
        self._ies = []
        self._ies_instances = {}
        self._pps = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Index 0 (stdout) when logtostderr is falsy, index 1 (stderr) when truthy
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = params
        self.cache = Cache(self)

        if params.get('bidi_workaround', False):
            try:
                import pty
                # Run an external bidi filter; its output comes back through
                # a pty so it behaves like a terminal.
                master, slave = pty.openpty()
                width = get_term_width()
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    # Prefer 'bidiv'; fall back to 'fribidi' when it is absent
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == 2:  # errno 2 == ENOENT (executable not found)
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.version_info >= (3,) and sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # On Python 3, the Unicode filesystem API will throw errors (#1474)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if '%(stitle)s' in self.params.get('outtmpl', ''):
            self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

        self._setup_opener()

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        # Instantiate the configured postprocessors: 'key' selects the class,
        # all remaining entries are forwarded as keyword arguments.
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            del pp_def['key']
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)
331
332     def warn_if_short_id(self, argv):
333         # short YouTube ID starting with dash?
334         idxs = [
335             i for i, a in enumerate(argv)
336             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
337         if idxs:
338             correct_argv = (
339                 ['youtube-dl'] +
340                 [a for i, a in enumerate(argv) if i not in idxs] +
341                 ['--'] + [argv[i] for i in idxs]
342             )
343             self.report_warning(
344                 'Long argument string detected. '
345                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
346                 args_to_str(correct_argv))
347
348     def add_info_extractor(self, ie):
349         """Add an InfoExtractor object to the end of the list."""
350         self._ies.append(ie)
351         self._ies_instances[ie.ie_key()] = ie
352         ie.set_downloader(self)
353
354     def get_info_extractor(self, ie_key):
355         """
356         Get an instance of an IE with name ie_key, it will try to get one from
357         the _ies list, if there's no instance it will create a new one and add
358         it to the extractor list.
359         """
360         ie = self._ies_instances.get(ie_key)
361         if ie is None:
362             ie = get_info_extractor(ie_key)()
363             self.add_info_extractor(ie)
364         return ie
365
366     def add_default_info_extractors(self):
367         """
368         Add the InfoExtractors returned by gen_extractors to the end of the list
369         """
370         for ie in gen_extractors():
371             self.add_info_extractor(ie)
372
373     def add_post_processor(self, pp):
374         """Add a PostProcessor object to the end of the chain."""
375         self._pps.append(pp)
376         pp.set_downloader(self)
377
378     def add_progress_hook(self, ph):
379         """Add the progress hook (currently only for the file downloader)"""
380         self._progress_hooks.append(ph)
381
382     def _bidi_workaround(self, message):
383         if not hasattr(self, '_output_channel'):
384             return message
385
386         assert hasattr(self, '_output_process')
387         assert isinstance(message, compat_str)
388         line_count = message.count('\n') + 1
389         self._output_process.stdin.write((message + '\n').encode('utf-8'))
390         self._output_process.stdin.flush()
391         res = ''.join(self._output_channel.readline().decode('utf-8')
392                       for _ in range(line_count))
393         return res[:-len('\n')]
394
395     def to_screen(self, message, skip_eol=False):
396         """Print message to stdout if not in quiet mode."""
397         return self.to_stdout(message, skip_eol, check_quiet=True)
398
    def _write_string(self, s, out=None):
        """Write *s* to the stream *out*, honouring the user-configured
        'encoding' option."""
        write_string(s, out=out, encoding=self.params.get('encoding'))
401
402     def to_stdout(self, message, skip_eol=False, check_quiet=False):
403         """Print message to stdout if not in quiet mode."""
404         if self.params.get('logger'):
405             self.params['logger'].debug(message)
406         elif not check_quiet or not self.params.get('quiet', False):
407             message = self._bidi_workaround(message)
408             terminator = ['\n', ''][skip_eol]
409             output = message + terminator
410
411             self._write_string(output, self._screen_file)
412
413     def to_stderr(self, message):
414         """Print message to stderr."""
415         assert isinstance(message, compat_str)
416         if self.params.get('logger'):
417             self.params['logger'].error(message)
418         else:
419             message = self._bidi_workaround(message)
420             output = message + '\n'
421             self._write_string(output, self._err_file)
422
    def to_console_title(self, message):
        """Set the terminal/console window title to *message*; a no-op unless
        the 'consoletitle' option is enabled."""
        if not self.params.get('consoletitle', False):
            return
        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            # xterm escape sequence: OSC 0 sets the icon name and window title
            self._write_string('\033]0;%s\007' % message, self._screen_file)
432
433     def save_console_title(self):
434         if not self.params.get('consoletitle', False):
435             return
436         if 'TERM' in os.environ:
437             # Save the title on stack
438             self._write_string('\033[22;0t', self._screen_file)
439
440     def restore_console_title(self):
441         if not self.params.get('consoletitle', False):
442             return
443         if 'TERM' in os.environ:
444             # Restore the title from stack
445             self._write_string('\033[23;0t', self._screen_file)
446
447     def __enter__(self):
448         self.save_console_title()
449         return self
450
451     def __exit__(self, *args):
452         self.restore_console_title()
453
454         if self.params.get('cookiefile') is not None:
455             self.cookiejar.save()
456
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # Prefer the wrapped original traceback (e.g. from
                    # ExtractorError.exc_info) when the exception carries one
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += compat_str(traceback.format_exc())
                else:
                    # Called outside an except block: show the current stack
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Re-raise as DownloadError, carrying the wrapped original
            # exc_info when present so the cause is not lost.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1
486
487     def report_warning(self, message):
488         '''
489         Print the message to stderr, it will be prefixed with 'WARNING:'
490         If stderr is a tty file the 'WARNING:' will be colored
491         '''
492         if self.params.get('logger') is not None:
493             self.params['logger'].warning(message)
494         else:
495             if self.params.get('no_warnings'):
496                 return
497             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
498                 _msg_header = '\033[0;33mWARNING:\033[0m'
499             else:
500                 _msg_header = 'WARNING:'
501             warning_message = '%s %s' % (_msg_header, message)
502             self.to_stderr(warning_message)
503
504     def report_error(self, message, tb=None):
505         '''
506         Do the same as trouble, but prefixes the message with 'ERROR:', colored
507         in red if stderr is a tty file.
508         '''
509         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
510             _msg_header = '\033[0;31mERROR:\033[0m'
511         else:
512             _msg_header = 'ERROR:'
513         error_message = '%s %s' % (_msg_header, message)
514         self.trouble(error_message, tb)
515
516     def report_file_already_downloaded(self, file_name):
517         """Report file has already been fully downloaded."""
518         try:
519             self.to_screen('[download] %s has already been downloaded' % file_name)
520         except UnicodeEncodeError:
521             self.to_screen('[download] The file has already been downloaded')
522
523     def prepare_filename(self, info_dict):
524         """Generate the output filename."""
525         try:
526             template_dict = dict(info_dict)
527
528             template_dict['epoch'] = int(time.time())
529             autonumber_size = self.params.get('autonumber_size')
530             if autonumber_size is None:
531                 autonumber_size = 5
532             autonumber_templ = '%0' + str(autonumber_size) + 'd'
533             template_dict['autonumber'] = autonumber_templ % self._num_downloads
534             if template_dict.get('playlist_index') is not None:
535                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
536             if template_dict.get('resolution') is None:
537                 if template_dict.get('width') and template_dict.get('height'):
538                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
539                 elif template_dict.get('height'):
540                     template_dict['resolution'] = '%sp' % template_dict['height']
541                 elif template_dict.get('width'):
542                     template_dict['resolution'] = '?x%d' % template_dict['width']
543
544             sanitize = lambda k, v: sanitize_filename(
545                 compat_str(v),
546                 restricted=self.params.get('restrictfilenames'),
547                 is_id=(k == 'id'))
548             template_dict = dict((k, sanitize(k, v))
549                                  for k, v in template_dict.items()
550                                  if v is not None)
551             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
552
553             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
554             tmpl = compat_expanduser(outtmpl)
555             filename = tmpl % template_dict
556             # Temporary fix for #4787
557             # 'Treat' all problem characters by passing filename through preferredencoding
558             # to workaround encoding issues with subprocess on python2 @ Windows
559             if sys.version_info < (3, 0) and sys.platform == 'win32':
560                 filename = encodeFilename(filename, True).decode(preferredencoding())
561             return filename
562         except ValueError as err:
563             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
564             return None
565
566     def _match_entry(self, info_dict, incomplete):
567         """ Returns None iff the file should be downloaded """
568
569         video_title = info_dict.get('title', info_dict.get('id', 'video'))
570         if 'title' in info_dict:
571             # This can happen when we're just evaluating the playlist
572             title = info_dict['title']
573             matchtitle = self.params.get('matchtitle', False)
574             if matchtitle:
575                 if not re.search(matchtitle, title, re.IGNORECASE):
576                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
577             rejecttitle = self.params.get('rejecttitle', False)
578             if rejecttitle:
579                 if re.search(rejecttitle, title, re.IGNORECASE):
580                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
581         date = info_dict.get('upload_date', None)
582         if date is not None:
583             dateRange = self.params.get('daterange', DateRange())
584             if date not in dateRange:
585                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
586         view_count = info_dict.get('view_count', None)
587         if view_count is not None:
588             min_views = self.params.get('min_views')
589             if min_views is not None and view_count < min_views:
590                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
591             max_views = self.params.get('max_views')
592             if max_views is not None and view_count > max_views:
593                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
594         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
595             return 'Skipping "%s" because it is age restricted' % video_title
596         if self.in_download_archive(info_dict):
597             return '%s has already been recorded in archive' % video_title
598
599         if not incomplete:
600             match_filter = self.params.get('match_filter')
601             if match_filter is not None:
602                 ret = match_filter(info_dict)
603                 if ret is not None:
604                     return ret
605
606         return None
607
608     @staticmethod
609     def add_extra_info(info_dict, extra_info):
610         '''Set the keys from extra_info in info dict if they are missing'''
611         for key, value in extra_info.items():
612             info_dict.setdefault(key, value)
613
614     def extract_info(self, url, download=True, ie_key=None, extra_info={},
615                      process=True):
616         '''
617         Returns a list with a dictionary for each video we find.
618         If 'download', also downloads the videos.
619         extra_info is a dict containing the extra values to add to each result
620          '''
621
622         if ie_key:
623             ies = [self.get_info_extractor(ie_key)]
624         else:
625             ies = self._ies
626
627         for ie in ies:
628             if not ie.suitable(url):
629                 continue
630
631             if not ie.working():
632                 self.report_warning('The program functionality for this site has been marked as broken, '
633                                     'and will probably not work.')
634
635             try:
636                 ie_result = ie.extract(url)
637                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
638                     break
639                 if isinstance(ie_result, list):
640                     # Backwards compatibility: old IE result format
641                     ie_result = {
642                         '_type': 'compat_list',
643                         'entries': ie_result,
644                     }
645                 self.add_default_extra_info(ie_result, ie, url)
646                 if process:
647                     return self.process_ie_result(ie_result, download, extra_info)
648                 else:
649                     return ie_result
650             except ExtractorError as de:  # An error we somewhat expected
651                 self.report_error(compat_str(de), de.format_traceback())
652                 break
653             except MaxDownloadsReached:
654                 raise
655             except Exception as e:
656                 if self.params.get('ignoreerrors', False):
657                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
658                     break
659                 else:
660                     raise
661         else:
662             self.report_error('no suitable InfoExtractor for URL %s' % url)
663
664     def add_default_extra_info(self, ie_result, ie, url):
665         self.add_extra_info(ie_result, {
666             'extractor': ie.IE_NAME,
667             'webpage_url': url,
668             'webpage_url_basename': url_basename(url),
669             'extractor_key': ie.ie_key(),
670         })
671
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie (may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download' is true.
        Returns the resolved ie_result.

        NOTE(review): extra_info has a mutable default argument; it appears to
        be only read here and passed down, never mutated in this method --
        confirm against add_extra_info before relying on that.
        """

        # Dispatch on the result type reported by the extractor; a plain
        # video is assumed when no '_type' field is present.
        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            # With extract_flat enabled, URL results coming from inside a
            # playlist are returned unresolved instead of being extracted.
            extract_flat = self.params.get('extract_flat', False)
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                    extract_flat is True):
                if self.params.get('forcejson', False):
                    self.to_stdout(json.dumps(ie_result))
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # Non-None fields of the embedding page override the freshly
            # extracted info, except '_type' and 'url', which must come from
            # the resolved result.
            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            # A url_transparent result must not resolve to another
            # url_transparent one, or we would recurse without progress.
            assert new_result.get('_type') != 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist' or result_type == 'multi_video':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # --playlist-start is 1-based on the command line; convert it to
            # a 0-based slice start here.
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

            playlistitems_str = self.params.get('playlist_items', None)
            playlistitems = None
            if playlistitems_str is not None:
                # Lazily expand a spec such as '1-3,7' into the 1-based
                # indices 1, 2, 3, 7.
                def iter_playlistitems(format):
                    for string_segment in format.split(','):
                        if '-' in string_segment:
                            start, end = string_segment.split('-')
                            for item in range(int(start), int(end) + 1):
                                yield int(item)
                        else:
                            yield int(string_segment)
                playlistitems = iter_playlistitems(playlistitems_str)

            ie_entries = ie_result['entries']
            if isinstance(ie_entries, list):
                n_all_entries = len(ie_entries)
                if playlistitems:
                    entries = [ie_entries[i - 1] for i in playlistitems]
                else:
                    entries = ie_entries[playliststart:playlistend]
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            elif isinstance(ie_entries, PagedList):
                # PagedList is sliced lazily, so only the requested pages are
                # actually fetched.
                if playlistitems:
                    entries = []
                    for item in playlistitems:
                        entries.extend(ie_entries.getslice(
                            item - 1, item
                        ))
                else:
                    entries = ie_entries.getslice(
                        playliststart, playlistend)
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))
            else:  # iterable
                if playlistitems:
                    # An arbitrary iterable has no random access; materialize
                    # it before indexing.
                    entry_list = list(ie_entries)
                    entries = [entry_list[i - 1] for i in playlistitems]
                else:
                    entries = list(itertools.islice(
                        ie_entries, playliststart, playlistend))
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))

            if self.params.get('playlistreverse', False):
                entries = entries[::-1]

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                # NOTE(review): 'playlist_index' assumes slice-based
                # selection; with playlist_items or playlistreverse it may
                # not match the entry's original playlist position.
                extra = {
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_id': ie_result.get('id'),
                    'playlist_title': ie_result.get('title'),
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

                # Skip entries rejected by the user's filters (see
                # _match_entry); the reason string is shown to the user.
                reason = self._match_entry(entry, incomplete=True)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                    continue

                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            return ie_result
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            def _fixup(r):
                # Propagate the playlist-level metadata to each entry.
                self.add_extra_info(
                    r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    }
                )
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
833
834     def _apply_format_filter(self, format_spec, available_formats):
835         " Returns a tuple of the remaining format_spec and filtered formats "
836
837         OPERATORS = {
838             '<': operator.lt,
839             '<=': operator.le,
840             '>': operator.gt,
841             '>=': operator.ge,
842             '=': operator.eq,
843             '!=': operator.ne,
844         }
845         operator_rex = re.compile(r'''(?x)\s*\[
846             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
847             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
848             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
849             \]$
850             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
851         m = operator_rex.search(format_spec)
852         if m:
853             try:
854                 comparison_value = int(m.group('value'))
855             except ValueError:
856                 comparison_value = parse_filesize(m.group('value'))
857                 if comparison_value is None:
858                     comparison_value = parse_filesize(m.group('value') + 'B')
859                 if comparison_value is None:
860                     raise ValueError(
861                         'Invalid value %r in format specification %r' % (
862                             m.group('value'), format_spec))
863             op = OPERATORS[m.group('op')]
864
865         if not m:
866             STR_OPERATORS = {
867                 '=': operator.eq,
868                 '!=': operator.ne,
869             }
870             str_operator_rex = re.compile(r'''(?x)\s*\[
871                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
872                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
873                 \s*(?P<value>[a-zA-Z0-9_-]+)
874                 \s*\]$
875                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
876             m = str_operator_rex.search(format_spec)
877             if m:
878                 comparison_value = m.group('value')
879                 op = STR_OPERATORS[m.group('op')]
880
881         if not m:
882             raise ValueError('Invalid format specification %r' % format_spec)
883
884         def _filter(f):
885             actual_value = f.get(m.group('key'))
886             if actual_value is None:
887                 return m.group('none_inclusive')
888             return op(actual_value, comparison_value)
889         new_formats = [f for f in available_formats if _filter(f)]
890
891         new_format_spec = format_spec[:-len(m.group(0))]
892         if not new_format_spec:
893             new_format_spec = 'best'
894
895         return (new_format_spec, new_formats)
896
897     def select_format(self, format_spec, available_formats):
898         while format_spec.endswith(']'):
899             format_spec, available_formats = self._apply_format_filter(
900                 format_spec, available_formats)
901         if not available_formats:
902             return None
903
904         if format_spec == 'best' or format_spec is None:
905             return available_formats[-1]
906         elif format_spec == 'worst':
907             return available_formats[0]
908         elif format_spec == 'bestaudio':
909             audio_formats = [
910                 f for f in available_formats
911                 if f.get('vcodec') == 'none']
912             if audio_formats:
913                 return audio_formats[-1]
914         elif format_spec == 'worstaudio':
915             audio_formats = [
916                 f for f in available_formats
917                 if f.get('vcodec') == 'none']
918             if audio_formats:
919                 return audio_formats[0]
920         elif format_spec == 'bestvideo':
921             video_formats = [
922                 f for f in available_formats
923                 if f.get('acodec') == 'none']
924             if video_formats:
925                 return video_formats[-1]
926         elif format_spec == 'worstvideo':
927             video_formats = [
928                 f for f in available_formats
929                 if f.get('acodec') == 'none']
930             if video_formats:
931                 return video_formats[0]
932         else:
933             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
934             if format_spec in extensions:
935                 filter_f = lambda f: f['ext'] == format_spec
936             else:
937                 filter_f = lambda f: f['format_id'] == format_spec
938             matches = list(filter(filter_f, available_formats))
939             if matches:
940                 return matches[-1]
941         return None
942
943     def _calc_headers(self, info_dict):
944         res = std_headers.copy()
945
946         add_headers = info_dict.get('http_headers')
947         if add_headers:
948             res.update(add_headers)
949
950         cookies = self._calc_cookies(info_dict)
951         if cookies:
952             res['Cookie'] = cookies
953
954         return res
955
956     def _calc_cookies(self, info_dict):
957         pr = compat_urllib_request.Request(info_dict['url'])
958         self.cookiejar.add_cookie_header(pr)
959         return pr.get_header('Cookie')
960
    def process_video_result(self, info_dict, download=True):
        """Post-process a single video result: validate mandatory fields,
        fill in defaults (thumbnails, display_id, upload_date, per-format
        metadata) and select the format(s) requested by the 'format' param.

        If 'download' is true, each selected format is handed to
        process_info().  Returns the (mutated) info_dict -- updated with the
        last selected format for backwards compatibility -- or None when only
        a format/thumbnail listing was requested.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        # Normalize a lone 'thumbnail' into the 'thumbnails' list, sort the
        # list (best thumbnail last) and assign sequential ids when missing.
        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if thumbnails:
            thumbnails.sort(key=lambda t: (
                t.get('preference'), t.get('width'), t.get('height'),
                t.get('id'), t.get('url')))
            for i, t in enumerate(thumbnails):
                if 'width' in t and 'height' in t:
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                if t.get('id') is None:
                    t['id'] = '%d' % i

        # The best thumbnail (last after sorting) doubles as 'thumbnail'.
        if thumbnails and 'thumbnail' not in info_dict:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        # Derive upload_date (YYYYMMDD, UTC) from a raw timestamp if needed.
        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Working around negative timestamps in Windows
            # (see http://bugs.python.org/issue1646728)
            if info_dict['timestamp'] < 0 and os.name == 'nt':
                info_dict['timestamp'] = 0
            upload_date = datetime.datetime.utcfromtimestamp(
                info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

        # These extractors handle format selection themselves
        if info_dict['extractor'] in ['Youku']:
            if download:
                self.process_info(info_dict)
            return info_dict

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            raise ExtractorError('No video formats found!')

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)

            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url']).lower()
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)

        # format_limit: keep only the leading formats up to and including the
        # one whose id matches the limit.
        format_limit = self.params.get('format_limit', None)
        if format_limit:
            formats = list(takewhile_inclusive(
                lambda f: f['format_id'] != format_limit, formats
            ))

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # Only set the 'formats' field if the original info_dict listed
            # formats; otherwise we would end up with a circular reference:
            # the first (and unique) element of info_dict's 'formats' would
            # be info_dict itself, which can't be exported to json.
            info_dict['formats'] = formats
        if self.params.get('listformats'):
            self.list_formats(info_dict)
            return
        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            req_format = 'best'
        formats_to_download = []
        # The -1 is for supporting YoutubeIE
        if req_format in ('-1', 'all'):
            formats_to_download = formats
        else:
            for rfstr in req_format.split(','):
                # We can accept formats requested in the format: 34/5/best, we pick
                # the first that is available, starting from left
                req_formats = rfstr.split('/')
                for rf in req_formats:
                    if re.match(r'.+?\+.+?', rf) is not None:
                        # Two formats have been requested like '137+139'
                        format_1, format_2 = rf.split('+')
                        formats_info = (self.select_format(format_1, formats),
                                        self.select_format(format_2, formats))
                        if all(formats_info):
                            # The first format must contain the video and the
                            # second the audio
                            if formats_info[0].get('vcodec') == 'none':
                                self.report_error('The first format must '
                                                  'contain the video, try using '
                                                  '"-f %s+%s"' % (format_2, format_1))
                                return
                            # The merged container defaults to the video
                            # stream's extension unless overridden.
                            output_ext = (
                                formats_info[0]['ext']
                                if self.params.get('merge_output_format') is None
                                else self.params['merge_output_format'])
                            # Synthesize a pseudo-format for the merge: video
                            # properties from the first stream, audio
                            # properties from the second.
                            selected_format = {
                                'requested_formats': formats_info,
                                'format': '%s+%s' % (formats_info[0].get('format'),
                                                     formats_info[1].get('format')),
                                'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                        formats_info[1].get('format_id')),
                                'width': formats_info[0].get('width'),
                                'height': formats_info[0].get('height'),
                                'resolution': formats_info[0].get('resolution'),
                                'fps': formats_info[0].get('fps'),
                                'vcodec': formats_info[0].get('vcodec'),
                                'vbr': formats_info[0].get('vbr'),
                                'stretched_ratio': formats_info[0].get('stretched_ratio'),
                                'acodec': formats_info[1].get('acodec'),
                                'abr': formats_info[1].get('abr'),
                                'ext': output_ext,
                            }
                        else:
                            selected_format = None
                    else:
                        selected_format = self.select_format(rf, formats)
                    if selected_format is not None:
                        formats_to_download.append(selected_format)
                        break
        if not formats_to_download:
            raise ExtractorError('requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
1131
1132     def process_info(self, info_dict):
1133         """Process a single resolved IE result."""
1134
1135         assert info_dict.get('_type', 'video') == 'video'
1136
1137         max_downloads = self.params.get('max_downloads')
1138         if max_downloads is not None:
1139             if self._num_downloads >= int(max_downloads):
1140                 raise MaxDownloadsReached()
1141
1142         info_dict['fulltitle'] = info_dict['title']
1143         if len(info_dict['title']) > 200:
1144             info_dict['title'] = info_dict['title'][:197] + '...'
1145
1146         # Keep for backwards compatibility
1147         info_dict['stitle'] = info_dict['title']
1148
1149         if 'format' not in info_dict:
1150             info_dict['format'] = info_dict['ext']
1151
1152         reason = self._match_entry(info_dict, incomplete=False)
1153         if reason is not None:
1154             self.to_screen('[download] ' + reason)
1155             return
1156
1157         self._num_downloads += 1
1158
1159         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1160
1161         # Forced printings
1162         if self.params.get('forcetitle', False):
1163             self.to_stdout(info_dict['fulltitle'])
1164         if self.params.get('forceid', False):
1165             self.to_stdout(info_dict['id'])
1166         if self.params.get('forceurl', False):
1167             if info_dict.get('requested_formats') is not None:
1168                 for f in info_dict['requested_formats']:
1169                     self.to_stdout(f['url'] + f.get('play_path', ''))
1170             else:
1171                 # For RTMP URLs, also include the playpath
1172                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1173         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1174             self.to_stdout(info_dict['thumbnail'])
1175         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1176             self.to_stdout(info_dict['description'])
1177         if self.params.get('forcefilename', False) and filename is not None:
1178             self.to_stdout(filename)
1179         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1180             self.to_stdout(formatSeconds(info_dict['duration']))
1181         if self.params.get('forceformat', False):
1182             self.to_stdout(info_dict['format'])
1183         if self.params.get('forcejson', False):
1184             self.to_stdout(json.dumps(info_dict))
1185
1186         # Do nothing else if in simulate mode
1187         if self.params.get('simulate', False):
1188             return
1189
1190         if filename is None:
1191             return
1192
1193         try:
1194             dn = os.path.dirname(encodeFilename(filename))
1195             if dn and not os.path.exists(dn):
1196                 os.makedirs(dn)
1197         except (OSError, IOError) as err:
1198             self.report_error('unable to create directory ' + compat_str(err))
1199             return
1200
1201         if self.params.get('writedescription', False):
1202             descfn = filename + '.description'
1203             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1204                 self.to_screen('[info] Video description is already present')
1205             elif info_dict.get('description') is None:
1206                 self.report_warning('There\'s no description to write.')
1207             else:
1208                 try:
1209                     self.to_screen('[info] Writing video description to: ' + descfn)
1210                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1211                         descfile.write(info_dict['description'])
1212                 except (OSError, IOError):
1213                     self.report_error('Cannot write description file ' + descfn)
1214                     return
1215
1216         if self.params.get('writeannotations', False):
1217             annofn = filename + '.annotations.xml'
1218             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1219                 self.to_screen('[info] Video annotations are already present')
1220             else:
1221                 try:
1222                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1223                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1224                         annofile.write(info_dict['annotations'])
1225                 except (KeyError, TypeError):
1226                     self.report_warning('There are no annotations to write.')
1227                 except (OSError, IOError):
1228                     self.report_error('Cannot write annotations file: ' + annofn)
1229                     return
1230
1231         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1232                                        self.params.get('writeautomaticsub')])
1233
1234         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
1235             # subtitles download errors are already managed as troubles in relevant IE
1236             # that way it will silently go on when used with unsupporting IE
1237             subtitles = info_dict['subtitles']
1238             sub_format = self.params.get('subtitlesformat', 'srt')
1239             for sub_lang in subtitles.keys():
1240                 sub = subtitles[sub_lang]
1241                 if sub is None:
1242                     continue
1243                 try:
1244                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1245                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1246                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1247                     else:
1248                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1249                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1250                             subfile.write(sub)
1251                 except (OSError, IOError):
1252                     self.report_error('Cannot write subtitles file ' + sub_filename)
1253                     return
1254
1255         if self.params.get('writeinfojson', False):
1256             infofn = os.path.splitext(filename)[0] + '.info.json'
1257             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1258                 self.to_screen('[info] Video description metadata is already present')
1259             else:
1260                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1261                 try:
1262                     write_json_file(info_dict, infofn)
1263                 except (OSError, IOError):
1264                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1265                     return
1266
1267         self._write_thumbnails(info_dict, filename)
1268
1269         if not self.params.get('skip_download', False):
1270             try:
1271                 def dl(name, info):
1272                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1273                     for ph in self._progress_hooks:
1274                         fd.add_progress_hook(ph)
1275                     if self.params.get('verbose'):
1276                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1277                     return fd.download(name, info)
1278
1279                 if info_dict.get('requested_formats') is not None:
1280                     downloaded = []
1281                     success = True
1282                     merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1283                     if not merger.available:
1284                         postprocessors = []
1285                         self.report_warning('You have requested multiple '
1286                                             'formats but ffmpeg or avconv are not installed.'
1287                                             ' The formats won\'t be merged')
1288                     else:
1289                         postprocessors = [merger]
1290                     for f in info_dict['requested_formats']:
1291                         new_info = dict(info_dict)
1292                         new_info.update(f)
1293                         fname = self.prepare_filename(new_info)
1294                         fname = prepend_extension(fname, 'f%s' % f['format_id'])
1295                         downloaded.append(fname)
1296                         partial_success = dl(fname, new_info)
1297                         success = success and partial_success
1298                     info_dict['__postprocessors'] = postprocessors
1299                     info_dict['__files_to_merge'] = downloaded
1300                 else:
1301                     # Just a single file
1302                     success = dl(filename, info_dict)
1303             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1304                 self.report_error('unable to download video data: %s' % str(err))
1305                 return
1306             except (OSError, IOError) as err:
1307                 raise UnavailableVideoError(err)
1308             except (ContentTooShortError, ) as err:
1309                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1310                 return
1311
1312             if success:
1313                 # Fixup content
1314                 fixup_policy = self.params.get('fixup')
1315                 if fixup_policy is None:
1316                     fixup_policy = 'detect_or_warn'
1317
1318                 stretched_ratio = info_dict.get('stretched_ratio')
1319                 if stretched_ratio is not None and stretched_ratio != 1:
1320                     if fixup_policy == 'warn':
1321                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1322                             info_dict['id'], stretched_ratio))
1323                     elif fixup_policy == 'detect_or_warn':
1324                         stretched_pp = FFmpegFixupStretchedPP(self)
1325                         if stretched_pp.available:
1326                             info_dict.setdefault('__postprocessors', [])
1327                             info_dict['__postprocessors'].append(stretched_pp)
1328                         else:
1329                             self.report_warning(
1330                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1331                                     info_dict['id'], stretched_ratio))
1332                     else:
1333                         assert fixup_policy in ('ignore', 'never')
1334
1335                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1336                     if fixup_policy == 'warn':
1337                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1338                             info_dict['id']))
1339                     elif fixup_policy == 'detect_or_warn':
1340                         fixup_pp = FFmpegFixupM4aPP(self)
1341                         if fixup_pp.available:
1342                             info_dict.setdefault('__postprocessors', [])
1343                             info_dict['__postprocessors'].append(fixup_pp)
1344                         else:
1345                             self.report_warning(
1346                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1347                                     info_dict['id']))
1348                     else:
1349                         assert fixup_policy in ('ignore', 'never')
1350
1351                 try:
1352                     self.post_process(filename, info_dict)
1353                 except (PostProcessingError) as err:
1354                     self.report_error('postprocessing: %s' % str(err))
1355                     return
1356                 self.record_download_archive(info_dict)
1357
1358     def download(self, url_list):
1359         """Download a given list of URLs."""
1360         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1361         if (len(url_list) > 1 and
1362                 '%' not in outtmpl
1363                 and self.params.get('max_downloads') != 1):
1364             raise SameFileError(outtmpl)
1365
1366         for url in url_list:
1367             try:
1368                 # It also downloads the videos
1369                 res = self.extract_info(url)
1370             except UnavailableVideoError:
1371                 self.report_error('unable to download video')
1372             except MaxDownloadsReached:
1373                 self.to_screen('[info] Maximum number of downloaded files reached.')
1374                 raise
1375             else:
1376                 if self.params.get('dump_single_json', False):
1377                     self.to_stdout(json.dumps(res))
1378
1379         return self._download_retcode
1380
1381     def download_with_info_file(self, info_filename):
1382         with io.open(info_filename, 'r', encoding='utf-8') as f:
1383             info = json.load(f)
1384         try:
1385             self.process_ie_result(info, download=True)
1386         except DownloadError:
1387             webpage_url = info.get('webpage_url')
1388             if webpage_url is not None:
1389                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1390                 return self.download([webpage_url])
1391             else:
1392                 raise
1393         return self._download_retcode
1394
1395     def post_process(self, filename, ie_info):
1396         """Run all the postprocessors on the given file."""
1397         info = dict(ie_info)
1398         info['filepath'] = filename
1399         pps_chain = []
1400         if ie_info.get('__postprocessors') is not None:
1401             pps_chain.extend(ie_info['__postprocessors'])
1402         pps_chain.extend(self._pps)
1403         for pp in pps_chain:
1404             keep_video = None
1405             old_filename = info['filepath']
1406             try:
1407                 keep_video_wish, info = pp.run(info)
1408                 if keep_video_wish is not None:
1409                     if keep_video_wish:
1410                         keep_video = keep_video_wish
1411                     elif keep_video is None:
1412                         # No clear decision yet, let IE decide
1413                         keep_video = keep_video_wish
1414             except PostProcessingError as e:
1415                 self.report_error(e.msg)
1416             if keep_video is False and not self.params.get('keepvideo', False):
1417                 try:
1418                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1419                     os.remove(encodeFilename(old_filename))
1420                 except (IOError, OSError):
1421                     self.report_warning('Unable to remove downloaded video file')
1422
1423     def _make_archive_id(self, info_dict):
1424         # Future-proof against any change in case
1425         # and backwards compatibility with prior versions
1426         extractor = info_dict.get('extractor_key')
1427         if extractor is None:
1428             if 'id' in info_dict:
1429                 extractor = info_dict.get('ie_key')  # key in a playlist
1430         if extractor is None:
1431             return None  # Incomplete video information
1432         return extractor.lower() + ' ' + info_dict['id']
1433
1434     def in_download_archive(self, info_dict):
1435         fn = self.params.get('download_archive')
1436         if fn is None:
1437             return False
1438
1439         vid_id = self._make_archive_id(info_dict)
1440         if vid_id is None:
1441             return False  # Incomplete video information
1442
1443         try:
1444             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1445                 for line in archive_file:
1446                     if line.strip() == vid_id:
1447                         return True
1448         except IOError as ioe:
1449             if ioe.errno != errno.ENOENT:
1450                 raise
1451         return False
1452
1453     def record_download_archive(self, info_dict):
1454         fn = self.params.get('download_archive')
1455         if fn is None:
1456             return
1457         vid_id = self._make_archive_id(info_dict)
1458         assert vid_id
1459         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1460             archive_file.write(vid_id + '\n')
1461
1462     @staticmethod
1463     def format_resolution(format, default='unknown'):
1464         if format.get('vcodec') == 'none':
1465             return 'audio only'
1466         if format.get('resolution') is not None:
1467             return format['resolution']
1468         if format.get('height') is not None:
1469             if format.get('width') is not None:
1470                 res = '%sx%s' % (format['width'], format['height'])
1471             else:
1472                 res = '%sp' % format['height']
1473         elif format.get('width') is not None:
1474             res = '?x%d' % format['width']
1475         else:
1476             res = default
1477         return res
1478
1479     def _format_note(self, fdict):
1480         res = ''
1481         if fdict.get('ext') in ['f4f', 'f4m']:
1482             res += '(unsupported) '
1483         if fdict.get('format_note') is not None:
1484             res += fdict['format_note'] + ' '
1485         if fdict.get('tbr') is not None:
1486             res += '%4dk ' % fdict['tbr']
1487         if fdict.get('container') is not None:
1488             if res:
1489                 res += ', '
1490             res += '%s container' % fdict['container']
1491         if (fdict.get('vcodec') is not None and
1492                 fdict.get('vcodec') != 'none'):
1493             if res:
1494                 res += ', '
1495             res += fdict['vcodec']
1496             if fdict.get('vbr') is not None:
1497                 res += '@'
1498         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1499             res += 'video@'
1500         if fdict.get('vbr') is not None:
1501             res += '%4dk' % fdict['vbr']
1502         if fdict.get('fps') is not None:
1503             res += ', %sfps' % fdict['fps']
1504         if fdict.get('acodec') is not None:
1505             if res:
1506                 res += ', '
1507             if fdict['acodec'] == 'none':
1508                 res += 'video only'
1509             else:
1510                 res += '%-5s' % fdict['acodec']
1511         elif fdict.get('abr') is not None:
1512             if res:
1513                 res += ', '
1514             res += 'audio'
1515         if fdict.get('abr') is not None:
1516             res += '@%3dk' % fdict['abr']
1517         if fdict.get('asr') is not None:
1518             res += ' (%5dHz)' % fdict['asr']
1519         if fdict.get('filesize') is not None:
1520             if res:
1521                 res += ', '
1522             res += format_bytes(fdict['filesize'])
1523         elif fdict.get('filesize_approx') is not None:
1524             if res:
1525                 res += ', '
1526             res += '~' + format_bytes(fdict['filesize_approx'])
1527         return res
1528
1529     def list_formats(self, info_dict):
1530         def line(format, idlen=20):
1531             return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1532                 format['format_id'],
1533                 format['ext'],
1534                 self.format_resolution(format),
1535                 self._format_note(format),
1536             ))
1537
1538         formats = info_dict.get('formats', [info_dict])
1539         idlen = max(len('format code'),
1540                     max(len(f['format_id']) for f in formats))
1541         formats_s = [
1542             line(f, idlen) for f in formats
1543             if f.get('preference') is None or f['preference'] >= -1000]
1544         if len(formats) > 1:
1545             formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1546
1547         header_line = line({
1548             'format_id': 'format code', 'ext': 'extension',
1549             'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1550         self.to_screen(
1551             '[info] Available formats for %s:\n%s\n%s' %
1552             (info_dict['id'], header_line, '\n'.join(formats_s)))
1553
1554     def list_thumbnails(self, info_dict):
1555         thumbnails = info_dict.get('thumbnails')
1556         if not thumbnails:
1557             tn_url = info_dict.get('thumbnail')
1558             if tn_url:
1559                 thumbnails = [{'id': '0', 'url': tn_url}]
1560             else:
1561                 self.to_screen(
1562                     '[info] No thumbnails present for %s' % info_dict['id'])
1563                 return
1564
1565         self.to_screen(
1566             '[info] Thumbnails for %s:' % info_dict['id'])
1567         self.to_screen(render_table(
1568             ['ID', 'width', 'height', 'URL'],
1569             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1570
1571     def urlopen(self, req):
1572         """ Start an HTTP download """
1573
1574         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1575         # always respected by websites, some tend to give out URLs with non percent-encoded
1576         # non-ASCII characters (see telemb.py, ard.py [#3412])
1577         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1578         # To work around aforementioned issue we will replace request's original URL with
1579         # percent-encoded one
1580         req_is_string = isinstance(req, compat_basestring)
1581         url = req if req_is_string else req.get_full_url()
1582         url_escaped = escape_url(url)
1583
1584         # Substitute URL if any change after escaping
1585         if url != url_escaped:
1586             if req_is_string:
1587                 req = url_escaped
1588             else:
1589                 req = compat_urllib_request.Request(
1590                     url_escaped, data=req.data, headers=req.headers,
1591                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1592
1593         return self._opener.open(req, timeout=self._socket_timeout)
1594
1595     def print_debug_header(self):
1596         if not self.params.get('verbose'):
1597             return
1598
1599         if type('') is not compat_str:
1600             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1601             self.report_warning(
1602                 'Your Python is broken! Update to a newer and supported version')
1603
1604         stdout_encoding = getattr(
1605             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1606         encoding_str = (
1607             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1608                 locale.getpreferredencoding(),
1609                 sys.getfilesystemencoding(),
1610                 stdout_encoding,
1611                 self.get_encoding()))
1612         write_string(encoding_str, encoding=None)
1613
1614         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1615         try:
1616             sp = subprocess.Popen(
1617                 ['git', 'rev-parse', '--short', 'HEAD'],
1618                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1619                 cwd=os.path.dirname(os.path.abspath(__file__)))
1620             out, err = sp.communicate()
1621             out = out.decode().strip()
1622             if re.match('[0-9a-f]+', out):
1623                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1624         except:
1625             try:
1626                 sys.exc_clear()
1627             except:
1628                 pass
1629         self._write_string('[debug] Python version %s - %s\n' % (
1630             platform.python_version(), platform_name()))
1631
1632         exe_versions = FFmpegPostProcessor.get_versions(self)
1633         exe_versions['rtmpdump'] = rtmpdump_version()
1634         exe_str = ', '.join(
1635             '%s %s' % (exe, v)
1636             for exe, v in sorted(exe_versions.items())
1637             if v
1638         )
1639         if not exe_str:
1640             exe_str = 'none'
1641         self._write_string('[debug] exe versions: %s\n' % exe_str)
1642
1643         proxy_map = {}
1644         for handler in self._opener.handlers:
1645             if hasattr(handler, 'proxies'):
1646                 proxy_map.update(handler.proxies)
1647         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1648
1649         if self.params.get('call_home', False):
1650             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1651             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1652             latest_version = self.urlopen(
1653                 'https://yt-dl.org/latest/version').read().decode('utf-8')
1654             if version_tuple(latest_version) > version_tuple(__version__):
1655                 self.report_warning(
1656                     'You are using an outdated version (newest version: %s)! '
1657                     'See https://yt-dl.org/update if you need help updating.' %
1658                     latest_version)
1659
1660     def _setup_opener(self):
1661         timeout_val = self.params.get('socket_timeout')
1662         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1663
1664         opts_cookiefile = self.params.get('cookiefile')
1665         opts_proxy = self.params.get('proxy')
1666
1667         if opts_cookiefile is None:
1668             self.cookiejar = compat_cookiejar.CookieJar()
1669         else:
1670             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1671                 opts_cookiefile)
1672             if os.access(opts_cookiefile, os.R_OK):
1673                 self.cookiejar.load()
1674
1675         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1676             self.cookiejar)
1677         if opts_proxy is not None:
1678             if opts_proxy == '':
1679                 proxies = {}
1680             else:
1681                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1682         else:
1683             proxies = compat_urllib_request.getproxies()
1684             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1685             if 'http' in proxies and 'https' not in proxies:
1686                 proxies['https'] = proxies['http']
1687         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1688
1689         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1690         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1691         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1692         opener = compat_urllib_request.build_opener(
1693             https_handler, proxy_handler, cookie_processor, ydlh)
1694         # Delete the default user-agent header, which would otherwise apply in
1695         # cases where our custom HTTP handler doesn't come into play
1696         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1697         opener.addheaders = []
1698         self._opener = opener
1699
1700     def encode(self, s):
1701         if isinstance(s, bytes):
1702             return s  # Already encoded
1703
1704         try:
1705             return s.encode(self.get_encoding())
1706         except UnicodeEncodeError as err:
1707             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1708             raise
1709
1710     def get_encoding(self):
1711         encoding = self.params.get('encoding')
1712         if encoding is None:
1713             encoding = preferredencoding()
1714         return encoding
1715
1716     def _write_thumbnails(self, info_dict, filename):
1717         if self.params.get('writethumbnail', False):
1718             thumbnails = info_dict.get('thumbnails')
1719             if thumbnails:
1720                 thumbnails = [thumbnails[-1]]
1721         elif self.params.get('write_all_thumbnails', False):
1722             thumbnails = info_dict.get('thumbnails')
1723         else:
1724             return
1725
1726         if not thumbnails:
1727             # No thumbnails present, so return immediately
1728             return
1729
1730         for t in thumbnails:
1731             thumb_ext = determine_ext(t['url'], 'jpg')
1732             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
1733             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
1734             thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
1735
1736             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1737                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
1738                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1739             else:
1740                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
1741                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1742                 try:
1743                     uf = self.urlopen(t['url'])
1744                     with open(thumb_filename, 'wb') as thumbf:
1745                         shutil.copyfileobj(uf, thumbf)
1746                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
1747                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
1748                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1749                     self.report_warning('Unable to download thumbnail "%s": %s' %
1750                                         (t['url'], compat_str(err)))