_ Git - youtube-dl/blob - youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import datetime
   8 import errno
   9 import io
  10 import itertools
  11 import json
  12 import locale
  13 import operator
  14 import os
  15 import platform
  16 import re
  17 import shutil
  18 import subprocess
  19 import socket
  20 import sys
  21 import time
  22 import traceback
  23
  24 if os.name == 'nt':
  25     import ctypes
  26
  27 from .compat import (
  28     compat_cookiejar,
  29     compat_expanduser,
  30     compat_http_client,
  31     compat_kwargs,
  32     compat_str,
  33     compat_urllib_error,
  34     compat_urllib_request,
  35 )
  36 from .utils import (
  37     escape_url,
  38     ContentTooShortError,
  39     date_from_str,
  40     DateRange,
  41     DEFAULT_OUTTMPL,
  42     determine_ext,
  43     DownloadError,
  44     encodeFilename,
  45     ExtractorError,
  46     format_bytes,
  47     formatSeconds,
  48     get_term_width,
  49     locked_file,
  50     make_HTTPS_handler,
  51     MaxDownloadsReached,
  52     PagedList,
  53     parse_filesize,
  54     PostProcessingError,
  55     platform_name,
  56     preferredencoding,
  57     render_table,
  58     SameFileError,
  59     sanitize_filename,
  60     std_headers,
  61     subtitles_filename,
  62     takewhile_inclusive,
  63     UnavailableVideoError,
  64     url_basename,
  65     version_tuple,
  66     write_json_file,
  67     write_string,
  68     YoutubeDLHandler,
  69     prepend_extension,
  70     args_to_str,
  71     age_restricted,
  72 )
  73 from .cache import Cache
  74 from .extractor import get_info_extractor, gen_extractors
  75 from .downloader import get_suitable_downloader
  76 from .downloader.rtmp import rtmpdump_version
  77 from .postprocessor import (
  78     FFmpegFixupM4aPP,
  79     FFmpegFixupStretchedPP,
  80     FFmpegMergerPP,
  81     FFmpegPostProcessor,
  82     get_postprocessor,
  83 )
  84 from .version import __version__
  85
  86
  87 class YoutubeDL(object):
  88     """YoutubeDL class.
  89
  90     YoutubeDL objects are the ones responsible of downloading the
  91     actual video file and writing it to disk if the user has requested
  92     it, among some other tasks. In most cases there should be one per
  93     program. As, given a video URL, the downloader doesn't know how to
  94     extract all the needed information, task that InfoExtractors do, it
  95     has to pass the URL to one of them.
  96
  97     For this, YoutubeDL objects have a method that allows
  98     InfoExtractors to be registered in a given order. When it is passed
  99     a URL, the YoutubeDL object handles it to the first InfoExtractor it
 100     finds that reports being able to handle it. The InfoExtractor extracts
 101     all the information about the video or videos the URL refers to, and
 102     YoutubeDL process the extracted information, possibly using a File
 103     Downloader to download the video.
 104
 105     YoutubeDL objects accept a lot of parameters. In order not to saturate
 106     the object constructor with arguments, it receives a dictionary of
 107     options instead. These options are available through the params
 108     attribute for the InfoExtractors to use. The YoutubeDL also
 109     registers itself as the downloader in charge for the InfoExtractors
 110     that are added to it, so this is a "mutual registration".
 111
 112     Available options:
 113
 114     username:          Username for authentication purposes.
 115     password:          Password for authentication purposes.
 116     videopassword:     Password for acces a video.
 117     usenetrc:          Use netrc for authentication instead.
 118     verbose:           Print additional info to stdout.
 119     quiet:             Do not print messages to stdout.
 120     no_warnings:       Do not print out anything for warnings.
 121     forceurl:          Force printing final URL.
 122     forcetitle:        Force printing title.
 123     forceid:           Force printing ID.
 124     forcethumbnail:    Force printing thumbnail URL.
 125     forcedescription:  Force printing description.
 126     forcefilename:     Force printing final filename.
 127     forceduration:     Force printing duration.
 128     forcejson:         Force printing info_dict as JSON.
 129     dump_single_json:  Force printing the info_dict of the whole playlist
 130                        (or video) as a single JSON line.
 131     simulate:          Do not download the video files.
 132     format:            Video format code. See options.py for more information.
 133     format_limit:      Highest quality format to try.
 134     outtmpl:           Template for output names.
 135     restrictfilenames: Do not allow "&" and spaces in file names
 136     ignoreerrors:      Do not stop on download errors.
 137     nooverwrites:      Prevent overwriting files.
 138     playliststart:     Playlist item to start at.
 139     playlistend:       Playlist item to end at.
 140     playlistreverse:   Download playlist items in reverse order.
 141     matchtitle:        Download only matching titles.
 142     rejecttitle:       Reject downloads for matching titles.
 143     logger:            Log messages to a logging.Logger instance.
 144     logtostderr:       Log messages to stderr instead of stdout.
 145     writedescription:  Write the video description to a .description file
 146     writeinfojson:     Write the video description to a .info.json file
 147     writeannotations:  Write the video annotations to a .annotations.xml file
 148     writethumbnail:    Write the thumbnail image to a file
 149     writesubtitles:    Write the video subtitles to a file
 150     writeautomaticsub: Write the automatic subtitles to a file
 151     allsubtitles:      Downloads all the subtitles of the video
 152                        (requires writesubtitles or writeautomaticsub)
 153     listsubtitles:     Lists all available subtitles for the video
 154     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
 155     subtitleslangs:    List of languages of the subtitles to download
 156     keepvideo:         Keep the video file after post-processing
 157     daterange:         A DateRange object, download only if the upload_date is in the range.
 158     skip_download:     Skip the actual download of the video file
 159     cachedir:          Location of the cache files in the filesystem.
 160                        False to disable filesystem cache.
 161     noplaylist:        Download single video instead of a playlist if in doubt.
 162     age_limit:         An integer representing the user's age in years.
 163                        Unsuitable videos for the given age are skipped.
 164     min_views:         An integer representing the minimum view count the video
 165                        must have in order to not be skipped.
 166                        Videos without view count information are always
 167                        downloaded. None for no limit.
 168     max_views:         An integer representing the maximum view count.
 169                        Videos that are more popular than that are not
 170                        downloaded.
 171                        Videos without view count information are always
 172                        downloaded. None for no limit.
 173     download_archive:  File name of a file where all downloads are recorded.
 174                        Videos already present in the file are not downloaded
 175                        again.
 176     cookiefile:        File name where cookies should be read from and dumped to.
 177     nocheckcertificate:Do not verify SSL certificates
 178     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 179                        At the moment, this is only supported by YouTube.
 180     proxy:             URL of the proxy server to use
 181     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 182     bidi_workaround:   Work around buggy terminals without bidirectional text
 183                        support, using fridibi
 184     debug_printtraffic:Print out sent and received HTTP traffic
 185     include_ads:       Download ads as well
 186     default_search:    Prepend this string if an input url is not valid.
 187                        'auto' for elaborate guessing
 188     encoding:          Use this encoding instead of the system-specified.
 189     extract_flat:      Do not resolve URLs, return the immediate result.
 190                        Pass in 'in_playlist' to only show this behavior for
 191                        playlist items.
 192     postprocessors:    A list of dictionaries, each with an entry
 193                        * key:  The name of the postprocessor. See
 194                                youtube_dl/postprocessor/__init__.py for a list.
 195                        as well as any further keyword arguments for the
 196                        postprocessor.
 197     progress_hooks:    A list of functions that get called on download
 198                        progress, with a dictionary with the entries
 199                        * filename: The final filename
 200                        * status: One of "downloading" and "finished"
 201
 202                        The dict may also have some of the following entries:
 203
 204                        * downloaded_bytes: Bytes on disk
 205                        * total_bytes: Size of the whole file, None if unknown
 206                        * tmpfilename: The filename we're currently writing to
 207                        * eta: The estimated time in seconds, None if unknown
 208                        * speed: The download speed in bytes/second, None if
 209                                 unknown
 210
 211                        Progress hooks are guaranteed to be called at least once
 212                        (with status "finished") if the download is successful.
 213     merge_output_format: Extension to use when merging formats.
 214     fixup:             Automatically correct known faults of the file.
 215                        One of:
 216                        - "never": do nothing
 217                        - "warn": only emit a warning
 218                        - "detect_or_warn": check whether we can do anything
 219                                            about it, warn otherwise (default)
 220     source_address:    (Experimental) Client-side IP address to bind to.
 221     call_home:         Boolean, true iff we are allowed to contact the
 222                        youtube-dl servers for debugging.
 223     sleep_interval:    Number of seconds to sleep before each download.
 224     external_downloader:  Executable of the external downloader to call.
 225     listformats:       Print an overview of available video formats and exit.
 226     list_thumbnails:   Print a table of all thumbnails and exit.
 227
 228
 229     The following parameters are not used by YoutubeDL itself, they are used by
 230     the FileDownloader:
 231     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 232     noresizebuffer, retries, continuedl, noprogress, consoletitle
 233
 234     The following options are used by the post processors:
 235     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
 236                        otherwise prefer avconv.
 237     exec_cmd:          Arbitrary command to run after downloading
 238     """
 239
 240     params = None
 241     _ies = []
 242     _pps = []
 243     _download_retcode = None
 244     _num_downloads = None
 245     _screen_file = None
 246
 247     def __init__(self, params=None, auto_init=True):
 248         """Create a FileDownloader object with the given options."""
 249         if params is None:
 250             params = {}
 251         self._ies = []
 252         self._ies_instances = {}
 253         self._pps = []
 254         self._progress_hooks = []
 255         self._download_retcode = 0
 256         self._num_downloads = 0
 257         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 258         self._err_file = sys.stderr
 259         self.params = params
 260         self.cache = Cache(self)
 261
 262         if params.get('bidi_workaround', False):
 263             try:
 264                 import pty
 265                 master, slave = pty.openpty()
 266                 width = get_term_width()
 267                 if width is None:
 268                     width_args = []
 269                 else:
 270                     width_args = ['-w', str(width)]
 271                 sp_kwargs = dict(
 272                     stdin=subprocess.PIPE,
 273                     stdout=slave,
 274                     stderr=self._err_file)
 275                 try:
 276                     self._output_process = subprocess.Popen(
 277                         ['bidiv'] + width_args, **sp_kwargs
 278                     )
 279                 except OSError:
 280                     self._output_process = subprocess.Popen(
 281                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 282                 self._output_channel = os.fdopen(master, 'rb')
 283             except OSError as ose:
 284                 if ose.errno == 2:
 285                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 286                 else:
 287                     raise
 288
 289         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 290                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 291                 and not params.get('restrictfilenames', False)):
 292             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 293             self.report_warning(
 294                 'Assuming --restrict-filenames since file system encoding '
 295                 'cannot encode all characters. '
 296                 'Set the LC_ALL environment variable to fix this.')
 297             self.params['restrictfilenames'] = True
 298
 299         if '%(stitle)s' in self.params.get('outtmpl', ''):
 300             self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 301
 302         self._setup_opener()
 303
 304         if auto_init:
 305             self.print_debug_header()
 306             self.add_default_info_extractors()
 307
 308         for pp_def_raw in self.params.get('postprocessors', []):
 309             pp_class = get_postprocessor(pp_def_raw['key'])
 310             pp_def = dict(pp_def_raw)
 311             del pp_def['key']
 312             pp = pp_class(self, **compat_kwargs(pp_def))
 313             self.add_post_processor(pp)
 314
 315         for ph in self.params.get('progress_hooks', []):
 316             self.add_progress_hook(ph)
 317
 318     def warn_if_short_id(self, argv):
 319         # short YouTube ID starting with dash?
 320         idxs = [
 321             i for i, a in enumerate(argv)
 322             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 323         if idxs:
 324             correct_argv = (
 325                 ['youtube-dl'] +
 326                 [a for i, a in enumerate(argv) if i not in idxs] +
 327                 ['--'] + [argv[i] for i in idxs]
 328             )
 329             self.report_warning(
 330                 'Long argument string detected. '
 331                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 332                 args_to_str(correct_argv))
 333
 334     def add_info_extractor(self, ie):
 335         """Add an InfoExtractor object to the end of the list."""
 336         self._ies.append(ie)
 337         self._ies_instances[ie.ie_key()] = ie
 338         ie.set_downloader(self)
 339
 340     def get_info_extractor(self, ie_key):
 341         """
 342         Get an instance of an IE with name ie_key, it will try to get one from
 343         the _ies list, if there's no instance it will create a new one and add
 344         it to the extractor list.
 345         """
 346         ie = self._ies_instances.get(ie_key)
 347         if ie is None:
 348             ie = get_info_extractor(ie_key)()
 349             self.add_info_extractor(ie)
 350         return ie
 351
 352     def add_default_info_extractors(self):
 353         """
 354         Add the InfoExtractors returned by gen_extractors to the end of the list
 355         """
 356         for ie in gen_extractors():
 357             self.add_info_extractor(ie)
 358
 359     def add_post_processor(self, pp):
 360         """Add a PostProcessor object to the end of the chain."""
 361         self._pps.append(pp)
 362         pp.set_downloader(self)
 363
 364     def add_progress_hook(self, ph):
 365         """Add the progress hook (currently only for the file downloader)"""
 366         self._progress_hooks.append(ph)
 367
 368     def _bidi_workaround(self, message):
 369         if not hasattr(self, '_output_channel'):
 370             return message
 371
 372         assert hasattr(self, '_output_process')
 373         assert isinstance(message, compat_str)
 374         line_count = message.count('\n') + 1
 375         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 376         self._output_process.stdin.flush()
 377         res = ''.join(self._output_channel.readline().decode('utf-8')
 378                       for _ in range(line_count))
 379         return res[:-len('\n')]
 380
 381     def to_screen(self, message, skip_eol=False):
 382         """Print message to stdout if not in quiet mode."""
 383         return self.to_stdout(message, skip_eol, check_quiet=True)
 384
 385     def _write_string(self, s, out=None):
 386         write_string(s, out=out, encoding=self.params.get('encoding'))
 387
 388     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 389         """Print message to stdout if not in quiet mode."""
 390         if self.params.get('logger'):
 391             self.params['logger'].debug(message)
 392         elif not check_quiet or not self.params.get('quiet', False):
 393             message = self._bidi_workaround(message)
 394             terminator = ['\n', ''][skip_eol]
 395             output = message + terminator
 396
 397             self._write_string(output, self._screen_file)
 398
 399     def to_stderr(self, message):
 400         """Print message to stderr."""
 401         assert isinstance(message, compat_str)
 402         if self.params.get('logger'):
 403             self.params['logger'].error(message)
 404         else:
 405             message = self._bidi_workaround(message)
 406             output = message + '\n'
 407             self._write_string(output, self._err_file)
 408
 409     def to_console_title(self, message):
 410         if not self.params.get('consoletitle', False):
 411             return
 412         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 413             # c_wchar_p() might not be necessary if `message` is
 414             # already of type unicode()
 415             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 416         elif 'TERM' in os.environ:
 417             self._write_string('\033]0;%s\007' % message, self._screen_file)
 418
 419     def save_console_title(self):
 420         if not self.params.get('consoletitle', False):
 421             return
 422         if 'TERM' in os.environ:
 423             # Save the title on stack
 424             self._write_string('\033[22;0t', self._screen_file)
 425
 426     def restore_console_title(self):
 427         if not self.params.get('consoletitle', False):
 428             return
 429         if 'TERM' in os.environ:
 430             # Restore the title from stack
 431             self._write_string('\033[23;0t', self._screen_file)
 432
 433     def __enter__(self):
 434         self.save_console_title()
 435         return self
 436
 437     def __exit__(self, *args):
 438         self.restore_console_title()
 439
 440         if self.params.get('cookiefile') is not None:
 441             self.cookiejar.save()
 442
 443     def trouble(self, message=None, tb=None):
 444         """Determine action to take when a download problem appears.
 445
 446         Depending on if the downloader has been configured to ignore
 447         download errors or not, this method may throw an exception or
 448         not when errors are found, after printing the message.
 449
 450         tb, if given, is additional traceback information.
 451         """
 452         if message is not None:
 453             self.to_stderr(message)
 454         if self.params.get('verbose'):
 455             if tb is None:
 456                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 457                     tb = ''
 458                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 459                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 460                     tb += compat_str(traceback.format_exc())
 461                 else:
 462                     tb_data = traceback.format_list(traceback.extract_stack())
 463                     tb = ''.join(tb_data)
 464             self.to_stderr(tb)
 465         if not self.params.get('ignoreerrors', False):
 466             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 467                 exc_info = sys.exc_info()[1].exc_info
 468             else:
 469                 exc_info = sys.exc_info()
 470             raise DownloadError(message, exc_info)
 471         self._download_retcode = 1
 472
 473     def report_warning(self, message):
 474         '''
 475         Print the message to stderr, it will be prefixed with 'WARNING:'
 476         If stderr is a tty file the 'WARNING:' will be colored
 477         '''
 478         if self.params.get('logger') is not None:
 479             self.params['logger'].warning(message)
 480         else:
 481             if self.params.get('no_warnings'):
 482                 return
 483             if self._err_file.isatty() and os.name != 'nt':
 484                 _msg_header = '\033[0;33mWARNING:\033[0m'
 485             else:
 486                 _msg_header = 'WARNING:'
 487             warning_message = '%s %s' % (_msg_header, message)
 488             self.to_stderr(warning_message)
 489
 490     def report_error(self, message, tb=None):
 491         '''
 492         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 493         in red if stderr is a tty file.
 494         '''
 495         if self._err_file.isatty() and os.name != 'nt':
 496             _msg_header = '\033[0;31mERROR:\033[0m'
 497         else:
 498             _msg_header = 'ERROR:'
 499         error_message = '%s %s' % (_msg_header, message)
 500         self.trouble(error_message, tb)
 501
 502     def report_file_already_downloaded(self, file_name):
 503         """Report file has already been fully downloaded."""
 504         try:
 505             self.to_screen('[download] %s has already been downloaded' % file_name)
 506         except UnicodeEncodeError:
 507             self.to_screen('[download] The file has already been downloaded')
 508
 509     def prepare_filename(self, info_dict):
 510         """Generate the output filename."""
 511         try:
 512             template_dict = dict(info_dict)
 513
 514             template_dict['epoch'] = int(time.time())
 515             autonumber_size = self.params.get('autonumber_size')
 516             if autonumber_size is None:
 517                 autonumber_size = 5
 518             autonumber_templ = '%0' + str(autonumber_size) + 'd'
 519             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 520             if template_dict.get('playlist_index') is not None:
 521                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
 522             if template_dict.get('resolution') is None:
 523                 if template_dict.get('width') and template_dict.get('height'):
 524                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 525                 elif template_dict.get('height'):
 526                     template_dict['resolution'] = '%sp' % template_dict['height']
 527                 elif template_dict.get('width'):
 528                     template_dict['resolution'] = '?x%d' % template_dict['width']
 529
 530             sanitize = lambda k, v: sanitize_filename(
 531                 compat_str(v),
 532                 restricted=self.params.get('restrictfilenames'),
 533                 is_id=(k == 'id'))
 534             template_dict = dict((k, sanitize(k, v))
 535                                  for k, v in template_dict.items()
 536                                  if v is not None)
 537             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 538
 539             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 540             tmpl = compat_expanduser(outtmpl)
 541             filename = tmpl % template_dict
 542             return filename
 543         except ValueError as err:
 544             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 545             return None
 546
 547     def _match_entry(self, info_dict):
 548         """ Returns None iff the file should be downloaded """
 549
 550         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 551         if 'title' in info_dict:
 552             # This can happen when we're just evaluating the playlist
 553             title = info_dict['title']
 554             matchtitle = self.params.get('matchtitle', False)
 555             if matchtitle:
 556                 if not re.search(matchtitle, title, re.IGNORECASE):
 557                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 558             rejecttitle = self.params.get('rejecttitle', False)
 559             if rejecttitle:
 560                 if re.search(rejecttitle, title, re.IGNORECASE):
 561                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 562         date = info_dict.get('upload_date', None)
 563         if date is not None:
 564             dateRange = self.params.get('daterange', DateRange())
 565             if date not in dateRange:
 566                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 567         view_count = info_dict.get('view_count', None)
 568         if view_count is not None:
 569             min_views = self.params.get('min_views')
 570             if min_views is not None and view_count < min_views:
 571                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 572             max_views = self.params.get('max_views')
 573             if max_views is not None and view_count > max_views:
 574                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 575         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
 576             return 'Skipping "%s" because it is age restricted' % title
 577         if self.in_download_archive(info_dict):
 578             return '%s has already been recorded in archive' % video_title
 579         return None
 580
 581     @staticmethod
 582     def add_extra_info(info_dict, extra_info):
 583         '''Set the keys from extra_info in info dict if they are missing'''
 584         for key, value in extra_info.items():
 585             info_dict.setdefault(key, value)
 586
 587     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 588                      process=True):
 589         '''
 590         Returns a list with a dictionary for each video we find.
 591         If 'download', also downloads the videos.
 592         extra_info is a dict containing the extra values to add to each result
 593          '''
 594
 595         if ie_key:
 596             ies = [self.get_info_extractor(ie_key)]
 597         else:
 598             ies = self._ies
 599
 600         for ie in ies:
 601             if not ie.suitable(url):
 602                 continue
 603
 604             if not ie.working():
 605                 self.report_warning('The program functionality for this site has been marked as broken, '
 606                                     'and will probably not work.')
 607
 608             try:
 609                 ie_result = ie.extract(url)
 610                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
 611                     break
 612                 if isinstance(ie_result, list):
 613                     # Backwards compatibility: old IE result format
 614                     ie_result = {
 615                         '_type': 'compat_list',
 616                         'entries': ie_result,
 617                     }
 618                 self.add_default_extra_info(ie_result, ie, url)
 619                 if process:
 620                     return self.process_ie_result(ie_result, download, extra_info)
 621                 else:
 622                     return ie_result
 623             except ExtractorError as de:  # An error we somewhat expected
 624                 self.report_error(compat_str(de), de.format_traceback())
 625                 break
 626             except MaxDownloadsReached:
 627                 raise
 628             except Exception as e:
 629                 if self.params.get('ignoreerrors', False):
 630                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 631                     break
 632                 else:
 633                     raise
 634         else:
 635             self.report_error('no suitable InfoExtractor for URL %s' % url)
 636
 637     def add_default_extra_info(self, ie_result, ie, url):
 638         self.add_extra_info(ie_result, {
 639             'extractor': ie.IE_NAME,
 640             'webpage_url': url,
 641             'webpage_url_basename': url_basename(url),
 642             'extractor_key': ie.ie_key(),
 643         })
 644
 645     def process_ie_result(self, ie_result, download=True, extra_info={}):
 646         """
 647         Take the result of the ie(may be modified) and resolve all unresolved
 648         references (URLs, playlist items).
 649
 650         It will also download the videos if 'download'.
 651         Returns the resolved ie_result.
 652         """
 653
 654         result_type = ie_result.get('_type', 'video')
 655
 656         if result_type in ('url', 'url_transparent'):
 657             extract_flat = self.params.get('extract_flat', False)
 658             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
 659                     extract_flat is True):
 660                 if self.params.get('forcejson', False):
 661                     self.to_stdout(json.dumps(ie_result))
 662                 return ie_result
 663
 664         if result_type == 'video':
 665             self.add_extra_info(ie_result, extra_info)
 666             return self.process_video_result(ie_result, download=download)
 667         elif result_type == 'url':
 668             # We have to add extra_info to the results because it may be
 669             # contained in a playlist
 670             return self.extract_info(ie_result['url'],
 671                                      download,
 672                                      ie_key=ie_result.get('ie_key'),
 673                                      extra_info=extra_info)
 674         elif result_type == 'url_transparent':
 675             # Use the information from the embedding page
 676             info = self.extract_info(
 677                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 678                 extra_info=extra_info, download=False, process=False)
 679
 680             force_properties = dict(
 681                 (k, v) for k, v in ie_result.items() if v is not None)
 682             for f in ('_type', 'url'):
 683                 if f in force_properties:
 684                     del force_properties[f]
 685             new_result = info.copy()
 686             new_result.update(force_properties)
 687
 688             assert new_result.get('_type') != 'url_transparent'
 689
 690             return self.process_ie_result(
 691                 new_result, download=download, extra_info=extra_info)
 692         elif result_type == 'playlist' or result_type == 'multi_video':
 693             # We process each entry in the playlist
 694             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 695             self.to_screen('[download] Downloading playlist: %s' % playlist)
 696
 697             playlist_results = []
 698
 699             playliststart = self.params.get('playliststart', 1) - 1
 700             playlistend = self.params.get('playlistend', None)
 701             # For backwards compatibility, interpret -1 as whole list
 702             if playlistend == -1:
 703                 playlistend = None
 704
 705             ie_entries = ie_result['entries']
 706             if isinstance(ie_entries, list):
 707                 n_all_entries = len(ie_entries)
 708                 entries = ie_entries[playliststart:playlistend]
 709                 n_entries = len(entries)
 710                 self.to_screen(
 711                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
 712                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
 713             elif isinstance(ie_entries, PagedList):
 714                 entries = ie_entries.getslice(
 715                     playliststart, playlistend)
 716                 n_entries = len(entries)
 717                 self.to_screen(
 718                     "[%s] playlist %s: Downloading %d videos" %
 719                     (ie_result['extractor'], playlist, n_entries))
 720             else:  # iterable
 721                 entries = list(itertools.islice(
 722                     ie_entries, playliststart, playlistend))
 723                 n_entries = len(entries)
 724                 self.to_screen(
 725                     "[%s] playlist %s: Downloading %d videos" %
 726                     (ie_result['extractor'], playlist, n_entries))
 727
 728             if self.params.get('playlistreverse', False):
 729                 entries = entries[::-1]
 730
 731             for i, entry in enumerate(entries, 1):
 732                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
 733                 extra = {
 734                     'n_entries': n_entries,
 735                     'playlist': playlist,
 736                     'playlist_id': ie_result.get('id'),
 737                     'playlist_title': ie_result.get('title'),
 738                     'playlist_index': i + playliststart,
 739                     'extractor': ie_result['extractor'],
 740                     'webpage_url': ie_result['webpage_url'],
 741                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
 742                     'extractor_key': ie_result['extractor_key'],
 743                 }
 744
 745                 reason = self._match_entry(entry)
 746                 if reason is not None:
 747                     self.to_screen('[download] ' + reason)
 748                     continue
 749
 750                 entry_result = self.process_ie_result(entry,
 751                                                       download=download,
 752                                                       extra_info=extra)
 753                 playlist_results.append(entry_result)
 754             ie_result['entries'] = playlist_results
 755             return ie_result
 756         elif result_type == 'compat_list':
 757             self.report_warning(
 758                 'Extractor %s returned a compat_list result. '
 759                 'It needs to be updated.' % ie_result.get('extractor'))
 760
 761             def _fixup(r):
 762                 self.add_extra_info(
 763                     r,
 764                     {
 765                         'extractor': ie_result['extractor'],
 766                         'webpage_url': ie_result['webpage_url'],
 767                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
 768                         'extractor_key': ie_result['extractor_key'],
 769                     }
 770                 )
 771                 return r
 772             ie_result['entries'] = [
 773                 self.process_ie_result(_fixup(r), download, extra_info)
 774                 for r in ie_result['entries']
 775             ]
 776             return ie_result
 777         else:
 778             raise Exception('Invalid result type: %s' % result_type)
 779
 780     def _apply_format_filter(self, format_spec, available_formats):
 781         " Returns a tuple of the remaining format_spec and filtered formats "
 782
 783         OPERATORS = {
 784             '<': operator.lt,
 785             '<=': operator.le,
 786             '>': operator.gt,
 787             '>=': operator.ge,
 788             '=': operator.eq,
 789             '!=': operator.ne,
 790         }
 791         operator_rex = re.compile(r'''(?x)\s*\[
 792             (?P<key>width|height|tbr|abr|vbr|filesize)
 793             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
 794             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
 795             \]$
 796             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
 797         m = operator_rex.search(format_spec)
 798         if not m:
 799             raise ValueError('Invalid format specification %r' % format_spec)
 800
 801         try:
 802             comparison_value = int(m.group('value'))
 803         except ValueError:
 804             comparison_value = parse_filesize(m.group('value'))
 805             if comparison_value is None:
 806                 comparison_value = parse_filesize(m.group('value') + 'B')
 807             if comparison_value is None:
 808                 raise ValueError(
 809                     'Invalid value %r in format specification %r' % (
 810                         m.group('value'), format_spec))
 811         op = OPERATORS[m.group('op')]
 812
 813         def _filter(f):
 814             actual_value = f.get(m.group('key'))
 815             if actual_value is None:
 816                 return m.group('none_inclusive')
 817             return op(actual_value, comparison_value)
 818         new_formats = [f for f in available_formats if _filter(f)]
 819
 820         new_format_spec = format_spec[:-len(m.group(0))]
 821         if not new_format_spec:
 822             new_format_spec = 'best'
 823
 824         return (new_format_spec, new_formats)
 825
 826     def select_format(self, format_spec, available_formats):
 827         while format_spec.endswith(']'):
 828             format_spec, available_formats = self._apply_format_filter(
 829                 format_spec, available_formats)
 830         if not available_formats:
 831             return None
 832
 833         if format_spec == 'best' or format_spec is None:
 834             return available_formats[-1]
 835         elif format_spec == 'worst':
 836             return available_formats[0]
 837         elif format_spec == 'bestaudio':
 838             audio_formats = [
 839                 f for f in available_formats
 840                 if f.get('vcodec') == 'none']
 841             if audio_formats:
 842                 return audio_formats[-1]
 843         elif format_spec == 'worstaudio':
 844             audio_formats = [
 845                 f for f in available_formats
 846                 if f.get('vcodec') == 'none']
 847             if audio_formats:
 848                 return audio_formats[0]
 849         elif format_spec == 'bestvideo':
 850             video_formats = [
 851                 f for f in available_formats
 852                 if f.get('acodec') == 'none']
 853             if video_formats:
 854                 return video_formats[-1]
 855         elif format_spec == 'worstvideo':
 856             video_formats = [
 857                 f for f in available_formats
 858                 if f.get('acodec') == 'none']
 859             if video_formats:
 860                 return video_formats[0]
 861         else:
 862             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
 863             if format_spec in extensions:
 864                 filter_f = lambda f: f['ext'] == format_spec
 865             else:
 866                 filter_f = lambda f: f['format_id'] == format_spec
 867             matches = list(filter(filter_f, available_formats))
 868             if matches:
 869                 return matches[-1]
 870         return None
 871
 872     def _calc_headers(self, info_dict):
 873         res = std_headers.copy()
 874
 875         add_headers = info_dict.get('http_headers')
 876         if add_headers:
 877             res.update(add_headers)
 878
 879         cookies = self._calc_cookies(info_dict)
 880         if cookies:
 881             res['Cookie'] = cookies
 882
 883         return res
 884
 885     def _calc_cookies(self, info_dict):
 886         class _PseudoRequest(object):
 887             def __init__(self, url):
 888                 self.url = url
 889                 self.headers = {}
 890                 self.unverifiable = False
 891
 892             def add_unredirected_header(self, k, v):
 893                 self.headers[k] = v
 894
 895             def get_full_url(self):
 896                 return self.url
 897
 898             def is_unverifiable(self):
 899                 return self.unverifiable
 900
 901             def has_header(self, h):
 902                 return h in self.headers
 903
 904         pr = _PseudoRequest(info_dict['url'])
 905         self.cookiejar.add_cookie_header(pr)
 906         return pr.headers.get('Cookie')
 907
 908     def process_video_result(self, info_dict, download=True):
 909         assert info_dict.get('_type', 'video') == 'video'
 910
 911         if 'id' not in info_dict:
 912             raise ExtractorError('Missing "id" field in extractor result')
 913         if 'title' not in info_dict:
 914             raise ExtractorError('Missing "title" field in extractor result')
 915
 916         if 'playlist' not in info_dict:
 917             # It isn't part of a playlist
 918             info_dict['playlist'] = None
 919             info_dict['playlist_index'] = None
 920
 921         thumbnails = info_dict.get('thumbnails')
 922         if thumbnails is None:
 923             thumbnail = info_dict.get('thumbnail')
 924             if thumbnail:
 925                 thumbnails = [{'url': thumbnail}]
 926         if thumbnails:
 927             thumbnails.sort(key=lambda t: (
 928                 t.get('preference'), t.get('width'), t.get('height'),
 929                 t.get('id'), t.get('url')))
 930             for t in thumbnails:
 931                 if 'width' in t and 'height' in t:
 932                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
 933
 934         if thumbnails and 'thumbnail' not in info_dict:
 935             info_dict['thumbnail'] = thumbnails[-1]['url']
 936
 937         if 'display_id' not in info_dict and 'id' in info_dict:
 938             info_dict['display_id'] = info_dict['id']
 939
 940         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
 941             # Working around negative timestamps in Windows
 942             # (see http://bugs.python.org/issue1646728)
 943             if info_dict['timestamp'] < 0 and os.name == 'nt':
 944                 info_dict['timestamp'] = 0
 945             upload_date = datetime.datetime.utcfromtimestamp(
 946                 info_dict['timestamp'])
 947             info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
 948
 949         # This extractors handle format selection themselves
 950         if info_dict['extractor'] in ['Youku']:
 951             if download:
 952                 self.process_info(info_dict)
 953             return info_dict
 954
 955         # We now pick which formats have to be downloaded
 956         if info_dict.get('formats') is None:
 957             # There's only one format available
 958             formats = [info_dict]
 959         else:
 960             formats = info_dict['formats']
 961
 962         if not formats:
 963             raise ExtractorError('No video formats found!')
 964
 965         # We check that all the formats have the format and format_id fields
 966         for i, format in enumerate(formats):
 967             if 'url' not in format:
 968                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
 969
 970             if format.get('format_id') is None:
 971                 format['format_id'] = compat_str(i)
 972             if format.get('format') is None:
 973                 format['format'] = '{id} - {res}{note}'.format(
 974                     id=format['format_id'],
 975                     res=self.format_resolution(format),
 976                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
 977                 )
 978             # Automatically determine file extension if missing
 979             if 'ext' not in format:
 980                 format['ext'] = determine_ext(format['url']).lower()
 981             # Add HTTP headers, so that external programs can use them from the
 982             # json output
 983             full_format_info = info_dict.copy()
 984             full_format_info.update(format)
 985             format['http_headers'] = self._calc_headers(full_format_info)
 986
 987         format_limit = self.params.get('format_limit', None)
 988         if format_limit:
 989             formats = list(takewhile_inclusive(
 990                 lambda f: f['format_id'] != format_limit, formats
 991             ))
 992
 993         # TODO Central sorting goes here
 994
 995         if formats[0] is not info_dict:
 996             # only set the 'formats' fields if the original info_dict list them
 997             # otherwise we end up with a circular reference, the first (and unique)
 998             # element in the 'formats' field in info_dict is info_dict itself,
 999             # wich can't be exported to json
1000             info_dict['formats'] = formats
1001         if self.params.get('listformats'):
1002             self.list_formats(info_dict)
1003             return
1004         if self.params.get('list_thumbnails'):
1005             self.list_thumbnails(info_dict)
1006             return
1007
1008         req_format = self.params.get('format')
1009         if req_format is None:
1010             req_format = 'best'
1011         formats_to_download = []
1012         # The -1 is for supporting YoutubeIE
1013         if req_format in ('-1', 'all'):
1014             formats_to_download = formats
1015         else:
1016             for rfstr in req_format.split(','):
1017                 # We can accept formats requested in the format: 34/5/best, we pick
1018                 # the first that is available, starting from left
1019                 req_formats = rfstr.split('/')
1020                 for rf in req_formats:
1021                     if re.match(r'.+?\+.+?', rf) is not None:
1022                         # Two formats have been requested like '137+139'
1023                         format_1, format_2 = rf.split('+')
1024                         formats_info = (self.select_format(format_1, formats),
1025                                         self.select_format(format_2, formats))
1026                         if all(formats_info):
1027                             # The first format must contain the video and the
1028                             # second the audio
1029                             if formats_info[0].get('vcodec') == 'none':
1030                                 self.report_error('The first format must '
1031                                                   'contain the video, try using '
1032                                                   '"-f %s+%s"' % (format_2, format_1))
1033                                 return
1034                             output_ext = (
1035                                 formats_info[0]['ext']
1036                                 if self.params.get('merge_output_format') is None
1037                                 else self.params['merge_output_format'])
1038                             selected_format = {
1039                                 'requested_formats': formats_info,
1040                                 'format': rf,
1041                                 'ext': formats_info[0]['ext'],
1042                                 'width': formats_info[0].get('width'),
1043                                 'height': formats_info[0].get('height'),
1044                                 'resolution': formats_info[0].get('resolution'),
1045                                 'fps': formats_info[0].get('fps'),
1046                                 'vcodec': formats_info[0].get('vcodec'),
1047                                 'vbr': formats_info[0].get('vbr'),
1048                                 'stretched_ratio': formats_info[0].get('stretched_ratio'),
1049                                 'acodec': formats_info[1].get('acodec'),
1050                                 'abr': formats_info[1].get('abr'),
1051                                 'ext': output_ext,
1052                             }
1053                         else:
1054                             selected_format = None
1055                     else:
1056                         selected_format = self.select_format(rf, formats)
1057                     if selected_format is not None:
1058                         formats_to_download.append(selected_format)
1059                         break
1060         if not formats_to_download:
1061             raise ExtractorError('requested format not available',
1062                                  expected=True)
1063
1064         if download:
1065             if len(formats_to_download) > 1:
1066                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1067             for format in formats_to_download:
1068                 new_info = dict(info_dict)
1069                 new_info.update(format)
1070                 self.process_info(new_info)
1071         # We update the info dict with the best quality format (backwards compatibility)
1072         info_dict.update(formats_to_download[-1])
1073         return info_dict
1074
1075     def process_info(self, info_dict):
1076         """Process a single resolved IE result."""
1077
1078         assert info_dict.get('_type', 'video') == 'video'
1079
1080         max_downloads = self.params.get('max_downloads')
1081         if max_downloads is not None:
1082             if self._num_downloads >= int(max_downloads):
1083                 raise MaxDownloadsReached()
1084
1085         info_dict['fulltitle'] = info_dict['title']
1086         if len(info_dict['title']) > 200:
1087             info_dict['title'] = info_dict['title'][:197] + '...'
1088
1089         # Keep for backwards compatibility
1090         info_dict['stitle'] = info_dict['title']
1091
1092         if 'format' not in info_dict:
1093             info_dict['format'] = info_dict['ext']
1094
1095         reason = self._match_entry(info_dict)
1096         if reason is not None:
1097             self.to_screen('[download] ' + reason)
1098             return
1099
1100         self._num_downloads += 1
1101
1102         filename = self.prepare_filename(info_dict)
1103
1104         # Forced printings
1105         if self.params.get('forcetitle', False):
1106             self.to_stdout(info_dict['fulltitle'])
1107         if self.params.get('forceid', False):
1108             self.to_stdout(info_dict['id'])
1109         if self.params.get('forceurl', False):
1110             if info_dict.get('requested_formats') is not None:
1111                 for f in info_dict['requested_formats']:
1112                     self.to_stdout(f['url'] + f.get('play_path', ''))
1113             else:
1114                 # For RTMP URLs, also include the playpath
1115                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1116         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1117             self.to_stdout(info_dict['thumbnail'])
1118         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1119             self.to_stdout(info_dict['description'])
1120         if self.params.get('forcefilename', False) and filename is not None:
1121             self.to_stdout(filename)
1122         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1123             self.to_stdout(formatSeconds(info_dict['duration']))
1124         if self.params.get('forceformat', False):
1125             self.to_stdout(info_dict['format'])
1126         if self.params.get('forcejson', False):
1127             info_dict['_filename'] = filename
1128             self.to_stdout(json.dumps(info_dict))
1129         if self.params.get('dump_single_json', False):
1130             info_dict['_filename'] = filename
1131
1132         # Do nothing else if in simulate mode
1133         if self.params.get('simulate', False):
1134             return
1135
1136         if filename is None:
1137             return
1138
1139         try:
1140             dn = os.path.dirname(encodeFilename(filename))
1141             if dn and not os.path.exists(dn):
1142                 os.makedirs(dn)
1143         except (OSError, IOError) as err:
1144             self.report_error('unable to create directory ' + compat_str(err))
1145             return
1146
1147         if self.params.get('writedescription', False):
1148             descfn = filename + '.description'
1149             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1150                 self.to_screen('[info] Video description is already present')
1151             elif info_dict.get('description') is None:
1152                 self.report_warning('There\'s no description to write.')
1153             else:
1154                 try:
1155                     self.to_screen('[info] Writing video description to: ' + descfn)
1156                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1157                         descfile.write(info_dict['description'])
1158                 except (OSError, IOError):
1159                     self.report_error('Cannot write description file ' + descfn)
1160                     return
1161
1162         if self.params.get('writeannotations', False):
1163             annofn = filename + '.annotations.xml'
1164             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1165                 self.to_screen('[info] Video annotations are already present')
1166             else:
1167                 try:
1168                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1169                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1170                         annofile.write(info_dict['annotations'])
1171                 except (KeyError, TypeError):
1172                     self.report_warning('There are no annotations to write.')
1173                 except (OSError, IOError):
1174                     self.report_error('Cannot write annotations file: ' + annofn)
1175                     return
1176
1177         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1178                                        self.params.get('writeautomaticsub')])
1179
1180         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
1181             # subtitles download errors are already managed as troubles in relevant IE
1182             # that way it will silently go on when used with unsupporting IE
1183             subtitles = info_dict['subtitles']
1184             sub_format = self.params.get('subtitlesformat', 'srt')
1185             for sub_lang in subtitles.keys():
1186                 sub = subtitles[sub_lang]
1187                 if sub is None:
1188                     continue
1189                 try:
1190                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1191                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1192                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1193                     else:
1194                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1195                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1196                             subfile.write(sub)
1197                 except (OSError, IOError):
1198                     self.report_error('Cannot write subtitles file ' + sub_filename)
1199                     return
1200
1201         if self.params.get('writeinfojson', False):
1202             infofn = os.path.splitext(filename)[0] + '.info.json'
1203             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1204                 self.to_screen('[info] Video description metadata is already present')
1205             else:
1206                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1207                 try:
1208                     write_json_file(info_dict, infofn)
1209                 except (OSError, IOError):
1210                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1211                     return
1212
1213         if self.params.get('writethumbnail', False):
1214             if info_dict.get('thumbnail') is not None:
1215                 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
1216                 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
1217                 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1218                     self.to_screen('[%s] %s: Thumbnail is already present' %
1219                                    (info_dict['extractor'], info_dict['id']))
1220                 else:
1221                     self.to_screen('[%s] %s: Downloading thumbnail ...' %
1222                                    (info_dict['extractor'], info_dict['id']))
1223                     try:
1224                         uf = self.urlopen(info_dict['thumbnail'])
1225                         with open(thumb_filename, 'wb') as thumbf:
1226                             shutil.copyfileobj(uf, thumbf)
1227                         self.to_screen('[%s] %s: Writing thumbnail to: %s' %
1228                                        (info_dict['extractor'], info_dict['id'], thumb_filename))
1229                     except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1230                         self.report_warning('Unable to download thumbnail "%s": %s' %
1231                                             (info_dict['thumbnail'], compat_str(err)))
1232
1233         if not self.params.get('skip_download', False):
1234             try:
1235                 def dl(name, info):
1236                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1237                     for ph in self._progress_hooks:
1238                         fd.add_progress_hook(ph)
1239                     if self.params.get('verbose'):
1240                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1241                     return fd.download(name, info)
1242                 if info_dict.get('requested_formats') is not None:
1243                     downloaded = []
1244                     success = True
1245                     merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1246                     if not merger._executable:
1247                         postprocessors = []
1248                         self.report_warning('You have requested multiple '
1249                                             'formats but ffmpeg or avconv are not installed.'
1250                                             ' The formats won\'t be merged')
1251                     else:
1252                         postprocessors = [merger]
1253                     for f in info_dict['requested_formats']:
1254                         new_info = dict(info_dict)
1255                         new_info.update(f)
1256                         fname = self.prepare_filename(new_info)
1257                         fname = prepend_extension(fname, 'f%s' % f['format_id'])
1258                         downloaded.append(fname)
1259                         partial_success = dl(fname, new_info)
1260                         success = success and partial_success
1261                     info_dict['__postprocessors'] = postprocessors
1262                     info_dict['__files_to_merge'] = downloaded
1263                 else:
1264                     # Just a single file
1265                     success = dl(filename, info_dict)
1266             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1267                 self.report_error('unable to download video data: %s' % str(err))
1268                 return
1269             except (OSError, IOError) as err:
1270                 raise UnavailableVideoError(err)
1271             except (ContentTooShortError, ) as err:
1272                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1273                 return
1274
1275             if success:
1276                 # Fixup content
1277                 fixup_policy = self.params.get('fixup')
1278                 if fixup_policy is None:
1279                     fixup_policy = 'detect_or_warn'
1280
1281                 stretched_ratio = info_dict.get('stretched_ratio')
1282                 if stretched_ratio is not None and stretched_ratio != 1:
1283                     if fixup_policy == 'warn':
1284                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1285                             info_dict['id'], stretched_ratio))
1286                     elif fixup_policy == 'detect_or_warn':
1287                         stretched_pp = FFmpegFixupStretchedPP(self)
1288                         if stretched_pp.available:
1289                             info_dict.setdefault('__postprocessors', [])
1290                             info_dict['__postprocessors'].append(stretched_pp)
1291                         else:
1292                             self.report_warning(
1293                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1294                                     info_dict['id'], stretched_ratio))
1295                     else:
1296                         assert fixup_policy in ('ignore', 'never')
1297
1298                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1299                     if fixup_policy == 'warn':
1300                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1301                             info_dict['id']))
1302                     elif fixup_policy == 'detect_or_warn':
1303                         fixup_pp = FFmpegFixupM4aPP(self)
1304                         if fixup_pp.available:
1305                             info_dict.setdefault('__postprocessors', [])
1306                             info_dict['__postprocessors'].append(fixup_pp)
1307                         else:
1308                             self.report_warning(
1309                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1310                                     info_dict['id']))
1311                     else:
1312                         assert fixup_policy in ('ignore', 'never')
1313
1314                 try:
1315                     self.post_process(filename, info_dict)
1316                 except (PostProcessingError) as err:
1317                     self.report_error('postprocessing: %s' % str(err))
1318                     return
1319                 self.record_download_archive(info_dict)
1320
1321     def download(self, url_list):
1322         """Download a given list of URLs."""
1323         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1324         if (len(url_list) > 1 and
1325                 '%' not in outtmpl
1326                 and self.params.get('max_downloads') != 1):
1327             raise SameFileError(outtmpl)
1328
1329         for url in url_list:
1330             try:
1331                 # It also downloads the videos
1332                 res = self.extract_info(url)
1333             except UnavailableVideoError:
1334                 self.report_error('unable to download video')
1335             except MaxDownloadsReached:
1336                 self.to_screen('[info] Maximum number of downloaded files reached.')
1337                 raise
1338             else:
1339                 if self.params.get('dump_single_json', False):
1340                     self.to_stdout(json.dumps(res))
1341
1342         return self._download_retcode
1343
1344     def download_with_info_file(self, info_filename):
1345         with io.open(info_filename, 'r', encoding='utf-8') as f:
1346             info = json.load(f)
1347         try:
1348             self.process_ie_result(info, download=True)
1349         except DownloadError:
1350             webpage_url = info.get('webpage_url')
1351             if webpage_url is not None:
1352                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1353                 return self.download([webpage_url])
1354             else:
1355                 raise
1356         return self._download_retcode
1357
1358     def post_process(self, filename, ie_info):
1359         """Run all the postprocessors on the given file."""
1360         info = dict(ie_info)
1361         info['filepath'] = filename
1362         pps_chain = []
1363         if ie_info.get('__postprocessors') is not None:
1364             pps_chain.extend(ie_info['__postprocessors'])
1365         pps_chain.extend(self._pps)
1366         for pp in pps_chain:
1367             keep_video = None
1368             old_filename = info['filepath']
1369             try:
1370                 keep_video_wish, info = pp.run(info)
1371                 if keep_video_wish is not None:
1372                     if keep_video_wish:
1373                         keep_video = keep_video_wish
1374                     elif keep_video is None:
1375                         # No clear decision yet, let IE decide
1376                         keep_video = keep_video_wish
1377             except PostProcessingError as e:
1378                 self.report_error(e.msg)
1379             if keep_video is False and not self.params.get('keepvideo', False):
1380                 try:
1381                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1382                     os.remove(encodeFilename(old_filename))
1383                 except (IOError, OSError):
1384                     self.report_warning('Unable to remove downloaded video file')
1385
1386     def _make_archive_id(self, info_dict):
1387         # Future-proof against any change in case
1388         # and backwards compatibility with prior versions
1389         extractor = info_dict.get('extractor_key')
1390         if extractor is None:
1391             if 'id' in info_dict:
1392                 extractor = info_dict.get('ie_key')  # key in a playlist
1393         if extractor is None:
1394             return None  # Incomplete video information
1395         return extractor.lower() + ' ' + info_dict['id']
1396
1397     def in_download_archive(self, info_dict):
1398         fn = self.params.get('download_archive')
1399         if fn is None:
1400             return False
1401
1402         vid_id = self._make_archive_id(info_dict)
1403         if vid_id is None:
1404             return False  # Incomplete video information
1405
1406         try:
1407             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1408                 for line in archive_file:
1409                     if line.strip() == vid_id:
1410                         return True
1411         except IOError as ioe:
1412             if ioe.errno != errno.ENOENT:
1413                 raise
1414         return False
1415
1416     def record_download_archive(self, info_dict):
1417         fn = self.params.get('download_archive')
1418         if fn is None:
1419             return
1420         vid_id = self._make_archive_id(info_dict)
1421         assert vid_id
1422         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1423             archive_file.write(vid_id + '\n')
1424
1425     @staticmethod
1426     def format_resolution(format, default='unknown'):
1427         if format.get('vcodec') == 'none':
1428             return 'audio only'
1429         if format.get('resolution') is not None:
1430             return format['resolution']
1431         if format.get('height') is not None:
1432             if format.get('width') is not None:
1433                 res = '%sx%s' % (format['width'], format['height'])
1434             else:
1435                 res = '%sp' % format['height']
1436         elif format.get('width') is not None:
1437             res = '?x%d' % format['width']
1438         else:
1439             res = default
1440         return res
1441
1442     def _format_note(self, fdict):
1443         res = ''
1444         if fdict.get('ext') in ['f4f', 'f4m']:
1445             res += '(unsupported) '
1446         if fdict.get('format_note') is not None:
1447             res += fdict['format_note'] + ' '
1448         if fdict.get('tbr') is not None:
1449             res += '%4dk ' % fdict['tbr']
1450         if fdict.get('container') is not None:
1451             if res:
1452                 res += ', '
1453             res += '%s container' % fdict['container']
1454         if (fdict.get('vcodec') is not None and
1455                 fdict.get('vcodec') != 'none'):
1456             if res:
1457                 res += ', '
1458             res += fdict['vcodec']
1459             if fdict.get('vbr') is not None:
1460                 res += '@'
1461         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1462             res += 'video@'
1463         if fdict.get('vbr') is not None:
1464             res += '%4dk' % fdict['vbr']
1465         if fdict.get('fps') is not None:
1466             res += ', %sfps' % fdict['fps']
1467         if fdict.get('acodec') is not None:
1468             if res:
1469                 res += ', '
1470             if fdict['acodec'] == 'none':
1471                 res += 'video only'
1472             else:
1473                 res += '%-5s' % fdict['acodec']
1474         elif fdict.get('abr') is not None:
1475             if res:
1476                 res += ', '
1477             res += 'audio'
1478         if fdict.get('abr') is not None:
1479             res += '@%3dk' % fdict['abr']
1480         if fdict.get('asr') is not None:
1481             res += ' (%5dHz)' % fdict['asr']
1482         if fdict.get('filesize') is not None:
1483             if res:
1484                 res += ', '
1485             res += format_bytes(fdict['filesize'])
1486         elif fdict.get('filesize_approx') is not None:
1487             if res:
1488                 res += ', '
1489             res += '~' + format_bytes(fdict['filesize_approx'])
1490         return res
1491
1492     def list_formats(self, info_dict):
1493         def line(format, idlen=20):
1494             return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1495                 format['format_id'],
1496                 format['ext'],
1497                 self.format_resolution(format),
1498                 self._format_note(format),
1499             ))
1500
1501         formats = info_dict.get('formats', [info_dict])
1502         idlen = max(len('format code'),
1503                     max(len(f['format_id']) for f in formats))
1504         formats_s = [
1505             line(f, idlen) for f in formats
1506             if f.get('preference') is None or f['preference'] >= -1000]
1507         if len(formats) > 1:
1508             formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1509             formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1510
1511         header_line = line({
1512             'format_id': 'format code', 'ext': 'extension',
1513             'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1514         self.to_screen(
1515             '[info] Available formats for %s:\n%s\n%s' %
1516             (info_dict['id'], header_line, '\n'.join(formats_s)))
1517
1518     def list_thumbnails(self, info_dict):
1519         thumbnails = info_dict.get('thumbnails')
1520         if not thumbnails:
1521             tn_url = info_dict.get('thumbnail')
1522             if tn_url:
1523                 thumbnails = [{'id': '0', 'url': tn_url}]
1524             else:
1525                 self.to_screen(
1526                     '[info] No thumbnails present for %s' % info_dict['id'])
1527                 return
1528
1529         self.to_screen(
1530             '[info] Thumbnails for %s:' % info_dict['id'])
1531         self.to_screen(render_table(
1532             ['ID', 'width', 'height', 'URL'],
1533             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1534
1535     def urlopen(self, req):
1536         """ Start an HTTP download """
1537
1538         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1539         # always respected by websites, some tend to give out URLs with non percent-encoded
1540         # non-ASCII characters (see telemb.py, ard.py [#3412])
1541         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1542         # To work around aforementioned issue we will replace request's original URL with
1543         # percent-encoded one
1544         req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
1545         url = req if req_is_string else req.get_full_url()
1546         url_escaped = escape_url(url)
1547
1548         # Substitute URL if any change after escaping
1549         if url != url_escaped:
1550             if req_is_string:
1551                 req = url_escaped
1552             else:
1553                 req = compat_urllib_request.Request(
1554                     url_escaped, data=req.data, headers=req.headers,
1555                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1556
1557         return self._opener.open(req, timeout=self._socket_timeout)
1558
1559     def print_debug_header(self):
1560         if not self.params.get('verbose'):
1561             return
1562
1563         if type('') is not compat_str:
1564             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1565             self.report_warning(
1566                 'Your Python is broken! Update to a newer and supported version')
1567
1568         stdout_encoding = getattr(
1569             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1570         encoding_str = (
1571             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1572                 locale.getpreferredencoding(),
1573                 sys.getfilesystemencoding(),
1574                 stdout_encoding,
1575                 self.get_encoding()))
1576         write_string(encoding_str, encoding=None)
1577
1578         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1579         try:
1580             sp = subprocess.Popen(
1581                 ['git', 'rev-parse', '--short', 'HEAD'],
1582                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1583                 cwd=os.path.dirname(os.path.abspath(__file__)))
1584             out, err = sp.communicate()
1585             out = out.decode().strip()
1586             if re.match('[0-9a-f]+', out):
1587                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1588         except:
1589             try:
1590                 sys.exc_clear()
1591             except:
1592                 pass
1593         self._write_string('[debug] Python version %s - %s\n' % (
1594             platform.python_version(), platform_name()))
1595
1596         exe_versions = FFmpegPostProcessor.get_versions()
1597         exe_versions['rtmpdump'] = rtmpdump_version()
1598         exe_str = ', '.join(
1599             '%s %s' % (exe, v)
1600             for exe, v in sorted(exe_versions.items())
1601             if v
1602         )
1603         if not exe_str:
1604             exe_str = 'none'
1605         self._write_string('[debug] exe versions: %s\n' % exe_str)
1606
1607         proxy_map = {}
1608         for handler in self._opener.handlers:
1609             if hasattr(handler, 'proxies'):
1610                 proxy_map.update(handler.proxies)
1611         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1612
1613         if self.params.get('call_home', False):
1614             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1615             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1616             latest_version = self.urlopen(
1617                 'https://yt-dl.org/latest/version').read().decode('utf-8')
1618             if version_tuple(latest_version) > version_tuple(__version__):
1619                 self.report_warning(
1620                     'You are using an outdated version (newest version: %s)! '
1621                     'See https://yt-dl.org/update if you need help updating.' %
1622                     latest_version)
1623
1624     def _setup_opener(self):
1625         timeout_val = self.params.get('socket_timeout')
1626         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1627
1628         opts_cookiefile = self.params.get('cookiefile')
1629         opts_proxy = self.params.get('proxy')
1630
1631         if opts_cookiefile is None:
1632             self.cookiejar = compat_cookiejar.CookieJar()
1633         else:
1634             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1635                 opts_cookiefile)
1636             if os.access(opts_cookiefile, os.R_OK):
1637                 self.cookiejar.load()
1638
1639         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1640             self.cookiejar)
1641         if opts_proxy is not None:
1642             if opts_proxy == '':
1643                 proxies = {}
1644             else:
1645                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1646         else:
1647             proxies = compat_urllib_request.getproxies()
1648             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1649             if 'http' in proxies and 'https' not in proxies:
1650                 proxies['https'] = proxies['http']
1651         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1652
1653         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1654         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1655         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1656         opener = compat_urllib_request.build_opener(
1657             https_handler, proxy_handler, cookie_processor, ydlh)
1658         # Delete the default user-agent header, which would otherwise apply in
1659         # cases where our custom HTTP handler doesn't come into play
1660         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1661         opener.addheaders = []
1662         self._opener = opener
1663
1664     def encode(self, s):
1665         if isinstance(s, bytes):
1666             return s  # Already encoded
1667
1668         try:
1669             return s.encode(self.get_encoding())
1670         except UnicodeEncodeError as err:
1671             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1672             raise
1673
1674     def get_encoding(self):
1675         encoding = self.params.get('encoding')
1676         if encoding is None:
1677             encoding = preferredencoding()
1678         return encoding