_ Git - youtube-dl/blob - youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import datetime
   8 import errno
   9 import io
  10 import itertools
  11 import json
  12 import locale
  13 import os
  14 import platform
  15 import re
  16 import shutil
  17 import subprocess
  18 import socket
  19 import sys
  20 import time
  21 import traceback
  22
  23 if os.name == 'nt':
  24     import ctypes
  25
  26 from .compat import (
  27     compat_cookiejar,
  28     compat_expanduser,
  29     compat_http_client,
  30     compat_kwargs,
  31     compat_str,
  32     compat_urllib_error,
  33     compat_urllib_request,
  34 )
  35 from .utils import (
  36     escape_url,
  37     ContentTooShortError,
  38     date_from_str,
  39     DateRange,
  40     DEFAULT_OUTTMPL,
  41     determine_ext,
  42     DownloadError,
  43     encodeFilename,
  44     ExtractorError,
  45     format_bytes,
  46     formatSeconds,
  47     get_term_width,
  48     locked_file,
  49     make_HTTPS_handler,
  50     MaxDownloadsReached,
  51     PagedList,
  52     PostProcessingError,
  53     platform_name,
  54     preferredencoding,
  55     SameFileError,
  56     sanitize_filename,
  57     subtitles_filename,
  58     takewhile_inclusive,
  59     UnavailableVideoError,
  60     url_basename,
  61     write_json_file,
  62     write_string,
  63     YoutubeDLHandler,
  64     prepend_extension,
  65     args_to_str,
  66     age_restricted,
  67 )
  68 from .cache import Cache
  69 from .extractor import get_info_extractor, gen_extractors
  70 from .downloader import get_suitable_downloader
  71 from .downloader.rtmp import rtmpdump_version
  72 from .postprocessor import (
  73     FFmpegMergerPP,
  74     FFmpegPostProcessor,
  75     get_postprocessor,
  76 )
  77 from .version import __version__
  78
  79
  80 class YoutubeDL(object):
  81     """YoutubeDL class.
  82
  83     YoutubeDL objects are the ones responsible for downloading the
  84     actual video file and writing it to disk if the user has requested
  85     it, among some other tasks. In most cases there should be one per
  86     program. As, given a video URL, the downloader doesn't know how to
  87     extract all the needed information, task that InfoExtractors do, it
  88     has to pass the URL to one of them.
  89
  90     For this, YoutubeDL objects have a method that allows
  91     InfoExtractors to be registered in a given order. When it is passed
  92     a URL, the YoutubeDL object handles it to the first InfoExtractor it
  93     finds that reports being able to handle it. The InfoExtractor extracts
  94     all the information about the video or videos the URL refers to, and
  95     YoutubeDL process the extracted information, possibly using a File
  96     Downloader to download the video.
  97
  98     YoutubeDL objects accept a lot of parameters. In order not to saturate
  99     the object constructor with arguments, it receives a dictionary of
 100     options instead. These options are available through the params
 101     attribute for the InfoExtractors to use. The YoutubeDL also
 102     registers itself as the downloader in charge for the InfoExtractors
 103     that are added to it, so this is a "mutual registration".
 104
 105     Available options:
 106
 107     username:          Username for authentication purposes.
 108     password:          Password for authentication purposes.
 109     videopassword:     Password for accessing a video.
 110     usenetrc:          Use netrc for authentication instead.
 111     verbose:           Print additional info to stdout.
 112     quiet:             Do not print messages to stdout.
 113     no_warnings:       Do not print out anything for warnings.
 114     forceurl:          Force printing final URL.
 115     forcetitle:        Force printing title.
 116     forceid:           Force printing ID.
 117     forcethumbnail:    Force printing thumbnail URL.
 118     forcedescription:  Force printing description.
 119     forcefilename:     Force printing final filename.
 120     forceduration:     Force printing duration.
 121     forcejson:         Force printing info_dict as JSON.
 122     dump_single_json:  Force printing the info_dict of the whole playlist
 123                        (or video) as a single JSON line.
 124     simulate:          Do not download the video files.
 125     format:            Video format code. See options.py for more information.
 126     format_limit:      Highest quality format to try.
 127     outtmpl:           Template for output names.
 128     restrictfilenames: Do not allow "&" and spaces in file names
 129     ignoreerrors:      Do not stop on download errors.
 130     nooverwrites:      Prevent overwriting files.
 131     playliststart:     Playlist item to start at.
 132     playlistend:       Playlist item to end at.
 133     playlistreverse:   Download playlist items in reverse order.
 134     matchtitle:        Download only matching titles.
 135     rejecttitle:       Reject downloads for matching titles.
 136     logger:            Log messages to a logging.Logger instance.
 137     logtostderr:       Log messages to stderr instead of stdout.
 138     writedescription:  Write the video description to a .description file
 139     writeinfojson:     Write the video description to a .info.json file
 140     writeannotations:  Write the video annotations to a .annotations.xml file
 141     writethumbnail:    Write the thumbnail image to a file
 142     writesubtitles:    Write the video subtitles to a file
 143     writeautomaticsub: Write the automatic subtitles to a file
 144     allsubtitles:      Downloads all the subtitles of the video
 145                        (requires writesubtitles or writeautomaticsub)
 146     listsubtitles:     Lists all available subtitles for the video
 147     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
 148     subtitleslangs:    List of languages of the subtitles to download
 149     keepvideo:         Keep the video file after post-processing
 150     daterange:         A DateRange object, download only if the upload_date is in the range.
 151     skip_download:     Skip the actual download of the video file
 152     cachedir:          Location of the cache files in the filesystem.
 153                        False to disable filesystem cache.
 154     noplaylist:        Download single video instead of a playlist if in doubt.
 155     age_limit:         An integer representing the user's age in years.
 156                        Unsuitable videos for the given age are skipped.
 157     min_views:         An integer representing the minimum view count the video
 158                        must have in order to not be skipped.
 159                        Videos without view count information are always
 160                        downloaded. None for no limit.
 161     max_views:         An integer representing the maximum view count.
 162                        Videos that are more popular than that are not
 163                        downloaded.
 164                        Videos without view count information are always
 165                        downloaded. None for no limit.
 166     download_archive:  File name of a file where all downloads are recorded.
 167                        Videos already present in the file are not downloaded
 168                        again.
 169     cookiefile:        File name where cookies should be read from and dumped to.
 170     nocheckcertificate:Do not verify SSL certificates
 171     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 172                        At the moment, this is only supported by YouTube.
 173     proxy:             URL of the proxy server to use
 174     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 175     bidi_workaround:   Work around buggy terminals without bidirectional text
 176                        support, using fribidi
 177     debug_printtraffic:Print out sent and received HTTP traffic
 178     include_ads:       Download ads as well
 179     default_search:    Prepend this string if an input url is not valid.
 180                        'auto' for elaborate guessing
 181     encoding:          Use this encoding instead of the system-specified.
 182     extract_flat:      Do not resolve URLs, return the immediate result.
 183                        Pass in 'in_playlist' to only show this behavior for
 184                        playlist items.
 185     postprocessors:    A list of dictionaries, each with an entry
 186                        * key:  The name of the postprocessor. See
 187                                youtube_dl/postprocessor/__init__.py for a list.
 188                        as well as any further keyword arguments for the
 189                        postprocessor.
 190     progress_hooks:    A list of functions that get called on download
 191                        progress, with a dictionary with the entries
 192                        * filename: The final filename
 193                        * status: One of "downloading" and "finished"
 194
 195                        The dict may also have some of the following entries:
 196
 197                        * downloaded_bytes: Bytes on disk
 198                        * total_bytes: Size of the whole file, None if unknown
 199                        * tmpfilename: The filename we're currently writing to
 200                        * eta: The estimated time in seconds, None if unknown
 201                        * speed: The download speed in bytes/second, None if
 202                                 unknown
 203
 204                        Progress hooks are guaranteed to be called at least once
 205                        (with status "finished") if the download is successful.
 206     merge_output_format: Extension to use when merging formats.
 207
 208
 209     The following parameters are not used by YoutubeDL itself, they are used by
 210     the FileDownloader:
 211     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 212     noresizebuffer, retries, continuedl, noprogress, consoletitle
 213
 214     The following options are used by the post processors:
 215     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
 216                        otherwise prefer avconv.
 217     exec_cmd:          Arbitrary command to run after downloading
 218     """
 219
 220     params = None
 221     _ies = []
 222     _pps = []
 223     _download_retcode = None
 224     _num_downloads = None
 225     _screen_file = None
 226
 227     def __init__(self, params=None, auto_init=True):
 228         """Create a FileDownloader object with the given options."""
 229         if params is None:
 230             params = {}
 231         self._ies = []
 232         self._ies_instances = {}
 233         self._pps = []
 234         self._progress_hooks = []
 235         self._download_retcode = 0
 236         self._num_downloads = 0
 237         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 238         self._err_file = sys.stderr
 239         self.params = params
 240         self.cache = Cache(self)
 241
 242         if params.get('bidi_workaround', False):
 243             try:
 244                 import pty
 245                 master, slave = pty.openpty()
 246                 width = get_term_width()
 247                 if width is None:
 248                     width_args = []
 249                 else:
 250                     width_args = ['-w', str(width)]
 251                 sp_kwargs = dict(
 252                     stdin=subprocess.PIPE,
 253                     stdout=slave,
 254                     stderr=self._err_file)
 255                 try:
 256                     self._output_process = subprocess.Popen(
 257                         ['bidiv'] + width_args, **sp_kwargs
 258                     )
 259                 except OSError:
 260                     self._output_process = subprocess.Popen(
 261                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 262                 self._output_channel = os.fdopen(master, 'rb')
 263             except OSError as ose:
 264                 if ose.errno == 2:
 265                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 266                 else:
 267                     raise
 268
 269         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 270                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 271                 and not params.get('restrictfilenames', False)):
 272             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 273             self.report_warning(
 274                 'Assuming --restrict-filenames since file system encoding '
 275                 'cannot encode all characters. '
 276                 'Set the LC_ALL environment variable to fix this.')
 277             self.params['restrictfilenames'] = True
 278
 279         if '%(stitle)s' in self.params.get('outtmpl', ''):
 280             self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 281
 282         self._setup_opener()
 283
 284         if auto_init:
 285             self.print_debug_header()
 286             self.add_default_info_extractors()
 287
 288         for pp_def_raw in self.params.get('postprocessors', []):
 289             pp_class = get_postprocessor(pp_def_raw['key'])
 290             pp_def = dict(pp_def_raw)
 291             del pp_def['key']
 292             pp = pp_class(self, **compat_kwargs(pp_def))
 293             self.add_post_processor(pp)
 294
 295         for ph in self.params.get('progress_hooks', []):
 296             self.add_progress_hook(ph)
 297
 298     def warn_if_short_id(self, argv):
 299         # short YouTube ID starting with dash?
 300         idxs = [
 301             i for i, a in enumerate(argv)
 302             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 303         if idxs:
 304             correct_argv = (
 305                 ['youtube-dl'] +
 306                 [a for i, a in enumerate(argv) if i not in idxs] +
 307                 ['--'] + [argv[i] for i in idxs]
 308             )
 309             self.report_warning(
 310                 'Long argument string detected. '
 311                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 312                 args_to_str(correct_argv))
 313
 314     def add_info_extractor(self, ie):
 315         """Add an InfoExtractor object to the end of the list."""
 316         self._ies.append(ie)
 317         self._ies_instances[ie.ie_key()] = ie
 318         ie.set_downloader(self)
 319
 320     def get_info_extractor(self, ie_key):
 321         """
 322         Get an instance of an IE with name ie_key, it will try to get one from
 323         the _ies list, if there's no instance it will create a new one and add
 324         it to the extractor list.
 325         """
 326         ie = self._ies_instances.get(ie_key)
 327         if ie is None:
 328             ie = get_info_extractor(ie_key)()
 329             self.add_info_extractor(ie)
 330         return ie
 331
 332     def add_default_info_extractors(self):
 333         """
 334         Add the InfoExtractors returned by gen_extractors to the end of the list
 335         """
 336         for ie in gen_extractors():
 337             self.add_info_extractor(ie)
 338
 339     def add_post_processor(self, pp):
 340         """Add a PostProcessor object to the end of the chain."""
 341         self._pps.append(pp)
 342         pp.set_downloader(self)
 343
 344     def add_progress_hook(self, ph):
 345         """Add the progress hook (currently only for the file downloader)"""
 346         self._progress_hooks.append(ph)
 347
 348     def _bidi_workaround(self, message):
 349         if not hasattr(self, '_output_channel'):
 350             return message
 351
 352         assert hasattr(self, '_output_process')
 353         assert isinstance(message, compat_str)
 354         line_count = message.count('\n') + 1
 355         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 356         self._output_process.stdin.flush()
 357         res = ''.join(self._output_channel.readline().decode('utf-8')
 358                       for _ in range(line_count))
 359         return res[:-len('\n')]
 360
 361     def to_screen(self, message, skip_eol=False):
 362         """Print message to stdout if not in quiet mode."""
 363         return self.to_stdout(message, skip_eol, check_quiet=True)
 364
 365     def _write_string(self, s, out=None):
 366         write_string(s, out=out, encoding=self.params.get('encoding'))
 367
 368     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 369         """Print message to stdout if not in quiet mode."""
 370         if self.params.get('logger'):
 371             self.params['logger'].debug(message)
 372         elif not check_quiet or not self.params.get('quiet', False):
 373             message = self._bidi_workaround(message)
 374             terminator = ['\n', ''][skip_eol]
 375             output = message + terminator
 376
 377             self._write_string(output, self._screen_file)
 378
 379     def to_stderr(self, message):
 380         """Print message to stderr."""
 381         assert isinstance(message, compat_str)
 382         if self.params.get('logger'):
 383             self.params['logger'].error(message)
 384         else:
 385             message = self._bidi_workaround(message)
 386             output = message + '\n'
 387             self._write_string(output, self._err_file)
 388
 389     def to_console_title(self, message):
 390         if not self.params.get('consoletitle', False):
 391             return
 392         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 393             # c_wchar_p() might not be necessary if `message` is
 394             # already of type unicode()
 395             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 396         elif 'TERM' in os.environ:
 397             self._write_string('\033]0;%s\007' % message, self._screen_file)
 398
 399     def save_console_title(self):
 400         if not self.params.get('consoletitle', False):
 401             return
 402         if 'TERM' in os.environ:
 403             # Save the title on stack
 404             self._write_string('\033[22;0t', self._screen_file)
 405
 406     def restore_console_title(self):
 407         if not self.params.get('consoletitle', False):
 408             return
 409         if 'TERM' in os.environ:
 410             # Restore the title from stack
 411             self._write_string('\033[23;0t', self._screen_file)
 412
 413     def __enter__(self):
 414         self.save_console_title()
 415         return self
 416
 417     def __exit__(self, *args):
 418         self.restore_console_title()
 419
 420         if self.params.get('cookiefile') is not None:
 421             self.cookiejar.save()
 422
 423     def trouble(self, message=None, tb=None):
 424         """Determine action to take when a download problem appears.
 425
 426         Depending on if the downloader has been configured to ignore
 427         download errors or not, this method may throw an exception or
 428         not when errors are found, after printing the message.
 429
 430         tb, if given, is additional traceback information.
 431         """
 432         if message is not None:
 433             self.to_stderr(message)
 434         if self.params.get('verbose'):
 435             if tb is None:
 436                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 437                     tb = ''
 438                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 439                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 440                     tb += compat_str(traceback.format_exc())
 441                 else:
 442                     tb_data = traceback.format_list(traceback.extract_stack())
 443                     tb = ''.join(tb_data)
 444             self.to_stderr(tb)
 445         if not self.params.get('ignoreerrors', False):
 446             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 447                 exc_info = sys.exc_info()[1].exc_info
 448             else:
 449                 exc_info = sys.exc_info()
 450             raise DownloadError(message, exc_info)
 451         self._download_retcode = 1
 452
 453     def report_warning(self, message):
 454         '''
 455         Print the message to stderr, it will be prefixed with 'WARNING:'
 456         If stderr is a tty file the 'WARNING:' will be colored
 457         '''
 458         if self.params.get('logger') is not None:
 459             self.params['logger'].warning(message)
 460         else:
 461             if self.params.get('no_warnings'):
 462                 return
 463             if self._err_file.isatty() and os.name != 'nt':
 464                 _msg_header = '\033[0;33mWARNING:\033[0m'
 465             else:
 466                 _msg_header = 'WARNING:'
 467             warning_message = '%s %s' % (_msg_header, message)
 468             self.to_stderr(warning_message)
 469
 470     def report_error(self, message, tb=None):
 471         '''
 472         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 473         in red if stderr is a tty file.
 474         '''
 475         if self._err_file.isatty() and os.name != 'nt':
 476             _msg_header = '\033[0;31mERROR:\033[0m'
 477         else:
 478             _msg_header = 'ERROR:'
 479         error_message = '%s %s' % (_msg_header, message)
 480         self.trouble(error_message, tb)
 481
 482     def report_file_already_downloaded(self, file_name):
 483         """Report file has already been fully downloaded."""
 484         try:
 485             self.to_screen('[download] %s has already been downloaded' % file_name)
 486         except UnicodeEncodeError:
 487             self.to_screen('[download] The file has already been downloaded')
 488
 489     def prepare_filename(self, info_dict):
 490         """Generate the output filename."""
 491         try:
 492             template_dict = dict(info_dict)
 493
 494             template_dict['epoch'] = int(time.time())
 495             autonumber_size = self.params.get('autonumber_size')
 496             if autonumber_size is None:
 497                 autonumber_size = 5
 498             autonumber_templ = '%0' + str(autonumber_size) + 'd'
 499             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 500             if template_dict.get('playlist_index') is not None:
 501                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
 502             if template_dict.get('resolution') is None:
 503                 if template_dict.get('width') and template_dict.get('height'):
 504                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 505                 elif template_dict.get('height'):
 506                     template_dict['resolution'] = '%sp' % template_dict['height']
 507                 elif template_dict.get('width'):
 508                     template_dict['resolution'] = '?x%d' % template_dict['width']
 509
 510             sanitize = lambda k, v: sanitize_filename(
 511                 compat_str(v),
 512                 restricted=self.params.get('restrictfilenames'),
 513                 is_id=(k == 'id'))
 514             template_dict = dict((k, sanitize(k, v))
 515                                  for k, v in template_dict.items()
 516                                  if v is not None)
 517             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 518
 519             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 520             tmpl = compat_expanduser(outtmpl)
 521             filename = tmpl % template_dict
 522             return filename
 523         except ValueError as err:
 524             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 525             return None
 526
 527     def _match_entry(self, info_dict):
 528         """ Returns None iff the file should be downloaded """
 529
 530         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 531         if 'title' in info_dict:
 532             # This can happen when we're just evaluating the playlist
 533             title = info_dict['title']
 534             matchtitle = self.params.get('matchtitle', False)
 535             if matchtitle:
 536                 if not re.search(matchtitle, title, re.IGNORECASE):
 537                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 538             rejecttitle = self.params.get('rejecttitle', False)
 539             if rejecttitle:
 540                 if re.search(rejecttitle, title, re.IGNORECASE):
 541                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 542         date = info_dict.get('upload_date', None)
 543         if date is not None:
 544             dateRange = self.params.get('daterange', DateRange())
 545             if date not in dateRange:
 546                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 547         view_count = info_dict.get('view_count', None)
 548         if view_count is not None:
 549             min_views = self.params.get('min_views')
 550             if min_views is not None and view_count < min_views:
 551                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 552             max_views = self.params.get('max_views')
 553             if max_views is not None and view_count > max_views:
 554                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 555         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
 556             return 'Skipping "%s" because it is age restricted' % title
 557         if self.in_download_archive(info_dict):
 558             return '%s has already been recorded in archive' % video_title
 559         return None
 560
 561     @staticmethod
 562     def add_extra_info(info_dict, extra_info):
 563         '''Set the keys from extra_info in info dict if they are missing'''
 564         for key, value in extra_info.items():
 565             info_dict.setdefault(key, value)
 566
 567     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 568                      process=True):
 569         '''
 570         Returns a list with a dictionary for each video we find.
 571         If 'download', also downloads the videos.
 572         extra_info is a dict containing the extra values to add to each result
 573          '''
 574
 575         if ie_key:
 576             ies = [self.get_info_extractor(ie_key)]
 577         else:
 578             ies = self._ies
 579
 580         for ie in ies:
 581             if not ie.suitable(url):
 582                 continue
 583
 584             if not ie.working():
 585                 self.report_warning('The program functionality for this site has been marked as broken, '
 586                                     'and will probably not work.')
 587
 588             try:
 589                 ie_result = ie.extract(url)
 590                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
 591                     break
 592                 if isinstance(ie_result, list):
 593                     # Backwards compatibility: old IE result format
 594                     ie_result = {
 595                         '_type': 'compat_list',
 596                         'entries': ie_result,
 597                     }
 598                 self.add_default_extra_info(ie_result, ie, url)
 599                 if process:
 600                     return self.process_ie_result(ie_result, download, extra_info)
 601                 else:
 602                     return ie_result
 603             except ExtractorError as de:  # An error we somewhat expected
 604                 self.report_error(compat_str(de), de.format_traceback())
 605                 break
 606             except MaxDownloadsReached:
 607                 raise
 608             except Exception as e:
 609                 if self.params.get('ignoreerrors', False):
 610                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 611                     break
 612                 else:
 613                     raise
 614         else:
 615             self.report_error('no suitable InfoExtractor for URL %s' % url)
 616
 617     def add_default_extra_info(self, ie_result, ie, url):
 618         self.add_extra_info(ie_result, {
 619             'extractor': ie.IE_NAME,
 620             'webpage_url': url,
 621             'webpage_url_basename': url_basename(url),
 622             'extractor_key': ie.ie_key(),
 623         })
 624
 625     def process_ie_result(self, ie_result, download=True, extra_info={}):
 626         """
 627         Take the result of the ie(may be modified) and resolve all unresolved
 628         references (URLs, playlist items).
 629
 630         It will also download the videos if 'download'.
 631         Returns the resolved ie_result.
 632         """
 633
 634         result_type = ie_result.get('_type', 'video')
 635
 636         if result_type in ('url', 'url_transparent'):
 637             extract_flat = self.params.get('extract_flat', False)
 638             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
 639                     extract_flat is True):
 640                 if self.params.get('forcejson', False):
 641                     self.to_stdout(json.dumps(ie_result))
 642                 return ie_result
 643
 644         if result_type == 'video':
 645             self.add_extra_info(ie_result, extra_info)
 646             return self.process_video_result(ie_result, download=download)
 647         elif result_type == 'url':
 648             # We have to add extra_info to the results because it may be
 649             # contained in a playlist
 650             return self.extract_info(ie_result['url'],
 651                                      download,
 652                                      ie_key=ie_result.get('ie_key'),
 653                                      extra_info=extra_info)
 654         elif result_type == 'url_transparent':
 655             # Use the information from the embedding page
 656             info = self.extract_info(
 657                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 658                 extra_info=extra_info, download=False, process=False)
 659
 660             force_properties = dict(
 661                 (k, v) for k, v in ie_result.items() if v is not None)
 662             for f in ('_type', 'url'):
 663                 if f in force_properties:
 664                     del force_properties[f]
 665             new_result = info.copy()
 666             new_result.update(force_properties)
 667
 668             assert new_result.get('_type') != 'url_transparent'
 669
 670             return self.process_ie_result(
 671                 new_result, download=download, extra_info=extra_info)
 672         elif result_type == 'playlist' or result_type == 'multi_video':
 673             # We process each entry in the playlist
 674             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 675             self.to_screen('[download] Downloading playlist: %s' % playlist)
 676
 677             playlist_results = []
 678
 679             playliststart = self.params.get('playliststart', 1) - 1
 680             playlistend = self.params.get('playlistend', None)
 681             # For backwards compatibility, interpret -1 as whole list
 682             if playlistend == -1:
 683                 playlistend = None
 684
 685             ie_entries = ie_result['entries']
 686             if isinstance(ie_entries, list):
 687                 n_all_entries = len(ie_entries)
 688                 entries = ie_entries[playliststart:playlistend]
 689                 n_entries = len(entries)
 690                 self.to_screen(
 691                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
 692                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
 693             elif isinstance(ie_entries, PagedList):
 694                 entries = ie_entries.getslice(
 695                     playliststart, playlistend)
 696                 n_entries = len(entries)
 697                 self.to_screen(
 698                     "[%s] playlist %s: Downloading %d videos" %
 699                     (ie_result['extractor'], playlist, n_entries))
 700             else:  # iterable
 701                 entries = list(itertools.islice(
 702                     ie_entries, playliststart, playlistend))
 703                 n_entries = len(entries)
 704                 self.to_screen(
 705                     "[%s] playlist %s: Downloading %d videos" %
 706                     (ie_result['extractor'], playlist, n_entries))
 707
 708             if self.params.get('playlistreverse', False):
 709                 entries = entries[::-1]
 710
 711             for i, entry in enumerate(entries, 1):
 712                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
 713                 extra = {
 714                     'n_entries': n_entries,
 715                     'playlist': playlist,
 716                     'playlist_id': ie_result.get('id'),
 717                     'playlist_title': ie_result.get('title'),
 718                     'playlist_index': i + playliststart,
 719                     'extractor': ie_result['extractor'],
 720                     'webpage_url': ie_result['webpage_url'],
 721                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
 722                     'extractor_key': ie_result['extractor_key'],
 723                 }
 724
 725                 reason = self._match_entry(entry)
 726                 if reason is not None:
 727                     self.to_screen('[download] ' + reason)
 728                     continue
 729
 730                 entry_result = self.process_ie_result(entry,
 731                                                       download=download,
 732                                                       extra_info=extra)
 733                 playlist_results.append(entry_result)
 734             ie_result['entries'] = playlist_results
 735             return ie_result
 736         elif result_type == 'compat_list':
 737             self.report_warning(
 738                 'Extractor %s returned a compat_list result. '
 739                 'It needs to be updated.' % ie_result.get('extractor'))
 740
 741             def _fixup(r):
 742                 self.add_extra_info(
 743                     r,
 744                     {
 745                         'extractor': ie_result['extractor'],
 746                         'webpage_url': ie_result['webpage_url'],
 747                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
 748                         'extractor_key': ie_result['extractor_key'],
 749                     }
 750                 )
 751                 return r
 752             ie_result['entries'] = [
 753                 self.process_ie_result(_fixup(r), download, extra_info)
 754                 for r in ie_result['entries']
 755             ]
 756             return ie_result
 757         else:
 758             raise Exception('Invalid result type: %s' % result_type)
 759
 760     def select_format(self, format_spec, available_formats):
 761         if format_spec == 'best' or format_spec is None:
 762             return available_formats[-1]
 763         elif format_spec == 'worst':
 764             return available_formats[0]
 765         elif format_spec == 'bestaudio':
 766             audio_formats = [
 767                 f for f in available_formats
 768                 if f.get('vcodec') == 'none']
 769             if audio_formats:
 770                 return audio_formats[-1]
 771         elif format_spec == 'worstaudio':
 772             audio_formats = [
 773                 f for f in available_formats
 774                 if f.get('vcodec') == 'none']
 775             if audio_formats:
 776                 return audio_formats[0]
 777         elif format_spec == 'bestvideo':
 778             video_formats = [
 779                 f for f in available_formats
 780                 if f.get('acodec') == 'none']
 781             if video_formats:
 782                 return video_formats[-1]
 783         elif format_spec == 'worstvideo':
 784             video_formats = [
 785                 f for f in available_formats
 786                 if f.get('acodec') == 'none']
 787             if video_formats:
 788                 return video_formats[0]
 789         else:
 790             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
 791             if format_spec in extensions:
 792                 filter_f = lambda f: f['ext'] == format_spec
 793             else:
 794                 filter_f = lambda f: f['format_id'] == format_spec
 795             matches = list(filter(filter_f, available_formats))
 796             if matches:
 797                 return matches[-1]
 798         return None
 799
 800     def process_video_result(self, info_dict, download=True):
 801         assert info_dict.get('_type', 'video') == 'video'
 802
 803         if 'id' not in info_dict:
 804             raise ExtractorError('Missing "id" field in extractor result')
 805         if 'title' not in info_dict:
 806             raise ExtractorError('Missing "title" field in extractor result')
 807
 808         if 'playlist' not in info_dict:
 809             # It isn't part of a playlist
 810             info_dict['playlist'] = None
 811             info_dict['playlist_index'] = None
 812
 813         thumbnails = info_dict.get('thumbnails')
 814         if thumbnails:
 815             thumbnails.sort(key=lambda t: (
 816                 t.get('width'), t.get('height'), t.get('url')))
 817             for t in thumbnails:
 818                 if 'width' in t and 'height' in t:
 819                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
 820
 821         if thumbnails and 'thumbnail' not in info_dict:
 822             info_dict['thumbnail'] = thumbnails[-1]['url']
 823
 824         if 'display_id' not in info_dict and 'id' in info_dict:
 825             info_dict['display_id'] = info_dict['id']
 826
 827         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
 828             # Working around negative timestamps in Windows
 829             # (see http://bugs.python.org/issue1646728)
 830             if info_dict['timestamp'] < 0 and os.name == 'nt':
 831                 info_dict['timestamp'] = 0
 832             upload_date = datetime.datetime.utcfromtimestamp(
 833                 info_dict['timestamp'])
 834             info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
 835
 836         # This extractors handle format selection themselves
 837         if info_dict['extractor'] in ['Youku']:
 838             if download:
 839                 self.process_info(info_dict)
 840             return info_dict
 841
 842         # We now pick which formats have to be downloaded
 843         if info_dict.get('formats') is None:
 844             # There's only one format available
 845             formats = [info_dict]
 846         else:
 847             formats = info_dict['formats']
 848
 849         if not formats:
 850             raise ExtractorError('No video formats found!')
 851
 852         # We check that all the formats have the format and format_id fields
 853         for i, format in enumerate(formats):
 854             if 'url' not in format:
 855                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
 856
 857             if format.get('format_id') is None:
 858                 format['format_id'] = compat_str(i)
 859             if format.get('format') is None:
 860                 format['format'] = '{id} - {res}{note}'.format(
 861                     id=format['format_id'],
 862                     res=self.format_resolution(format),
 863                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
 864                 )
 865             # Automatically determine file extension if missing
 866             if 'ext' not in format:
 867                 format['ext'] = determine_ext(format['url']).lower()
 868
 869         format_limit = self.params.get('format_limit', None)
 870         if format_limit:
 871             formats = list(takewhile_inclusive(
 872                 lambda f: f['format_id'] != format_limit, formats
 873             ))
 874
 875         # TODO Central sorting goes here
 876
 877         if formats[0] is not info_dict:
 878             # only set the 'formats' fields if the original info_dict list them
 879             # otherwise we end up with a circular reference, the first (and unique)
 880             # element in the 'formats' field in info_dict is info_dict itself,
 881             # wich can't be exported to json
 882             info_dict['formats'] = formats
 883         if self.params.get('listformats', None):
 884             self.list_formats(info_dict)
 885             return
 886
 887         req_format = self.params.get('format')
 888         if req_format is None:
 889             req_format = 'best'
 890         formats_to_download = []
 891         # The -1 is for supporting YoutubeIE
 892         if req_format in ('-1', 'all'):
 893             formats_to_download = formats
 894         else:
 895             for rfstr in req_format.split(','):
 896                 # We can accept formats requested in the format: 34/5/best, we pick
 897                 # the first that is available, starting from left
 898                 req_formats = rfstr.split('/')
 899                 for rf in req_formats:
 900                     if re.match(r'.+?\+.+?', rf) is not None:
 901                         # Two formats have been requested like '137+139'
 902                         format_1, format_2 = rf.split('+')
 903                         formats_info = (self.select_format(format_1, formats),
 904                                         self.select_format(format_2, formats))
 905                         if all(formats_info):
 906                             # The first format must contain the video and the
 907                             # second the audio
 908                             if formats_info[0].get('vcodec') == 'none':
 909                                 self.report_error('The first format must '
 910                                                   'contain the video, try using '
 911                                                   '"-f %s+%s"' % (format_2, format_1))
 912                                 return
 913                             output_ext = (
 914                                 formats_info[0]['ext']
 915                                 if self.params.get('merge_output_format') is None
 916                                 else self.params['merge_output_format'])
 917                             selected_format = {
 918                                 'requested_formats': formats_info,
 919                                 'format': rf,
 920                                 'ext': formats_info[0]['ext'],
 921                                 'width': formats_info[0].get('width'),
 922                                 'height': formats_info[0].get('height'),
 923                                 'resolution': formats_info[0].get('resolution'),
 924                                 'fps': formats_info[0].get('fps'),
 925                                 'vcodec': formats_info[0].get('vcodec'),
 926                                 'vbr': formats_info[0].get('vbr'),
 927                                 'acodec': formats_info[1].get('acodec'),
 928                                 'abr': formats_info[1].get('abr'),
 929                                 'ext': output_ext,
 930                             }
 931                         else:
 932                             selected_format = None
 933                     else:
 934                         selected_format = self.select_format(rf, formats)
 935                     if selected_format is not None:
 936                         formats_to_download.append(selected_format)
 937                         break
 938         if not formats_to_download:
 939             raise ExtractorError('requested format not available',
 940                                  expected=True)
 941
 942         if download:
 943             if len(formats_to_download) > 1:
 944                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
 945             for format in formats_to_download:
 946                 new_info = dict(info_dict)
 947                 new_info.update(format)
 948                 self.process_info(new_info)
 949         # We update the info dict with the best quality format (backwards compatibility)
 950         info_dict.update(formats_to_download[-1])
 951         return info_dict
 952
 953     def process_info(self, info_dict):
 954         """Process a single resolved IE result."""
 955
 956         assert info_dict.get('_type', 'video') == 'video'
 957
 958         max_downloads = self.params.get('max_downloads')
 959         if max_downloads is not None:
 960             if self._num_downloads >= int(max_downloads):
 961                 raise MaxDownloadsReached()
 962
 963         info_dict['fulltitle'] = info_dict['title']
 964         if len(info_dict['title']) > 200:
 965             info_dict['title'] = info_dict['title'][:197] + '...'
 966
 967         # Keep for backwards compatibility
 968         info_dict['stitle'] = info_dict['title']
 969
 970         if 'format' not in info_dict:
 971             info_dict['format'] = info_dict['ext']
 972
 973         reason = self._match_entry(info_dict)
 974         if reason is not None:
 975             self.to_screen('[download] ' + reason)
 976             return
 977
 978         self._num_downloads += 1
 979
 980         filename = self.prepare_filename(info_dict)
 981
 982         # Forced printings
 983         if self.params.get('forcetitle', False):
 984             self.to_stdout(info_dict['fulltitle'])
 985         if self.params.get('forceid', False):
 986             self.to_stdout(info_dict['id'])
 987         if self.params.get('forceurl', False):
 988             if info_dict.get('requested_formats') is not None:
 989                 for f in info_dict['requested_formats']:
 990                     self.to_stdout(f['url'] + f.get('play_path', ''))
 991             else:
 992                 # For RTMP URLs, also include the playpath
 993                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
 994         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
 995             self.to_stdout(info_dict['thumbnail'])
 996         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
 997             self.to_stdout(info_dict['description'])
 998         if self.params.get('forcefilename', False) and filename is not None:
 999             self.to_stdout(filename)
1000         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1001             self.to_stdout(formatSeconds(info_dict['duration']))
1002         if self.params.get('forceformat', False):
1003             self.to_stdout(info_dict['format'])
1004         if self.params.get('forcejson', False):
1005             info_dict['_filename'] = filename
1006             self.to_stdout(json.dumps(info_dict))
1007         if self.params.get('dump_single_json', False):
1008             info_dict['_filename'] = filename
1009
1010         # Do nothing else if in simulate mode
1011         if self.params.get('simulate', False):
1012             return
1013
1014         if filename is None:
1015             return
1016
1017         try:
1018             dn = os.path.dirname(encodeFilename(filename))
1019             if dn and not os.path.exists(dn):
1020                 os.makedirs(dn)
1021         except (OSError, IOError) as err:
1022             self.report_error('unable to create directory ' + compat_str(err))
1023             return
1024
1025         if self.params.get('writedescription', False):
1026             descfn = filename + '.description'
1027             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1028                 self.to_screen('[info] Video description is already present')
1029             elif info_dict.get('description') is None:
1030                 self.report_warning('There\'s no description to write.')
1031             else:
1032                 try:
1033                     self.to_screen('[info] Writing video description to: ' + descfn)
1034                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1035                         descfile.write(info_dict['description'])
1036                 except (OSError, IOError):
1037                     self.report_error('Cannot write description file ' + descfn)
1038                     return
1039
1040         if self.params.get('writeannotations', False):
1041             annofn = filename + '.annotations.xml'
1042             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1043                 self.to_screen('[info] Video annotations are already present')
1044             else:
1045                 try:
1046                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1047                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1048                         annofile.write(info_dict['annotations'])
1049                 except (KeyError, TypeError):
1050                     self.report_warning('There are no annotations to write.')
1051                 except (OSError, IOError):
1052                     self.report_error('Cannot write annotations file: ' + annofn)
1053                     return
1054
1055         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1056                                        self.params.get('writeautomaticsub')])
1057
1058         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
1059             # subtitles download errors are already managed as troubles in relevant IE
1060             # that way it will silently go on when used with unsupporting IE
1061             subtitles = info_dict['subtitles']
1062             sub_format = self.params.get('subtitlesformat', 'srt')
1063             for sub_lang in subtitles.keys():
1064                 sub = subtitles[sub_lang]
1065                 if sub is None:
1066                     continue
1067                 try:
1068                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1069                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1070                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1071                     else:
1072                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1073                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1074                             subfile.write(sub)
1075                 except (OSError, IOError):
1076                     self.report_error('Cannot write subtitles file ' + sub_filename)
1077                     return
1078
1079         if self.params.get('writeinfojson', False):
1080             infofn = os.path.splitext(filename)[0] + '.info.json'
1081             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1082                 self.to_screen('[info] Video description metadata is already present')
1083             else:
1084                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1085                 try:
1086                     write_json_file(info_dict, infofn)
1087                 except (OSError, IOError):
1088                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1089                     return
1090
1091         if self.params.get('writethumbnail', False):
1092             if info_dict.get('thumbnail') is not None:
1093                 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
1094                 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
1095                 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1096                     self.to_screen('[%s] %s: Thumbnail is already present' %
1097                                    (info_dict['extractor'], info_dict['id']))
1098                 else:
1099                     self.to_screen('[%s] %s: Downloading thumbnail ...' %
1100                                    (info_dict['extractor'], info_dict['id']))
1101                     try:
1102                         uf = self.urlopen(info_dict['thumbnail'])
1103                         with open(thumb_filename, 'wb') as thumbf:
1104                             shutil.copyfileobj(uf, thumbf)
1105                         self.to_screen('[%s] %s: Writing thumbnail to: %s' %
1106                                        (info_dict['extractor'], info_dict['id'], thumb_filename))
1107                     except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1108                         self.report_warning('Unable to download thumbnail "%s": %s' %
1109                                             (info_dict['thumbnail'], compat_str(err)))
1110
1111         if not self.params.get('skip_download', False):
1112             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
1113                 success = True
1114             else:
1115                 try:
1116                     def dl(name, info):
1117                         fd = get_suitable_downloader(info)(self, self.params)
1118                         for ph in self._progress_hooks:
1119                             fd.add_progress_hook(ph)
1120                         if self.params.get('verbose'):
1121                             self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1122                         return fd.download(name, info)
1123                     if info_dict.get('requested_formats') is not None:
1124                         downloaded = []
1125                         success = True
1126                         merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1127                         if not merger._executable:
1128                             postprocessors = []
1129                             self.report_warning('You have requested multiple '
1130                                                 'formats but ffmpeg or avconv are not installed.'
1131                                                 ' The formats won\'t be merged')
1132                         else:
1133                             postprocessors = [merger]
1134                         for f in info_dict['requested_formats']:
1135                             new_info = dict(info_dict)
1136                             new_info.update(f)
1137                             fname = self.prepare_filename(new_info)
1138                             fname = prepend_extension(fname, 'f%s' % f['format_id'])
1139                             downloaded.append(fname)
1140                             partial_success = dl(fname, new_info)
1141                             success = success and partial_success
1142                         info_dict['__postprocessors'] = postprocessors
1143                         info_dict['__files_to_merge'] = downloaded
1144                     else:
1145                         # Just a single file
1146                         success = dl(filename, info_dict)
1147                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1148                     self.report_error('unable to download video data: %s' % str(err))
1149                     return
1150                 except (OSError, IOError) as err:
1151                     raise UnavailableVideoError(err)
1152                 except (ContentTooShortError, ) as err:
1153                     self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1154                     return
1155
1156             if success:
1157                 try:
1158                     self.post_process(filename, info_dict)
1159                 except (PostProcessingError) as err:
1160                     self.report_error('postprocessing: %s' % str(err))
1161                     return
1162                 self.record_download_archive(info_dict)
1163
1164     def download(self, url_list):
1165         """Download a given list of URLs."""
1166         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1167         if (len(url_list) > 1 and
1168                 '%' not in outtmpl
1169                 and self.params.get('max_downloads') != 1):
1170             raise SameFileError(outtmpl)
1171
1172         for url in url_list:
1173             try:
1174                 # It also downloads the videos
1175                 res = self.extract_info(url)
1176             except UnavailableVideoError:
1177                 self.report_error('unable to download video')
1178             except MaxDownloadsReached:
1179                 self.to_screen('[info] Maximum number of downloaded files reached.')
1180                 raise
1181             else:
1182                 if self.params.get('dump_single_json', False):
1183                     self.to_stdout(json.dumps(res))
1184
1185         return self._download_retcode
1186
1187     def download_with_info_file(self, info_filename):
1188         with io.open(info_filename, 'r', encoding='utf-8') as f:
1189             info = json.load(f)
1190         try:
1191             self.process_ie_result(info, download=True)
1192         except DownloadError:
1193             webpage_url = info.get('webpage_url')
1194             if webpage_url is not None:
1195                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1196                 return self.download([webpage_url])
1197             else:
1198                 raise
1199         return self._download_retcode
1200
1201     def post_process(self, filename, ie_info):
1202         """Run all the postprocessors on the given file."""
1203         info = dict(ie_info)
1204         info['filepath'] = filename
1205         keep_video = None
1206         pps_chain = []
1207         if ie_info.get('__postprocessors') is not None:
1208             pps_chain.extend(ie_info['__postprocessors'])
1209         pps_chain.extend(self._pps)
1210         for pp in pps_chain:
1211             try:
1212                 keep_video_wish, new_info = pp.run(info)
1213                 if keep_video_wish is not None:
1214                     if keep_video_wish:
1215                         keep_video = keep_video_wish
1216                     elif keep_video is None:
1217                         # No clear decision yet, let IE decide
1218                         keep_video = keep_video_wish
1219             except PostProcessingError as e:
1220                 self.report_error(e.msg)
1221         if keep_video is False and not self.params.get('keepvideo', False):
1222             try:
1223                 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1224                 os.remove(encodeFilename(filename))
1225             except (IOError, OSError):
1226                 self.report_warning('Unable to remove downloaded video file')
1227
1228     def _make_archive_id(self, info_dict):
1229         # Future-proof against any change in case
1230         # and backwards compatibility with prior versions
1231         extractor = info_dict.get('extractor_key')
1232         if extractor is None:
1233             if 'id' in info_dict:
1234                 extractor = info_dict.get('ie_key')  # key in a playlist
1235         if extractor is None:
1236             return None  # Incomplete video information
1237         return extractor.lower() + ' ' + info_dict['id']
1238
1239     def in_download_archive(self, info_dict):
1240         fn = self.params.get('download_archive')
1241         if fn is None:
1242             return False
1243
1244         vid_id = self._make_archive_id(info_dict)
1245         if vid_id is None:
1246             return False  # Incomplete video information
1247
1248         try:
1249             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1250                 for line in archive_file:
1251                     if line.strip() == vid_id:
1252                         return True
1253         except IOError as ioe:
1254             if ioe.errno != errno.ENOENT:
1255                 raise
1256         return False
1257
1258     def record_download_archive(self, info_dict):
1259         fn = self.params.get('download_archive')
1260         if fn is None:
1261             return
1262         vid_id = self._make_archive_id(info_dict)
1263         assert vid_id
1264         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1265             archive_file.write(vid_id + '\n')
1266
1267     @staticmethod
1268     def format_resolution(format, default='unknown'):
1269         if format.get('vcodec') == 'none':
1270             return 'audio only'
1271         if format.get('resolution') is not None:
1272             return format['resolution']
1273         if format.get('height') is not None:
1274             if format.get('width') is not None:
1275                 res = '%sx%s' % (format['width'], format['height'])
1276             else:
1277                 res = '%sp' % format['height']
1278         elif format.get('width') is not None:
1279             res = '?x%d' % format['width']
1280         else:
1281             res = default
1282         return res
1283
1284     def _format_note(self, fdict):
1285         res = ''
1286         if fdict.get('ext') in ['f4f', 'f4m']:
1287             res += '(unsupported) '
1288         if fdict.get('format_note') is not None:
1289             res += fdict['format_note'] + ' '
1290         if fdict.get('tbr') is not None:
1291             res += '%4dk ' % fdict['tbr']
1292         if fdict.get('container') is not None:
1293             if res:
1294                 res += ', '
1295             res += '%s container' % fdict['container']
1296         if (fdict.get('vcodec') is not None and
1297                 fdict.get('vcodec') != 'none'):
1298             if res:
1299                 res += ', '
1300             res += fdict['vcodec']
1301             if fdict.get('vbr') is not None:
1302                 res += '@'
1303         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1304             res += 'video@'
1305         if fdict.get('vbr') is not None:
1306             res += '%4dk' % fdict['vbr']
1307         if fdict.get('fps') is not None:
1308             res += ', %sfps' % fdict['fps']
1309         if fdict.get('acodec') is not None:
1310             if res:
1311                 res += ', '
1312             if fdict['acodec'] == 'none':
1313                 res += 'video only'
1314             else:
1315                 res += '%-5s' % fdict['acodec']
1316         elif fdict.get('abr') is not None:
1317             if res:
1318                 res += ', '
1319             res += 'audio'
1320         if fdict.get('abr') is not None:
1321             res += '@%3dk' % fdict['abr']
1322         if fdict.get('asr') is not None:
1323             res += ' (%5dHz)' % fdict['asr']
1324         if fdict.get('filesize') is not None:
1325             if res:
1326                 res += ', '
1327             res += format_bytes(fdict['filesize'])
1328         elif fdict.get('filesize_approx') is not None:
1329             if res:
1330                 res += ', '
1331             res += '~' + format_bytes(fdict['filesize_approx'])
1332         return res
1333
1334     def list_formats(self, info_dict):
1335         def line(format, idlen=20):
1336             return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1337                 format['format_id'],
1338                 format['ext'],
1339                 self.format_resolution(format),
1340                 self._format_note(format),
1341             ))
1342
1343         formats = info_dict.get('formats', [info_dict])
1344         idlen = max(len('format code'),
1345                     max(len(f['format_id']) for f in formats))
1346         formats_s = [
1347             line(f, idlen) for f in formats
1348             if f.get('preference') is None or f['preference'] >= -1000]
1349         if len(formats) > 1:
1350             formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1351             formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1352
1353         header_line = line({
1354             'format_id': 'format code', 'ext': 'extension',
1355             'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1356         self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1357                        (info_dict['id'], header_line, '\n'.join(formats_s)))
1358
1359     def urlopen(self, req):
1360         """ Start an HTTP download """
1361
1362         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1363         # always respected by websites, some tend to give out URLs with non percent-encoded
1364         # non-ASCII characters (see telemb.py, ard.py [#3412])
1365         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1366         # To work around aforementioned issue we will replace request's original URL with
1367         # percent-encoded one
1368         req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
1369         url = req if req_is_string else req.get_full_url()
1370         url_escaped = escape_url(url)
1371
1372         # Substitute URL if any change after escaping
1373         if url != url_escaped:
1374             if req_is_string:
1375                 req = url_escaped
1376             else:
1377                 req = compat_urllib_request.Request(
1378                     url_escaped, data=req.data, headers=req.headers,
1379                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1380
1381         return self._opener.open(req, timeout=self._socket_timeout)
1382
1383     def print_debug_header(self):
1384         if not self.params.get('verbose'):
1385             return
1386
1387         if type('') is not compat_str:
1388             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1389             self.report_warning(
1390                 'Your Python is broken! Update to a newer and supported version')
1391
1392         stdout_encoding = getattr(
1393             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1394         encoding_str = (
1395             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1396                 locale.getpreferredencoding(),
1397                 sys.getfilesystemencoding(),
1398                 stdout_encoding,
1399                 self.get_encoding()))
1400         write_string(encoding_str, encoding=None)
1401
1402         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1403         try:
1404             sp = subprocess.Popen(
1405                 ['git', 'rev-parse', '--short', 'HEAD'],
1406                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1407                 cwd=os.path.dirname(os.path.abspath(__file__)))
1408             out, err = sp.communicate()
1409             out = out.decode().strip()
1410             if re.match('[0-9a-f]+', out):
1411                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1412         except:
1413             try:
1414                 sys.exc_clear()
1415             except:
1416                 pass
1417         self._write_string('[debug] Python version %s - %s\n' % (
1418             platform.python_version(), platform_name()))
1419
1420         exe_versions = FFmpegPostProcessor.get_versions()
1421         exe_versions['rtmpdump'] = rtmpdump_version()
1422         exe_str = ', '.join(
1423             '%s %s' % (exe, v)
1424             for exe, v in sorted(exe_versions.items())
1425             if v
1426         )
1427         if not exe_str:
1428             exe_str = 'none'
1429         self._write_string('[debug] exe versions: %s\n' % exe_str)
1430
1431         proxy_map = {}
1432         for handler in self._opener.handlers:
1433             if hasattr(handler, 'proxies'):
1434                 proxy_map.update(handler.proxies)
1435         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1436
1437     def _setup_opener(self):
1438         timeout_val = self.params.get('socket_timeout')
1439         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1440
1441         opts_cookiefile = self.params.get('cookiefile')
1442         opts_proxy = self.params.get('proxy')
1443
1444         if opts_cookiefile is None:
1445             self.cookiejar = compat_cookiejar.CookieJar()
1446         else:
1447             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1448                 opts_cookiefile)
1449             if os.access(opts_cookiefile, os.R_OK):
1450                 self.cookiejar.load()
1451
1452         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1453             self.cookiejar)
1454         if opts_proxy is not None:
1455             if opts_proxy == '':
1456                 proxies = {}
1457             else:
1458                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1459         else:
1460             proxies = compat_urllib_request.getproxies()
1461             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1462             if 'http' in proxies and 'https' not in proxies:
1463                 proxies['https'] = proxies['http']
1464         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1465
1466         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1467         https_handler = make_HTTPS_handler(
1468             self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1469         ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1470         opener = compat_urllib_request.build_opener(
1471             https_handler, proxy_handler, cookie_processor, ydlh)
1472         # Delete the default user-agent header, which would otherwise apply in
1473         # cases where our custom HTTP handler doesn't come into play
1474         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1475         opener.addheaders = []
1476         self._opener = opener
1477
1478     def encode(self, s):
1479         if isinstance(s, bytes):
1480             return s  # Already encoded
1481
1482         try:
1483             return s.encode(self.get_encoding())
1484         except UnicodeEncodeError as err:
1485             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1486             raise
1487
1488     def get_encoding(self):
1489         encoding = self.params.get('encoding')
1490         if encoding is None:
1491             encoding = preferredencoding()
1492         return encoding