_ Git - youtube-dl/blob - youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import datetime
   8 import errno
   9 import io
  10 import itertools
  11 import json
  12 import locale
  13 import operator
  14 import os
  15 import platform
  16 import re
  17 import shutil
  18 import subprocess
  19 import socket
  20 import sys
  21 import time
  22 import traceback
  23
  24 if os.name == 'nt':
  25     import ctypes
  26
  27 from .compat import (
  28     compat_cookiejar,
  29     compat_expanduser,
  30     compat_http_client,
  31     compat_kwargs,
  32     compat_str,
  33     compat_urllib_error,
  34     compat_urllib_request,
  35 )
  36 from .utils import (
  37     escape_url,
  38     ContentTooShortError,
  39     date_from_str,
  40     DateRange,
  41     DEFAULT_OUTTMPL,
  42     determine_ext,
  43     DownloadError,
  44     encodeFilename,
  45     ExtractorError,
  46     format_bytes,
  47     formatSeconds,
  48     get_term_width,
  49     locked_file,
  50     make_HTTPS_handler,
  51     MaxDownloadsReached,
  52     PagedList,
  53     parse_filesize,
  54     PostProcessingError,
  55     platform_name,
  56     preferredencoding,
  57     SameFileError,
  58     sanitize_filename,
  59     subtitles_filename,
  60     takewhile_inclusive,
  61     UnavailableVideoError,
  62     url_basename,
  63     version_tuple,
  64     write_json_file,
  65     write_string,
  66     YoutubeDLHandler,
  67     prepend_extension,
  68     args_to_str,
  69     age_restricted,
  70 )
  71 from .cache import Cache
  72 from .extractor import get_info_extractor, gen_extractors
  73 from .downloader import get_suitable_downloader
  74 from .downloader.rtmp import rtmpdump_version
  75 from .postprocessor import (
  76     FFmpegFixupStretchedPP,
  77     FFmpegMergerPP,
  78     FFmpegPostProcessor,
  79     get_postprocessor,
  80 )
  81 from .version import __version__
  82
  83
  84 class YoutubeDL(object):
  85     """YoutubeDL class.
  86
  87     YoutubeDL objects are the ones responsible of downloading the
  88     actual video file and writing it to disk if the user has requested
  89     it, among some other tasks. In most cases there should be one per
  90     program. As, given a video URL, the downloader doesn't know how to
  91     extract all the needed information, task that InfoExtractors do, it
  92     has to pass the URL to one of them.
  93
  94     For this, YoutubeDL objects have a method that allows
  95     InfoExtractors to be registered in a given order. When it is passed
  96     a URL, the YoutubeDL object hands it to the first InfoExtractor it
  97     finds that reports being able to handle it. The InfoExtractor extracts
  98     all the information about the video or videos the URL refers to, and
  99     YoutubeDL processes the extracted information, possibly using a File
 100     Downloader to download the video.
 101
 102     YoutubeDL objects accept a lot of parameters. In order not to saturate
 103     the object constructor with arguments, it receives a dictionary of
 104     options instead. These options are available through the params
 105     attribute for the InfoExtractors to use. The YoutubeDL also
 106     registers itself as the downloader in charge for the InfoExtractors
 107     that are added to it, so this is a "mutual registration".
 108
 109     Available options:
 110
 111     username:          Username for authentication purposes.
 112     password:          Password for authentication purposes.
 113     videopassword:     Password for accessing a video.
 114     usenetrc:          Use netrc for authentication instead.
 115     verbose:           Print additional info to stdout.
 116     quiet:             Do not print messages to stdout.
 117     no_warnings:       Do not print out anything for warnings.
 118     forceurl:          Force printing final URL.
 119     forcetitle:        Force printing title.
 120     forceid:           Force printing ID.
 121     forcethumbnail:    Force printing thumbnail URL.
 122     forcedescription:  Force printing description.
 123     forcefilename:     Force printing final filename.
 124     forceduration:     Force printing duration.
 125     forcejson:         Force printing info_dict as JSON.
 126     dump_single_json:  Force printing the info_dict of the whole playlist
 127                        (or video) as a single JSON line.
 128     simulate:          Do not download the video files.
 129     format:            Video format code. See options.py for more information.
 130     format_limit:      Highest quality format to try.
 131     outtmpl:           Template for output names.
 132     restrictfilenames: Do not allow "&" and spaces in file names
 133     ignoreerrors:      Do not stop on download errors.
 134     nooverwrites:      Prevent overwriting files.
 135     playliststart:     Playlist item to start at.
 136     playlistend:       Playlist item to end at.
 137     playlistreverse:   Download playlist items in reverse order.
 138     matchtitle:        Download only matching titles.
 139     rejecttitle:       Reject downloads for matching titles.
 140     logger:            Log messages to a logging.Logger instance.
 141     logtostderr:       Log messages to stderr instead of stdout.
 142     writedescription:  Write the video description to a .description file
 143     writeinfojson:     Write the video description to a .info.json file
 144     writeannotations:  Write the video annotations to a .annotations.xml file
 145     writethumbnail:    Write the thumbnail image to a file
 146     writesubtitles:    Write the video subtitles to a file
 147     writeautomaticsub: Write the automatic subtitles to a file
 148     allsubtitles:      Downloads all the subtitles of the video
 149                        (requires writesubtitles or writeautomaticsub)
 150     listsubtitles:     Lists all available subtitles for the video
 151     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
 152     subtitleslangs:    List of languages of the subtitles to download
 153     keepvideo:         Keep the video file after post-processing
 154     daterange:         A DateRange object, download only if the upload_date is in the range.
 155     skip_download:     Skip the actual download of the video file
 156     cachedir:          Location of the cache files in the filesystem.
 157                        False to disable filesystem cache.
 158     noplaylist:        Download single video instead of a playlist if in doubt.
 159     age_limit:         An integer representing the user's age in years.
 160                        Unsuitable videos for the given age are skipped.
 161     min_views:         An integer representing the minimum view count the video
 162                        must have in order to not be skipped.
 163                        Videos without view count information are always
 164                        downloaded. None for no limit.
 165     max_views:         An integer representing the maximum view count.
 166                        Videos that are more popular than that are not
 167                        downloaded.
 168                        Videos without view count information are always
 169                        downloaded. None for no limit.
 170     download_archive:  File name of a file where all downloads are recorded.
 171                        Videos already present in the file are not downloaded
 172                        again.
 173     cookiefile:        File name where cookies should be read from and dumped to.
 174     nocheckcertificate:Do not verify SSL certificates
 175     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 176                        At the moment, this is only supported by YouTube.
 177     proxy:             URL of the proxy server to use
 178     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 179     bidi_workaround:   Work around buggy terminals without bidirectional text
 180                        support, using fribidi
 181     debug_printtraffic:Print out sent and received HTTP traffic
 182     include_ads:       Download ads as well
 183     default_search:    Prepend this string if an input url is not valid.
 184                        'auto' for elaborate guessing
 185     encoding:          Use this encoding instead of the system-specified.
 186     extract_flat:      Do not resolve URLs, return the immediate result.
 187                        Pass in 'in_playlist' to only show this behavior for
 188                        playlist items.
 189     postprocessors:    A list of dictionaries, each with an entry
 190                        * key:  The name of the postprocessor. See
 191                                youtube_dl/postprocessor/__init__.py for a list.
 192                        as well as any further keyword arguments for the
 193                        postprocessor.
 194     progress_hooks:    A list of functions that get called on download
 195                        progress, with a dictionary with the entries
 196                        * filename: The final filename
 197                        * status: One of "downloading" and "finished"
 198
 199                        The dict may also have some of the following entries:
 200
 201                        * downloaded_bytes: Bytes on disk
 202                        * total_bytes: Size of the whole file, None if unknown
 203                        * tmpfilename: The filename we're currently writing to
 204                        * eta: The estimated time in seconds, None if unknown
 205                        * speed: The download speed in bytes/second, None if
 206                                 unknown
 207
 208                        Progress hooks are guaranteed to be called at least once
 209                        (with status "finished") if the download is successful.
 210     merge_output_format: Extension to use when merging formats.
 211     fixup:             Automatically correct known faults of the file.
 212                        One of:
 213                        - "never": do nothing
 214                        - "warn": only emit a warning
 215                        - "detect_or_warn": check whether we can do anything
 216                                            about it, warn otherwise
 217     source_address:    (Experimental) Client-side IP address to bind to.
 218     call_home:         Boolean, true iff we are allowed to contact the
 219                        youtube-dl servers for debugging.
 220
 221
 222     The following parameters are not used by YoutubeDL itself, they are used by
 223     the FileDownloader:
 224     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 225     noresizebuffer, retries, continuedl, noprogress, consoletitle
 226
 227     The following options are used by the post processors:
 228     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
 229                        otherwise prefer avconv.
 230     exec_cmd:          Arbitrary command to run after downloading
 231     """
 232
    # Class-level placeholders. __init__ rebinds every one of these on the
    # instance (including fresh lists for _ies and _pps), so the values
    # below are only seen on an object whose __init__ has not run.
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None
 239
 240     def __init__(self, params=None, auto_init=True):
 241         """Create a FileDownloader object with the given options."""
 242         if params is None:
 243             params = {}
 244         self._ies = []
 245         self._ies_instances = {}
 246         self._pps = []
 247         self._progress_hooks = []
 248         self._download_retcode = 0
 249         self._num_downloads = 0
 250         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 251         self._err_file = sys.stderr
 252         self.params = params
 253         self.cache = Cache(self)
 254
 255         if params.get('bidi_workaround', False):
 256             try:
 257                 import pty
 258                 master, slave = pty.openpty()
 259                 width = get_term_width()
 260                 if width is None:
 261                     width_args = []
 262                 else:
 263                     width_args = ['-w', str(width)]
 264                 sp_kwargs = dict(
 265                     stdin=subprocess.PIPE,
 266                     stdout=slave,
 267                     stderr=self._err_file)
 268                 try:
 269                     self._output_process = subprocess.Popen(
 270                         ['bidiv'] + width_args, **sp_kwargs
 271                     )
 272                 except OSError:
 273                     self._output_process = subprocess.Popen(
 274                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 275                 self._output_channel = os.fdopen(master, 'rb')
 276             except OSError as ose:
 277                 if ose.errno == 2:
 278                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 279                 else:
 280                     raise
 281
 282         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 283                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 284                 and not params.get('restrictfilenames', False)):
 285             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 286             self.report_warning(
 287                 'Assuming --restrict-filenames since file system encoding '
 288                 'cannot encode all characters. '
 289                 'Set the LC_ALL environment variable to fix this.')
 290             self.params['restrictfilenames'] = True
 291
 292         if '%(stitle)s' in self.params.get('outtmpl', ''):
 293             self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 294
 295         self._setup_opener()
 296
 297         if auto_init:
 298             self.print_debug_header()
 299             self.add_default_info_extractors()
 300
 301         for pp_def_raw in self.params.get('postprocessors', []):
 302             pp_class = get_postprocessor(pp_def_raw['key'])
 303             pp_def = dict(pp_def_raw)
 304             del pp_def['key']
 305             pp = pp_class(self, **compat_kwargs(pp_def))
 306             self.add_post_processor(pp)
 307
 308         for ph in self.params.get('progress_hooks', []):
 309             self.add_progress_hook(ph)
 310
 311     def warn_if_short_id(self, argv):
 312         # short YouTube ID starting with dash?
 313         idxs = [
 314             i for i, a in enumerate(argv)
 315             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 316         if idxs:
 317             correct_argv = (
 318                 ['youtube-dl'] +
 319                 [a for i, a in enumerate(argv) if i not in idxs] +
 320                 ['--'] + [argv[i] for i in idxs]
 321             )
 322             self.report_warning(
 323                 'Long argument string detected. '
 324                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 325                 args_to_str(correct_argv))
 326
 327     def add_info_extractor(self, ie):
 328         """Add an InfoExtractor object to the end of the list."""
 329         self._ies.append(ie)
 330         self._ies_instances[ie.ie_key()] = ie
 331         ie.set_downloader(self)
 332
 333     def get_info_extractor(self, ie_key):
 334         """
 335         Get an instance of an IE with name ie_key, it will try to get one from
 336         the _ies list, if there's no instance it will create a new one and add
 337         it to the extractor list.
 338         """
 339         ie = self._ies_instances.get(ie_key)
 340         if ie is None:
 341             ie = get_info_extractor(ie_key)()
 342             self.add_info_extractor(ie)
 343         return ie
 344
 345     def add_default_info_extractors(self):
 346         """
 347         Add the InfoExtractors returned by gen_extractors to the end of the list
 348         """
 349         for ie in gen_extractors():
 350             self.add_info_extractor(ie)
 351
 352     def add_post_processor(self, pp):
 353         """Add a PostProcessor object to the end of the chain."""
 354         self._pps.append(pp)
 355         pp.set_downloader(self)
 356
 357     def add_progress_hook(self, ph):
 358         """Add the progress hook (currently only for the file downloader)"""
 359         self._progress_hooks.append(ph)
 360
 361     def _bidi_workaround(self, message):
 362         if not hasattr(self, '_output_channel'):
 363             return message
 364
 365         assert hasattr(self, '_output_process')
 366         assert isinstance(message, compat_str)
 367         line_count = message.count('\n') + 1
 368         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 369         self._output_process.stdin.flush()
 370         res = ''.join(self._output_channel.readline().decode('utf-8')
 371                       for _ in range(line_count))
 372         return res[:-len('\n')]
 373
 374     def to_screen(self, message, skip_eol=False):
 375         """Print message to stdout if not in quiet mode."""
 376         return self.to_stdout(message, skip_eol, check_quiet=True)
 377
 378     def _write_string(self, s, out=None):
 379         write_string(s, out=out, encoding=self.params.get('encoding'))
 380
 381     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 382         """Print message to stdout if not in quiet mode."""
 383         if self.params.get('logger'):
 384             self.params['logger'].debug(message)
 385         elif not check_quiet or not self.params.get('quiet', False):
 386             message = self._bidi_workaround(message)
 387             terminator = ['\n', ''][skip_eol]
 388             output = message + terminator
 389
 390             self._write_string(output, self._screen_file)
 391
 392     def to_stderr(self, message):
 393         """Print message to stderr."""
 394         assert isinstance(message, compat_str)
 395         if self.params.get('logger'):
 396             self.params['logger'].error(message)
 397         else:
 398             message = self._bidi_workaround(message)
 399             output = message + '\n'
 400             self._write_string(output, self._err_file)
 401
 402     def to_console_title(self, message):
 403         if not self.params.get('consoletitle', False):
 404             return
 405         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 406             # c_wchar_p() might not be necessary if `message` is
 407             # already of type unicode()
 408             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 409         elif 'TERM' in os.environ:
 410             self._write_string('\033]0;%s\007' % message, self._screen_file)
 411
 412     def save_console_title(self):
 413         if not self.params.get('consoletitle', False):
 414             return
 415         if 'TERM' in os.environ:
 416             # Save the title on stack
 417             self._write_string('\033[22;0t', self._screen_file)
 418
 419     def restore_console_title(self):
 420         if not self.params.get('consoletitle', False):
 421             return
 422         if 'TERM' in os.environ:
 423             # Restore the title from stack
 424             self._write_string('\033[23;0t', self._screen_file)
 425
 426     def __enter__(self):
 427         self.save_console_title()
 428         return self
 429
 430     def __exit__(self, *args):
 431         self.restore_console_title()
 432
 433         if self.params.get('cookiefile') is not None:
 434             self.cookiejar.save()
 435
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        message, if not None, is passed to to_stderr() first.
        tb, if given, is additional traceback information; it is only
        output when the 'verbose' param is set.  When tb is None in verbose
        mode, a traceback is built from the exception currently being
        handled, or from the current call stack if there is none.

        Raises DownloadError unless the 'ignoreerrors' param is set; in
        that case the download return code is set to 1 instead.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # If the active exception carries its own exc_info
                    # attribute, include that traceback first
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += compat_str(traceback.format_exc())
                else:
                    # No active exception: show where trouble() was called from
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Prefer the exc_info carried by the active exception, if any,
            # over the active exception itself
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # Errors are being ignored: only record the failure
        self._download_retcode = 1
 465
 466     def report_warning(self, message):
 467         '''
 468         Print the message to stderr, it will be prefixed with 'WARNING:'
 469         If stderr is a tty file the 'WARNING:' will be colored
 470         '''
 471         if self.params.get('logger') is not None:
 472             self.params['logger'].warning(message)
 473         else:
 474             if self.params.get('no_warnings'):
 475                 return
 476             if self._err_file.isatty() and os.name != 'nt':
 477                 _msg_header = '\033[0;33mWARNING:\033[0m'
 478             else:
 479                 _msg_header = 'WARNING:'
 480             warning_message = '%s %s' % (_msg_header, message)
 481             self.to_stderr(warning_message)
 482
 483     def report_error(self, message, tb=None):
 484         '''
 485         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 486         in red if stderr is a tty file.
 487         '''
 488         if self._err_file.isatty() and os.name != 'nt':
 489             _msg_header = '\033[0;31mERROR:\033[0m'
 490         else:
 491             _msg_header = 'ERROR:'
 492         error_message = '%s %s' % (_msg_header, message)
 493         self.trouble(error_message, tb)
 494
 495     def report_file_already_downloaded(self, file_name):
 496         """Report file has already been fully downloaded."""
 497         try:
 498             self.to_screen('[download] %s has already been downloaded' % file_name)
 499         except UnicodeEncodeError:
 500             self.to_screen('[download] The file has already been downloaded')
 501
 502     def prepare_filename(self, info_dict):
 503         """Generate the output filename."""
 504         try:
 505             template_dict = dict(info_dict)
 506
 507             template_dict['epoch'] = int(time.time())
 508             autonumber_size = self.params.get('autonumber_size')
 509             if autonumber_size is None:
 510                 autonumber_size = 5
 511             autonumber_templ = '%0' + str(autonumber_size) + 'd'
 512             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 513             if template_dict.get('playlist_index') is not None:
 514                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
 515             if template_dict.get('resolution') is None:
 516                 if template_dict.get('width') and template_dict.get('height'):
 517                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 518                 elif template_dict.get('height'):
 519                     template_dict['resolution'] = '%sp' % template_dict['height']
 520                 elif template_dict.get('width'):
 521                     template_dict['resolution'] = '?x%d' % template_dict['width']
 522
 523             sanitize = lambda k, v: sanitize_filename(
 524                 compat_str(v),
 525                 restricted=self.params.get('restrictfilenames'),
 526                 is_id=(k == 'id'))
 527             template_dict = dict((k, sanitize(k, v))
 528                                  for k, v in template_dict.items()
 529                                  if v is not None)
 530             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 531
 532             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 533             tmpl = compat_expanduser(outtmpl)
 534             filename = tmpl % template_dict
 535             return filename
 536         except ValueError as err:
 537             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 538             return None
 539
 540     def _match_entry(self, info_dict):
 541         """ Returns None iff the file should be downloaded """
 542
 543         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 544         if 'title' in info_dict:
 545             # This can happen when we're just evaluating the playlist
 546             title = info_dict['title']
 547             matchtitle = self.params.get('matchtitle', False)
 548             if matchtitle:
 549                 if not re.search(matchtitle, title, re.IGNORECASE):
 550                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 551             rejecttitle = self.params.get('rejecttitle', False)
 552             if rejecttitle:
 553                 if re.search(rejecttitle, title, re.IGNORECASE):
 554                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 555         date = info_dict.get('upload_date', None)
 556         if date is not None:
 557             dateRange = self.params.get('daterange', DateRange())
 558             if date not in dateRange:
 559                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 560         view_count = info_dict.get('view_count', None)
 561         if view_count is not None:
 562             min_views = self.params.get('min_views')
 563             if min_views is not None and view_count < min_views:
 564                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 565             max_views = self.params.get('max_views')
 566             if max_views is not None and view_count > max_views:
 567                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 568         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
 569             return 'Skipping "%s" because it is age restricted' % title
 570         if self.in_download_archive(info_dict):
 571             return '%s has already been recorded in archive' % video_title
 572         return None
 573
 574     @staticmethod
 575     def add_extra_info(info_dict, extra_info):
 576         '''Set the keys from extra_info in info dict if they are missing'''
 577         for key, value in extra_info.items():
 578             info_dict.setdefault(key, value)
 579
 580     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 581                      process=True):
 582         '''
 583         Returns a list with a dictionary for each video we find.
 584         If 'download', also downloads the videos.
 585         extra_info is a dict containing the extra values to add to each result
 586          '''
 587
 588         if ie_key:
 589             ies = [self.get_info_extractor(ie_key)]
 590         else:
 591             ies = self._ies
 592
 593         for ie in ies:
 594             if not ie.suitable(url):
 595                 continue
 596
 597             if not ie.working():
 598                 self.report_warning('The program functionality for this site has been marked as broken, '
 599                                     'and will probably not work.')
 600
 601             try:
 602                 ie_result = ie.extract(url)
 603                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
 604                     break
 605                 if isinstance(ie_result, list):
 606                     # Backwards compatibility: old IE result format
 607                     ie_result = {
 608                         '_type': 'compat_list',
 609                         'entries': ie_result,
 610                     }
 611                 self.add_default_extra_info(ie_result, ie, url)
 612                 if process:
 613                     return self.process_ie_result(ie_result, download, extra_info)
 614                 else:
 615                     return ie_result
 616             except ExtractorError as de:  # An error we somewhat expected
 617                 self.report_error(compat_str(de), de.format_traceback())
 618                 break
 619             except MaxDownloadsReached:
 620                 raise
 621             except Exception as e:
 622                 if self.params.get('ignoreerrors', False):
 623                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 624                     break
 625                 else:
 626                     raise
 627         else:
 628             self.report_error('no suitable InfoExtractor for URL %s' % url)
 629
 630     def add_default_extra_info(self, ie_result, ie, url):
 631         self.add_extra_info(ie_result, {
 632             'extractor': ie.IE_NAME,
 633             'webpage_url': url,
 634             'webpage_url_basename': url_basename(url),
 635             'extractor_key': ie.ie_key(),
 636         })
 637
 638     def process_ie_result(self, ie_result, download=True, extra_info={}):
 639         """
 640         Take the result of the ie(may be modified) and resolve all unresolved
 641         references (URLs, playlist items).
 642
 643         It will also download the videos if 'download'.
 644         Returns the resolved ie_result.
 645         """
 646
 647         result_type = ie_result.get('_type', 'video')
 648
 649         if result_type in ('url', 'url_transparent'):
 650             extract_flat = self.params.get('extract_flat', False)
 651             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
 652                     extract_flat is True):
 653                 if self.params.get('forcejson', False):
 654                     self.to_stdout(json.dumps(ie_result))
 655                 return ie_result
 656
 657         if result_type == 'video':
 658             self.add_extra_info(ie_result, extra_info)
 659             return self.process_video_result(ie_result, download=download)
 660         elif result_type == 'url':
 661             # We have to add extra_info to the results because it may be
 662             # contained in a playlist
 663             return self.extract_info(ie_result['url'],
 664                                      download,
 665                                      ie_key=ie_result.get('ie_key'),
 666                                      extra_info=extra_info)
 667         elif result_type == 'url_transparent':
 668             # Use the information from the embedding page
 669             info = self.extract_info(
 670                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 671                 extra_info=extra_info, download=False, process=False)
 672
 673             force_properties = dict(
 674                 (k, v) for k, v in ie_result.items() if v is not None)
 675             for f in ('_type', 'url'):
 676                 if f in force_properties:
 677                     del force_properties[f]
 678             new_result = info.copy()
 679             new_result.update(force_properties)
 680
 681             assert new_result.get('_type') != 'url_transparent'
 682
 683             return self.process_ie_result(
 684                 new_result, download=download, extra_info=extra_info)
 685         elif result_type == 'playlist' or result_type == 'multi_video':
 686             # We process each entry in the playlist
 687             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 688             self.to_screen('[download] Downloading playlist: %s' % playlist)
 689
 690             playlist_results = []
 691
 692             playliststart = self.params.get('playliststart', 1) - 1
 693             playlistend = self.params.get('playlistend', None)
 694             # For backwards compatibility, interpret -1 as whole list
 695             if playlistend == -1:
 696                 playlistend = None
 697
 698             ie_entries = ie_result['entries']
 699             if isinstance(ie_entries, list):
 700                 n_all_entries = len(ie_entries)
 701                 entries = ie_entries[playliststart:playlistend]
 702                 n_entries = len(entries)
 703                 self.to_screen(
 704                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
 705                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
 706             elif isinstance(ie_entries, PagedList):
 707                 entries = ie_entries.getslice(
 708                     playliststart, playlistend)
 709                 n_entries = len(entries)
 710                 self.to_screen(
 711                     "[%s] playlist %s: Downloading %d videos" %
 712                     (ie_result['extractor'], playlist, n_entries))
 713             else:  # iterable
 714                 entries = list(itertools.islice(
 715                     ie_entries, playliststart, playlistend))
 716                 n_entries = len(entries)
 717                 self.to_screen(
 718                     "[%s] playlist %s: Downloading %d videos" %
 719                     (ie_result['extractor'], playlist, n_entries))
 720
 721             if self.params.get('playlistreverse', False):
 722                 entries = entries[::-1]
 723
 724             for i, entry in enumerate(entries, 1):
 725                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
 726                 extra = {
 727                     'n_entries': n_entries,
 728                     'playlist': playlist,
 729                     'playlist_id': ie_result.get('id'),
 730                     'playlist_title': ie_result.get('title'),
 731                     'playlist_index': i + playliststart,
 732                     'extractor': ie_result['extractor'],
 733                     'webpage_url': ie_result['webpage_url'],
 734                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
 735                     'extractor_key': ie_result['extractor_key'],
 736                 }
 737
 738                 reason = self._match_entry(entry)
 739                 if reason is not None:
 740                     self.to_screen('[download] ' + reason)
 741                     continue
 742
 743                 entry_result = self.process_ie_result(entry,
 744                                                       download=download,
 745                                                       extra_info=extra)
 746                 playlist_results.append(entry_result)
 747             ie_result['entries'] = playlist_results
 748             return ie_result
 749         elif result_type == 'compat_list':
 750             self.report_warning(
 751                 'Extractor %s returned a compat_list result. '
 752                 'It needs to be updated.' % ie_result.get('extractor'))
 753
 754             def _fixup(r):
 755                 self.add_extra_info(
 756                     r,
 757                     {
 758                         'extractor': ie_result['extractor'],
 759                         'webpage_url': ie_result['webpage_url'],
 760                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
 761                         'extractor_key': ie_result['extractor_key'],
 762                     }
 763                 )
 764                 return r
 765             ie_result['entries'] = [
 766                 self.process_ie_result(_fixup(r), download, extra_info)
 767                 for r in ie_result['entries']
 768             ]
 769             return ie_result
 770         else:
 771             raise Exception('Invalid result type: %s' % result_type)
 772
 773     def _apply_format_filter(self, format_spec, available_formats):
 774         " Returns a tuple of the remaining format_spec and filtered formats "
 775
 776         OPERATORS = {
 777             '<': operator.lt,
 778             '<=': operator.le,
 779             '>': operator.gt,
 780             '>=': operator.ge,
 781             '=': operator.eq,
 782             '!=': operator.ne,
 783         }
 784         operator_rex = re.compile(r'''(?x)\s*\[
 785             (?P<key>width|height|tbr|abr|vbr|filesize)
 786             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
 787             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
 788             \]$
 789             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
 790         m = operator_rex.search(format_spec)
 791         if not m:
 792             raise ValueError('Invalid format specification %r' % format_spec)
 793
 794         try:
 795             comparison_value = int(m.group('value'))
 796         except ValueError:
 797             comparison_value = parse_filesize(m.group('value'))
 798             if comparison_value is None:
 799                 comparison_value = parse_filesize(m.group('value') + 'B')
 800             if comparison_value is None:
 801                 raise ValueError(
 802                     'Invalid value %r in format specification %r' % (
 803                         m.group('value'), format_spec))
 804         op = OPERATORS[m.group('op')]
 805
 806         def _filter(f):
 807             actual_value = f.get(m.group('key'))
 808             if actual_value is None:
 809                 return m.group('none_inclusive')
 810             return op(actual_value, comparison_value)
 811         new_formats = [f for f in available_formats if _filter(f)]
 812
 813         new_format_spec = format_spec[:-len(m.group(0))]
 814         if not new_format_spec:
 815             new_format_spec = 'best'
 816
 817         return (new_format_spec, new_formats)
 818
 819     def select_format(self, format_spec, available_formats):
 820         while format_spec.endswith(']'):
 821             format_spec, available_formats = self._apply_format_filter(
 822                 format_spec, available_formats)
 823         if not available_formats:
 824             return None
 825
 826         if format_spec == 'best' or format_spec is None:
 827             return available_formats[-1]
 828         elif format_spec == 'worst':
 829             return available_formats[0]
 830         elif format_spec == 'bestaudio':
 831             audio_formats = [
 832                 f for f in available_formats
 833                 if f.get('vcodec') == 'none']
 834             if audio_formats:
 835                 return audio_formats[-1]
 836         elif format_spec == 'worstaudio':
 837             audio_formats = [
 838                 f for f in available_formats
 839                 if f.get('vcodec') == 'none']
 840             if audio_formats:
 841                 return audio_formats[0]
 842         elif format_spec == 'bestvideo':
 843             video_formats = [
 844                 f for f in available_formats
 845                 if f.get('acodec') == 'none']
 846             if video_formats:
 847                 return video_formats[-1]
 848         elif format_spec == 'worstvideo':
 849             video_formats = [
 850                 f for f in available_formats
 851                 if f.get('acodec') == 'none']
 852             if video_formats:
 853                 return video_formats[0]
 854         else:
 855             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
 856             if format_spec in extensions:
 857                 filter_f = lambda f: f['ext'] == format_spec
 858             else:
 859                 filter_f = lambda f: f['format_id'] == format_spec
 860             matches = list(filter(filter_f, available_formats))
 861             if matches:
 862                 return matches[-1]
 863         return None
 864
 865     def process_video_result(self, info_dict, download=True):
 866         assert info_dict.get('_type', 'video') == 'video'
 867
 868         if 'id' not in info_dict:
 869             raise ExtractorError('Missing "id" field in extractor result')
 870         if 'title' not in info_dict:
 871             raise ExtractorError('Missing "title" field in extractor result')
 872
 873         if 'playlist' not in info_dict:
 874             # It isn't part of a playlist
 875             info_dict['playlist'] = None
 876             info_dict['playlist_index'] = None
 877
 878         thumbnails = info_dict.get('thumbnails')
 879         if thumbnails:
 880             thumbnails.sort(key=lambda t: (
 881                 t.get('width'), t.get('height'), t.get('url')))
 882             for t in thumbnails:
 883                 if 'width' in t and 'height' in t:
 884                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
 885
 886         if thumbnails and 'thumbnail' not in info_dict:
 887             info_dict['thumbnail'] = thumbnails[-1]['url']
 888
 889         if 'display_id' not in info_dict and 'id' in info_dict:
 890             info_dict['display_id'] = info_dict['id']
 891
 892         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
 893             # Working around negative timestamps in Windows
 894             # (see http://bugs.python.org/issue1646728)
 895             if info_dict['timestamp'] < 0 and os.name == 'nt':
 896                 info_dict['timestamp'] = 0
 897             upload_date = datetime.datetime.utcfromtimestamp(
 898                 info_dict['timestamp'])
 899             info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
 900
 901         # This extractors handle format selection themselves
 902         if info_dict['extractor'] in ['Youku']:
 903             if download:
 904                 self.process_info(info_dict)
 905             return info_dict
 906
 907         # We now pick which formats have to be downloaded
 908         if info_dict.get('formats') is None:
 909             # There's only one format available
 910             formats = [info_dict]
 911         else:
 912             formats = info_dict['formats']
 913
 914         if not formats:
 915             raise ExtractorError('No video formats found!')
 916
 917         # We check that all the formats have the format and format_id fields
 918         for i, format in enumerate(formats):
 919             if 'url' not in format:
 920                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
 921
 922             if format.get('format_id') is None:
 923                 format['format_id'] = compat_str(i)
 924             if format.get('format') is None:
 925                 format['format'] = '{id} - {res}{note}'.format(
 926                     id=format['format_id'],
 927                     res=self.format_resolution(format),
 928                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
 929                 )
 930             # Automatically determine file extension if missing
 931             if 'ext' not in format:
 932                 format['ext'] = determine_ext(format['url']).lower()
 933
 934         format_limit = self.params.get('format_limit', None)
 935         if format_limit:
 936             formats = list(takewhile_inclusive(
 937                 lambda f: f['format_id'] != format_limit, formats
 938             ))
 939
 940         # TODO Central sorting goes here
 941
 942         if formats[0] is not info_dict:
 943             # only set the 'formats' fields if the original info_dict list them
 944             # otherwise we end up with a circular reference, the first (and unique)
 945             # element in the 'formats' field in info_dict is info_dict itself,
 946             # wich can't be exported to json
 947             info_dict['formats'] = formats
 948         if self.params.get('listformats', None):
 949             self.list_formats(info_dict)
 950             return
 951
 952         req_format = self.params.get('format')
 953         if req_format is None:
 954             req_format = 'best'
 955         formats_to_download = []
 956         # The -1 is for supporting YoutubeIE
 957         if req_format in ('-1', 'all'):
 958             formats_to_download = formats
 959         else:
 960             for rfstr in req_format.split(','):
 961                 # We can accept formats requested in the format: 34/5/best, we pick
 962                 # the first that is available, starting from left
 963                 req_formats = rfstr.split('/')
 964                 for rf in req_formats:
 965                     if re.match(r'.+?\+.+?', rf) is not None:
 966                         # Two formats have been requested like '137+139'
 967                         format_1, format_2 = rf.split('+')
 968                         formats_info = (self.select_format(format_1, formats),
 969                                         self.select_format(format_2, formats))
 970                         if all(formats_info):
 971                             # The first format must contain the video and the
 972                             # second the audio
 973                             if formats_info[0].get('vcodec') == 'none':
 974                                 self.report_error('The first format must '
 975                                                   'contain the video, try using '
 976                                                   '"-f %s+%s"' % (format_2, format_1))
 977                                 return
 978                             output_ext = (
 979                                 formats_info[0]['ext']
 980                                 if self.params.get('merge_output_format') is None
 981                                 else self.params['merge_output_format'])
 982                             selected_format = {
 983                                 'requested_formats': formats_info,
 984                                 'format': rf,
 985                                 'ext': formats_info[0]['ext'],
 986                                 'width': formats_info[0].get('width'),
 987                                 'height': formats_info[0].get('height'),
 988                                 'resolution': formats_info[0].get('resolution'),
 989                                 'fps': formats_info[0].get('fps'),
 990                                 'vcodec': formats_info[0].get('vcodec'),
 991                                 'vbr': formats_info[0].get('vbr'),
 992                                 'stretched_ratio': formats_info[0].get('stretched_ratio'),
 993                                 'acodec': formats_info[1].get('acodec'),
 994                                 'abr': formats_info[1].get('abr'),
 995                                 'ext': output_ext,
 996                             }
 997                         else:
 998                             selected_format = None
 999                     else:
1000                         selected_format = self.select_format(rf, formats)
1001                     if selected_format is not None:
1002                         formats_to_download.append(selected_format)
1003                         break
1004         if not formats_to_download:
1005             raise ExtractorError('requested format not available',
1006                                  expected=True)
1007
1008         if download:
1009             if len(formats_to_download) > 1:
1010                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1011             for format in formats_to_download:
1012                 new_info = dict(info_dict)
1013                 new_info.update(format)
1014                 self.process_info(new_info)
1015         # We update the info dict with the best quality format (backwards compatibility)
1016         info_dict.update(formats_to_download[-1])
1017         return info_dict
1018
1019     def process_info(self, info_dict):
1020         """Process a single resolved IE result."""
1021
1022         assert info_dict.get('_type', 'video') == 'video'
1023
1024         max_downloads = self.params.get('max_downloads')
1025         if max_downloads is not None:
1026             if self._num_downloads >= int(max_downloads):
1027                 raise MaxDownloadsReached()
1028
1029         info_dict['fulltitle'] = info_dict['title']
1030         if len(info_dict['title']) > 200:
1031             info_dict['title'] = info_dict['title'][:197] + '...'
1032
1033         # Keep for backwards compatibility
1034         info_dict['stitle'] = info_dict['title']
1035
1036         if 'format' not in info_dict:
1037             info_dict['format'] = info_dict['ext']
1038
1039         reason = self._match_entry(info_dict)
1040         if reason is not None:
1041             self.to_screen('[download] ' + reason)
1042             return
1043
1044         self._num_downloads += 1
1045
1046         filename = self.prepare_filename(info_dict)
1047
1048         # Forced printings
1049         if self.params.get('forcetitle', False):
1050             self.to_stdout(info_dict['fulltitle'])
1051         if self.params.get('forceid', False):
1052             self.to_stdout(info_dict['id'])
1053         if self.params.get('forceurl', False):
1054             if info_dict.get('requested_formats') is not None:
1055                 for f in info_dict['requested_formats']:
1056                     self.to_stdout(f['url'] + f.get('play_path', ''))
1057             else:
1058                 # For RTMP URLs, also include the playpath
1059                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1060         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1061             self.to_stdout(info_dict['thumbnail'])
1062         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1063             self.to_stdout(info_dict['description'])
1064         if self.params.get('forcefilename', False) and filename is not None:
1065             self.to_stdout(filename)
1066         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1067             self.to_stdout(formatSeconds(info_dict['duration']))
1068         if self.params.get('forceformat', False):
1069             self.to_stdout(info_dict['format'])
1070         if self.params.get('forcejson', False):
1071             info_dict['_filename'] = filename
1072             self.to_stdout(json.dumps(info_dict))
1073         if self.params.get('dump_single_json', False):
1074             info_dict['_filename'] = filename
1075
1076         # Do nothing else if in simulate mode
1077         if self.params.get('simulate', False):
1078             return
1079
1080         if filename is None:
1081             return
1082
1083         try:
1084             dn = os.path.dirname(encodeFilename(filename))
1085             if dn and not os.path.exists(dn):
1086                 os.makedirs(dn)
1087         except (OSError, IOError) as err:
1088             self.report_error('unable to create directory ' + compat_str(err))
1089             return
1090
1091         if self.params.get('writedescription', False):
1092             descfn = filename + '.description'
1093             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1094                 self.to_screen('[info] Video description is already present')
1095             elif info_dict.get('description') is None:
1096                 self.report_warning('There\'s no description to write.')
1097             else:
1098                 try:
1099                     self.to_screen('[info] Writing video description to: ' + descfn)
1100                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1101                         descfile.write(info_dict['description'])
1102                 except (OSError, IOError):
1103                     self.report_error('Cannot write description file ' + descfn)
1104                     return
1105
1106         if self.params.get('writeannotations', False):
1107             annofn = filename + '.annotations.xml'
1108             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1109                 self.to_screen('[info] Video annotations are already present')
1110             else:
1111                 try:
1112                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1113                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1114                         annofile.write(info_dict['annotations'])
1115                 except (KeyError, TypeError):
1116                     self.report_warning('There are no annotations to write.')
1117                 except (OSError, IOError):
1118                     self.report_error('Cannot write annotations file: ' + annofn)
1119                     return
1120
1121         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1122                                        self.params.get('writeautomaticsub')])
1123
1124         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
1125             # subtitles download errors are already managed as troubles in relevant IE
1126             # that way it will silently go on when used with unsupporting IE
1127             subtitles = info_dict['subtitles']
1128             sub_format = self.params.get('subtitlesformat', 'srt')
1129             for sub_lang in subtitles.keys():
1130                 sub = subtitles[sub_lang]
1131                 if sub is None:
1132                     continue
1133                 try:
1134                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1135                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1136                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1137                     else:
1138                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1139                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1140                             subfile.write(sub)
1141                 except (OSError, IOError):
1142                     self.report_error('Cannot write subtitles file ' + sub_filename)
1143                     return
1144
1145         if self.params.get('writeinfojson', False):
1146             infofn = os.path.splitext(filename)[0] + '.info.json'
1147             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1148                 self.to_screen('[info] Video description metadata is already present')
1149             else:
1150                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1151                 try:
1152                     write_json_file(info_dict, infofn)
1153                 except (OSError, IOError):
1154                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1155                     return
1156
1157         if self.params.get('writethumbnail', False):
1158             if info_dict.get('thumbnail') is not None:
1159                 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
1160                 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
1161                 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1162                     self.to_screen('[%s] %s: Thumbnail is already present' %
1163                                    (info_dict['extractor'], info_dict['id']))
1164                 else:
1165                     self.to_screen('[%s] %s: Downloading thumbnail ...' %
1166                                    (info_dict['extractor'], info_dict['id']))
1167                     try:
1168                         uf = self.urlopen(info_dict['thumbnail'])
1169                         with open(thumb_filename, 'wb') as thumbf:
1170                             shutil.copyfileobj(uf, thumbf)
1171                         self.to_screen('[%s] %s: Writing thumbnail to: %s' %
1172                                        (info_dict['extractor'], info_dict['id'], thumb_filename))
1173                     except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1174                         self.report_warning('Unable to download thumbnail "%s": %s' %
1175                                             (info_dict['thumbnail'], compat_str(err)))
1176
1177         if not self.params.get('skip_download', False):
1178             try:
1179                 def dl(name, info):
1180                     fd = get_suitable_downloader(info)(self, self.params)
1181                     for ph in self._progress_hooks:
1182                         fd.add_progress_hook(ph)
1183                     if self.params.get('verbose'):
1184                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1185                     return fd.download(name, info)
1186                 if info_dict.get('requested_formats') is not None:
1187                     downloaded = []
1188                     success = True
1189                     merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1190                     if not merger._executable:
1191                         postprocessors = []
1192                         self.report_warning('You have requested multiple '
1193                                             'formats but ffmpeg or avconv are not installed.'
1194                                             ' The formats won\'t be merged')
1195                     else:
1196                         postprocessors = [merger]
1197                     for f in info_dict['requested_formats']:
1198                         new_info = dict(info_dict)
1199                         new_info.update(f)
1200                         fname = self.prepare_filename(new_info)
1201                         fname = prepend_extension(fname, 'f%s' % f['format_id'])
1202                         downloaded.append(fname)
1203                         partial_success = dl(fname, new_info)
1204                         success = success and partial_success
1205                     info_dict['__postprocessors'] = postprocessors
1206                     info_dict['__files_to_merge'] = downloaded
1207                 else:
1208                     # Just a single file
1209                     success = dl(filename, info_dict)
1210             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1211                 self.report_error('unable to download video data: %s' % str(err))
1212                 return
1213             except (OSError, IOError) as err:
1214                 raise UnavailableVideoError(err)
1215             except (ContentTooShortError, ) as err:
1216                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1217                 return
1218
1219             if success:
1220                 # Fixup content
1221                 stretched_ratio = info_dict.get('stretched_ratio')
1222                 if stretched_ratio is not None and stretched_ratio != 1:
1223                     fixup_policy = self.params.get('fixup')
1224                     if fixup_policy is None:
1225                         fixup_policy = 'detect_or_warn'
1226                     if fixup_policy == 'warn':
1227                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1228                             info_dict['id'], stretched_ratio))
1229                     elif fixup_policy == 'detect_or_warn':
1230                         stretched_pp = FFmpegFixupStretchedPP(self)
1231                         if stretched_pp.available:
1232                             info_dict.setdefault('__postprocessors', [])
1233                             info_dict['__postprocessors'].append(stretched_pp)
1234                         else:
1235                             self.report_warning(
1236                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1237                                     info_dict['id'], stretched_ratio))
1238                     else:
1239                         assert fixup_policy == 'ignore'
1240
1241                 try:
1242                     self.post_process(filename, info_dict)
1243                 except (PostProcessingError) as err:
1244                     self.report_error('postprocessing: %s' % str(err))
1245                     return
1246                 self.record_download_archive(info_dict)
1247
1248     def download(self, url_list):
1249         """Download a given list of URLs."""
1250         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1251         if (len(url_list) > 1 and
1252                 '%' not in outtmpl
1253                 and self.params.get('max_downloads') != 1):
1254             raise SameFileError(outtmpl)
1255
1256         for url in url_list:
1257             try:
1258                 # It also downloads the videos
1259                 res = self.extract_info(url)
1260             except UnavailableVideoError:
1261                 self.report_error('unable to download video')
1262             except MaxDownloadsReached:
1263                 self.to_screen('[info] Maximum number of downloaded files reached.')
1264                 raise
1265             else:
1266                 if self.params.get('dump_single_json', False):
1267                     self.to_stdout(json.dumps(res))
1268
1269         return self._download_retcode
1270
1271     def download_with_info_file(self, info_filename):
1272         with io.open(info_filename, 'r', encoding='utf-8') as f:
1273             info = json.load(f)
1274         try:
1275             self.process_ie_result(info, download=True)
1276         except DownloadError:
1277             webpage_url = info.get('webpage_url')
1278             if webpage_url is not None:
1279                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1280                 return self.download([webpage_url])
1281             else:
1282                 raise
1283         return self._download_retcode
1284
1285     def post_process(self, filename, ie_info):
1286         """Run all the postprocessors on the given file."""
1287         info = dict(ie_info)
1288         info['filepath'] = filename
1289         pps_chain = []
1290         if ie_info.get('__postprocessors') is not None:
1291             pps_chain.extend(ie_info['__postprocessors'])
1292         pps_chain.extend(self._pps)
1293         for pp in pps_chain:
1294             keep_video = None
1295             old_filename = info['filepath']
1296             try:
1297                 keep_video_wish, info = pp.run(info)
1298                 if keep_video_wish is not None:
1299                     if keep_video_wish:
1300                         keep_video = keep_video_wish
1301                     elif keep_video is None:
1302                         # No clear decision yet, let IE decide
1303                         keep_video = keep_video_wish
1304             except PostProcessingError as e:
1305                 self.report_error(e.msg)
1306             if keep_video is False and not self.params.get('keepvideo', False):
1307                 try:
1308                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1309                     os.remove(encodeFilename(old_filename))
1310                 except (IOError, OSError):
1311                     self.report_warning('Unable to remove downloaded video file')
1312
1313     def _make_archive_id(self, info_dict):
1314         # Future-proof against any change in case
1315         # and backwards compatibility with prior versions
1316         extractor = info_dict.get('extractor_key')
1317         if extractor is None:
1318             if 'id' in info_dict:
1319                 extractor = info_dict.get('ie_key')  # key in a playlist
1320         if extractor is None:
1321             return None  # Incomplete video information
1322         return extractor.lower() + ' ' + info_dict['id']
1323
1324     def in_download_archive(self, info_dict):
1325         fn = self.params.get('download_archive')
1326         if fn is None:
1327             return False
1328
1329         vid_id = self._make_archive_id(info_dict)
1330         if vid_id is None:
1331             return False  # Incomplete video information
1332
1333         try:
1334             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1335                 for line in archive_file:
1336                     if line.strip() == vid_id:
1337                         return True
1338         except IOError as ioe:
1339             if ioe.errno != errno.ENOENT:
1340                 raise
1341         return False
1342
1343     def record_download_archive(self, info_dict):
1344         fn = self.params.get('download_archive')
1345         if fn is None:
1346             return
1347         vid_id = self._make_archive_id(info_dict)
1348         assert vid_id
1349         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1350             archive_file.write(vid_id + '\n')
1351
1352     @staticmethod
1353     def format_resolution(format, default='unknown'):
1354         if format.get('vcodec') == 'none':
1355             return 'audio only'
1356         if format.get('resolution') is not None:
1357             return format['resolution']
1358         if format.get('height') is not None:
1359             if format.get('width') is not None:
1360                 res = '%sx%s' % (format['width'], format['height'])
1361             else:
1362                 res = '%sp' % format['height']
1363         elif format.get('width') is not None:
1364             res = '?x%d' % format['width']
1365         else:
1366             res = default
1367         return res
1368
1369     def _format_note(self, fdict):
1370         res = ''
1371         if fdict.get('ext') in ['f4f', 'f4m']:
1372             res += '(unsupported) '
1373         if fdict.get('format_note') is not None:
1374             res += fdict['format_note'] + ' '
1375         if fdict.get('tbr') is not None:
1376             res += '%4dk ' % fdict['tbr']
1377         if fdict.get('container') is not None:
1378             if res:
1379                 res += ', '
1380             res += '%s container' % fdict['container']
1381         if (fdict.get('vcodec') is not None and
1382                 fdict.get('vcodec') != 'none'):
1383             if res:
1384                 res += ', '
1385             res += fdict['vcodec']
1386             if fdict.get('vbr') is not None:
1387                 res += '@'
1388         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1389             res += 'video@'
1390         if fdict.get('vbr') is not None:
1391             res += '%4dk' % fdict['vbr']
1392         if fdict.get('fps') is not None:
1393             res += ', %sfps' % fdict['fps']
1394         if fdict.get('acodec') is not None:
1395             if res:
1396                 res += ', '
1397             if fdict['acodec'] == 'none':
1398                 res += 'video only'
1399             else:
1400                 res += '%-5s' % fdict['acodec']
1401         elif fdict.get('abr') is not None:
1402             if res:
1403                 res += ', '
1404             res += 'audio'
1405         if fdict.get('abr') is not None:
1406             res += '@%3dk' % fdict['abr']
1407         if fdict.get('asr') is not None:
1408             res += ' (%5dHz)' % fdict['asr']
1409         if fdict.get('filesize') is not None:
1410             if res:
1411                 res += ', '
1412             res += format_bytes(fdict['filesize'])
1413         elif fdict.get('filesize_approx') is not None:
1414             if res:
1415                 res += ', '
1416             res += '~' + format_bytes(fdict['filesize_approx'])
1417         return res
1418
1419     def list_formats(self, info_dict):
1420         def line(format, idlen=20):
1421             return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1422                 format['format_id'],
1423                 format['ext'],
1424                 self.format_resolution(format),
1425                 self._format_note(format),
1426             ))
1427
1428         formats = info_dict.get('formats', [info_dict])
1429         idlen = max(len('format code'),
1430                     max(len(f['format_id']) for f in formats))
1431         formats_s = [
1432             line(f, idlen) for f in formats
1433             if f.get('preference') is None or f['preference'] >= -1000]
1434         if len(formats) > 1:
1435             formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1436             formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1437
1438         header_line = line({
1439             'format_id': 'format code', 'ext': 'extension',
1440             'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1441         self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1442                        (info_dict['id'], header_line, '\n'.join(formats_s)))
1443
1444     def urlopen(self, req):
1445         """ Start an HTTP download """
1446
1447         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1448         # always respected by websites, some tend to give out URLs with non percent-encoded
1449         # non-ASCII characters (see telemb.py, ard.py [#3412])
1450         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1451         # To work around aforementioned issue we will replace request's original URL with
1452         # percent-encoded one
1453         req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
1454         url = req if req_is_string else req.get_full_url()
1455         url_escaped = escape_url(url)
1456
1457         # Substitute URL if any change after escaping
1458         if url != url_escaped:
1459             if req_is_string:
1460                 req = url_escaped
1461             else:
1462                 req = compat_urllib_request.Request(
1463                     url_escaped, data=req.data, headers=req.headers,
1464                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1465
1466         return self._opener.open(req, timeout=self._socket_timeout)
1467
1468     def print_debug_header(self):
1469         if not self.params.get('verbose'):
1470             return
1471
1472         if type('') is not compat_str:
1473             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1474             self.report_warning(
1475                 'Your Python is broken! Update to a newer and supported version')
1476
1477         stdout_encoding = getattr(
1478             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1479         encoding_str = (
1480             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1481                 locale.getpreferredencoding(),
1482                 sys.getfilesystemencoding(),
1483                 stdout_encoding,
1484                 self.get_encoding()))
1485         write_string(encoding_str, encoding=None)
1486
1487         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1488         try:
1489             sp = subprocess.Popen(
1490                 ['git', 'rev-parse', '--short', 'HEAD'],
1491                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1492                 cwd=os.path.dirname(os.path.abspath(__file__)))
1493             out, err = sp.communicate()
1494             out = out.decode().strip()
1495             if re.match('[0-9a-f]+', out):
1496                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1497         except:
1498             try:
1499                 sys.exc_clear()
1500             except:
1501                 pass
1502         self._write_string('[debug] Python version %s - %s\n' % (
1503             platform.python_version(), platform_name()))
1504
1505         exe_versions = FFmpegPostProcessor.get_versions()
1506         exe_versions['rtmpdump'] = rtmpdump_version()
1507         exe_str = ', '.join(
1508             '%s %s' % (exe, v)
1509             for exe, v in sorted(exe_versions.items())
1510             if v
1511         )
1512         if not exe_str:
1513             exe_str = 'none'
1514         self._write_string('[debug] exe versions: %s\n' % exe_str)
1515
1516         proxy_map = {}
1517         for handler in self._opener.handlers:
1518             if hasattr(handler, 'proxies'):
1519                 proxy_map.update(handler.proxies)
1520         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1521
1522         if self.params.get('call_home', False):
1523             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1524             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1525             latest_version = self.urlopen(
1526                 'https://yt-dl.org/latest/version').read().decode('utf-8')
1527             if version_tuple(latest_version) > version_tuple(__version__):
1528                 self.report_warning(
1529                     'You are using an outdated version (newest version: %s)! '
1530                     'See https://yt-dl.org/update if you need help updating.' %
1531                     latest_version)
1532
1533     def _setup_opener(self):
1534         timeout_val = self.params.get('socket_timeout')
1535         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1536
1537         opts_cookiefile = self.params.get('cookiefile')
1538         opts_proxy = self.params.get('proxy')
1539
1540         if opts_cookiefile is None:
1541             self.cookiejar = compat_cookiejar.CookieJar()
1542         else:
1543             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1544                 opts_cookiefile)
1545             if os.access(opts_cookiefile, os.R_OK):
1546                 self.cookiejar.load()
1547
1548         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1549             self.cookiejar)
1550         if opts_proxy is not None:
1551             if opts_proxy == '':
1552                 proxies = {}
1553             else:
1554                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1555         else:
1556             proxies = compat_urllib_request.getproxies()
1557             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1558             if 'http' in proxies and 'https' not in proxies:
1559                 proxies['https'] = proxies['http']
1560         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1561
1562         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1563         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1564         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1565         opener = compat_urllib_request.build_opener(
1566             https_handler, proxy_handler, cookie_processor, ydlh)
1567         # Delete the default user-agent header, which would otherwise apply in
1568         # cases where our custom HTTP handler doesn't come into play
1569         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1570         opener.addheaders = []
1571         self._opener = opener
1572
1573     def encode(self, s):
1574         if isinstance(s, bytes):
1575             return s  # Already encoded
1576
1577         try:
1578             return s.encode(self.get_encoding())
1579         except UnicodeEncodeError as err:
1580             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1581             raise
1582
1583     def get_encoding(self):
1584         encoding = self.params.get('encoding')
1585         if encoding is None:
1586             encoding = preferredencoding()
1587         return encoding