_ Git - youtube-dl/blob - youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import datetime
   8 import errno
   9 import io
  10 import itertools
  11 import json
  12 import locale
  13 import operator
  14 import os
  15 import platform
  16 import re
  17 import shutil
  18 import subprocess
  19 import socket
  20 import sys
  21 import time
  22 import traceback
  23
  24 if os.name == 'nt':
  25     import ctypes
  26
  27 from .compat import (
  28     compat_cookiejar,
  29     compat_expanduser,
  30     compat_http_client,
  31     compat_kwargs,
  32     compat_str,
  33     compat_urllib_error,
  34     compat_urllib_request,
  35 )
  36 from .utils import (
  37     escape_url,
  38     ContentTooShortError,
  39     date_from_str,
  40     DateRange,
  41     DEFAULT_OUTTMPL,
  42     determine_ext,
  43     DownloadError,
  44     encodeFilename,
  45     ExtractorError,
  46     format_bytes,
  47     formatSeconds,
  48     get_term_width,
  49     locked_file,
  50     make_HTTPS_handler,
  51     MaxDownloadsReached,
  52     PagedList,
  53     parse_filesize,
  54     PostProcessingError,
  55     platform_name,
  56     preferredencoding,
  57     SameFileError,
  58     sanitize_filename,
  59     std_headers,
  60     subtitles_filename,
  61     takewhile_inclusive,
  62     UnavailableVideoError,
  63     url_basename,
  64     version_tuple,
  65     write_json_file,
  66     write_string,
  67     YoutubeDLHandler,
  68     prepend_extension,
  69     args_to_str,
  70     age_restricted,
  71 )
  72 from .cache import Cache
  73 from .extractor import get_info_extractor, gen_extractors
  74 from .downloader import get_suitable_downloader
  75 from .downloader.rtmp import rtmpdump_version
  76 from .postprocessor import (
  77     FFmpegFixupM4aPP,
  78     FFmpegFixupStretchedPP,
  79     FFmpegMergerPP,
  80     FFmpegPostProcessor,
  81     get_postprocessor,
  82 )
  83 from .version import __version__
  84
  85
  86 class YoutubeDL(object):
  87     """YoutubeDL class.
  88
  89     YoutubeDL objects are the ones responsible for downloading the
  90     actual video file and writing it to disk if the user has requested
  91     it, among some other tasks. In most cases there should be one per
  92     program. As, given a video URL, the downloader doesn't know how to
  93     extract all the needed information, task that InfoExtractors do, it
  94     has to pass the URL to one of them.
  95
  96     For this, YoutubeDL objects have a method that allows
  97     InfoExtractors to be registered in a given order. When it is passed
  98     a URL, the YoutubeDL object hands it to the first InfoExtractor it
  99     finds that reports being able to handle it. The InfoExtractor extracts
 100     all the information about the video or videos the URL refers to, and
 101     YoutubeDL processes the extracted information, possibly using a File
 102     Downloader to download the video.
 103
 104     YoutubeDL objects accept a lot of parameters. In order not to saturate
 105     the object constructor with arguments, it receives a dictionary of
 106     options instead. These options are available through the params
 107     attribute for the InfoExtractors to use. The YoutubeDL also
 108     registers itself as the downloader in charge for the InfoExtractors
 109     that are added to it, so this is a "mutual registration".
 110
 111     Available options:
 112
 113     username:          Username for authentication purposes.
 114     password:          Password for authentication purposes.
 115     videopassword:     Password for accessing a video.
 116     usenetrc:          Use netrc for authentication instead.
 117     verbose:           Print additional info to stdout.
 118     quiet:             Do not print messages to stdout.
 119     no_warnings:       Do not print out anything for warnings.
 120     forceurl:          Force printing final URL.
 121     forcetitle:        Force printing title.
 122     forceid:           Force printing ID.
 123     forcethumbnail:    Force printing thumbnail URL.
 124     forcedescription:  Force printing description.
 125     forcefilename:     Force printing final filename.
 126     forceduration:     Force printing duration.
 127     forcejson:         Force printing info_dict as JSON.
 128     dump_single_json:  Force printing the info_dict of the whole playlist
 129                        (or video) as a single JSON line.
 130     simulate:          Do not download the video files.
 131     format:            Video format code. See options.py for more information.
 132     format_limit:      Highest quality format to try.
 133     outtmpl:           Template for output names.
 134     restrictfilenames: Do not allow "&" and spaces in file names
 135     ignoreerrors:      Do not stop on download errors.
 136     nooverwrites:      Prevent overwriting files.
 137     playliststart:     Playlist item to start at.
 138     playlistend:       Playlist item to end at.
 139     playlistreverse:   Download playlist items in reverse order.
 140     matchtitle:        Download only matching titles.
 141     rejecttitle:       Reject downloads for matching titles.
 142     logger:            Log messages to a logging.Logger instance.
 143     logtostderr:       Log messages to stderr instead of stdout.
 144     writedescription:  Write the video description to a .description file
 145     writeinfojson:     Write the video description to a .info.json file
 146     writeannotations:  Write the video annotations to a .annotations.xml file
 147     writethumbnail:    Write the thumbnail image to a file
 148     writesubtitles:    Write the video subtitles to a file
 149     writeautomaticsub: Write the automatic subtitles to a file
 150     allsubtitles:      Downloads all the subtitles of the video
 151                        (requires writesubtitles or writeautomaticsub)
 152     listsubtitles:     Lists all available subtitles for the video
 153     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
 154     subtitleslangs:    List of languages of the subtitles to download
 155     keepvideo:         Keep the video file after post-processing
 156     daterange:         A DateRange object, download only if the upload_date is in the range.
 157     skip_download:     Skip the actual download of the video file
 158     cachedir:          Location of the cache files in the filesystem.
 159                        False to disable filesystem cache.
 160     noplaylist:        Download single video instead of a playlist if in doubt.
 161     age_limit:         An integer representing the user's age in years.
 162                        Unsuitable videos for the given age are skipped.
 163     min_views:         An integer representing the minimum view count the video
 164                        must have in order to not be skipped.
 165                        Videos without view count information are always
 166                        downloaded. None for no limit.
 167     max_views:         An integer representing the maximum view count.
 168                        Videos that are more popular than that are not
 169                        downloaded.
 170                        Videos without view count information are always
 171                        downloaded. None for no limit.
 172     download_archive:  File name of a file where all downloads are recorded.
 173                        Videos already present in the file are not downloaded
 174                        again.
 175     cookiefile:        File name where cookies should be read from and dumped to.
 176     nocheckcertificate:Do not verify SSL certificates
 177     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 178                        At the moment, this is only supported by YouTube.
 179     proxy:             URL of the proxy server to use
 180     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 181     bidi_workaround:   Work around buggy terminals without bidirectional text
 182                        support, using fribidi
 183     debug_printtraffic:Print out sent and received HTTP traffic
 184     include_ads:       Download ads as well
 185     default_search:    Prepend this string if an input url is not valid.
 186                        'auto' for elaborate guessing
 187     encoding:          Use this encoding instead of the system-specified.
 188     extract_flat:      Do not resolve URLs, return the immediate result.
 189                        Pass in 'in_playlist' to only show this behavior for
 190                        playlist items.
 191     postprocessors:    A list of dictionaries, each with an entry
 192                        * key:  The name of the postprocessor. See
 193                                youtube_dl/postprocessor/__init__.py for a list.
 194                        as well as any further keyword arguments for the
 195                        postprocessor.
 196     progress_hooks:    A list of functions that get called on download
 197                        progress, with a dictionary with the entries
 198                        * filename: The final filename
 199                        * status: One of "downloading" and "finished"
 200
 201                        The dict may also have some of the following entries:
 202
 203                        * downloaded_bytes: Bytes on disk
 204                        * total_bytes: Size of the whole file, None if unknown
 205                        * tmpfilename: The filename we're currently writing to
 206                        * eta: The estimated time in seconds, None if unknown
 207                        * speed: The download speed in bytes/second, None if
 208                                 unknown
 209
 210                        Progress hooks are guaranteed to be called at least once
 211                        (with status "finished") if the download is successful.
 212     merge_output_format: Extension to use when merging formats.
 213     fixup:             Automatically correct known faults of the file.
 214                        One of:
 215                        - "never": do nothing
 216                        - "warn": only emit a warning
 217                        - "detect_or_warn": check whether we can do anything
 218                                            about it, warn otherwise (default)
 219     source_address:    (Experimental) Client-side IP address to bind to.
 220     call_home:         Boolean, true iff we are allowed to contact the
 221                        youtube-dl servers for debugging.
 222     sleep_interval:    Number of seconds to sleep before each download.
 223     external_downloader:  Executable of the external downloader to call.
 224
 225
 226     The following parameters are not used by YoutubeDL itself, they are used by
 227     the FileDownloader:
 228     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 229     noresizebuffer, retries, continuedl, noprogress, consoletitle
 230
 231     The following options are used by the post processors:
 232     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
 233                        otherwise prefer avconv.
 234     exec_cmd:          Arbitrary command to run after downloading
 235     """
 236
 237     params = None
 238     _ies = []
 239     _pps = []
 240     _download_retcode = None
 241     _num_downloads = None
 242     _screen_file = None
 243
 244     def __init__(self, params=None, auto_init=True):
 245         """Create a FileDownloader object with the given options."""
 246         if params is None:
 247             params = {}
 248         self._ies = []
 249         self._ies_instances = {}
 250         self._pps = []
 251         self._progress_hooks = []
 252         self._download_retcode = 0
 253         self._num_downloads = 0
 254         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 255         self._err_file = sys.stderr
 256         self.params = params
 257         self.cache = Cache(self)
 258
 259         if params.get('bidi_workaround', False):
 260             try:
 261                 import pty
 262                 master, slave = pty.openpty()
 263                 width = get_term_width()
 264                 if width is None:
 265                     width_args = []
 266                 else:
 267                     width_args = ['-w', str(width)]
 268                 sp_kwargs = dict(
 269                     stdin=subprocess.PIPE,
 270                     stdout=slave,
 271                     stderr=self._err_file)
 272                 try:
 273                     self._output_process = subprocess.Popen(
 274                         ['bidiv'] + width_args, **sp_kwargs
 275                     )
 276                 except OSError:
 277                     self._output_process = subprocess.Popen(
 278                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 279                 self._output_channel = os.fdopen(master, 'rb')
 280             except OSError as ose:
 281                 if ose.errno == 2:
 282                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 283                 else:
 284                     raise
 285
 286         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 287                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 288                 and not params.get('restrictfilenames', False)):
 289             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 290             self.report_warning(
 291                 'Assuming --restrict-filenames since file system encoding '
 292                 'cannot encode all characters. '
 293                 'Set the LC_ALL environment variable to fix this.')
 294             self.params['restrictfilenames'] = True
 295
 296         if '%(stitle)s' in self.params.get('outtmpl', ''):
 297             self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 298
 299         self._setup_opener()
 300
 301         if auto_init:
 302             self.print_debug_header()
 303             self.add_default_info_extractors()
 304
 305         for pp_def_raw in self.params.get('postprocessors', []):
 306             pp_class = get_postprocessor(pp_def_raw['key'])
 307             pp_def = dict(pp_def_raw)
 308             del pp_def['key']
 309             pp = pp_class(self, **compat_kwargs(pp_def))
 310             self.add_post_processor(pp)
 311
 312         for ph in self.params.get('progress_hooks', []):
 313             self.add_progress_hook(ph)
 314
 315     def warn_if_short_id(self, argv):
 316         # short YouTube ID starting with dash?
 317         idxs = [
 318             i for i, a in enumerate(argv)
 319             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 320         if idxs:
 321             correct_argv = (
 322                 ['youtube-dl'] +
 323                 [a for i, a in enumerate(argv) if i not in idxs] +
 324                 ['--'] + [argv[i] for i in idxs]
 325             )
 326             self.report_warning(
 327                 'Long argument string detected. '
 328                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 329                 args_to_str(correct_argv))
 330
 331     def add_info_extractor(self, ie):
 332         """Add an InfoExtractor object to the end of the list."""
 333         self._ies.append(ie)
 334         self._ies_instances[ie.ie_key()] = ie
 335         ie.set_downloader(self)
 336
 337     def get_info_extractor(self, ie_key):
 338         """
 339         Get an instance of an IE with name ie_key, it will try to get one from
 340         the _ies list, if there's no instance it will create a new one and add
 341         it to the extractor list.
 342         """
 343         ie = self._ies_instances.get(ie_key)
 344         if ie is None:
 345             ie = get_info_extractor(ie_key)()
 346             self.add_info_extractor(ie)
 347         return ie
 348
 349     def add_default_info_extractors(self):
 350         """
 351         Add the InfoExtractors returned by gen_extractors to the end of the list
 352         """
 353         for ie in gen_extractors():
 354             self.add_info_extractor(ie)
 355
 356     def add_post_processor(self, pp):
 357         """Add a PostProcessor object to the end of the chain."""
 358         self._pps.append(pp)
 359         pp.set_downloader(self)
 360
 361     def add_progress_hook(self, ph):
 362         """Add the progress hook (currently only for the file downloader)"""
 363         self._progress_hooks.append(ph)
 364
 365     def _bidi_workaround(self, message):
 366         if not hasattr(self, '_output_channel'):
 367             return message
 368
 369         assert hasattr(self, '_output_process')
 370         assert isinstance(message, compat_str)
 371         line_count = message.count('\n') + 1
 372         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 373         self._output_process.stdin.flush()
 374         res = ''.join(self._output_channel.readline().decode('utf-8')
 375                       for _ in range(line_count))
 376         return res[:-len('\n')]
 377
 378     def to_screen(self, message, skip_eol=False):
 379         """Print message to stdout if not in quiet mode."""
 380         return self.to_stdout(message, skip_eol, check_quiet=True)
 381
 382     def _write_string(self, s, out=None):
 383         write_string(s, out=out, encoding=self.params.get('encoding'))
 384
 385     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 386         """Print message to stdout if not in quiet mode."""
 387         if self.params.get('logger'):
 388             self.params['logger'].debug(message)
 389         elif not check_quiet or not self.params.get('quiet', False):
 390             message = self._bidi_workaround(message)
 391             terminator = ['\n', ''][skip_eol]
 392             output = message + terminator
 393
 394             self._write_string(output, self._screen_file)
 395
 396     def to_stderr(self, message):
 397         """Print message to stderr."""
 398         assert isinstance(message, compat_str)
 399         if self.params.get('logger'):
 400             self.params['logger'].error(message)
 401         else:
 402             message = self._bidi_workaround(message)
 403             output = message + '\n'
 404             self._write_string(output, self._err_file)
 405
 406     def to_console_title(self, message):
 407         if not self.params.get('consoletitle', False):
 408             return
 409         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 410             # c_wchar_p() might not be necessary if `message` is
 411             # already of type unicode()
 412             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 413         elif 'TERM' in os.environ:
 414             self._write_string('\033]0;%s\007' % message, self._screen_file)
 415
 416     def save_console_title(self):
 417         if not self.params.get('consoletitle', False):
 418             return
 419         if 'TERM' in os.environ:
 420             # Save the title on stack
 421             self._write_string('\033[22;0t', self._screen_file)
 422
 423     def restore_console_title(self):
 424         if not self.params.get('consoletitle', False):
 425             return
 426         if 'TERM' in os.environ:
 427             # Restore the title from stack
 428             self._write_string('\033[23;0t', self._screen_file)
 429
 430     def __enter__(self):
 431         self.save_console_title()
 432         return self
 433
 434     def __exit__(self, *args):
 435         self.restore_console_title()
 436
 437         if self.params.get('cookiefile') is not None:
 438             self.cookiejar.save()
 439
 440     def trouble(self, message=None, tb=None):
 441         """Determine action to take when a download problem appears.
 442
 443         Depending on if the downloader has been configured to ignore
 444         download errors or not, this method may throw an exception or
 445         not when errors are found, after printing the message.
 446
 447         tb, if given, is additional traceback information.
 448         """
 449         if message is not None:
 450             self.to_stderr(message)
 451         if self.params.get('verbose'):
 452             if tb is None:
 453                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 454                     tb = ''
 455                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 456                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 457                     tb += compat_str(traceback.format_exc())
 458                 else:
 459                     tb_data = traceback.format_list(traceback.extract_stack())
 460                     tb = ''.join(tb_data)
 461             self.to_stderr(tb)
 462         if not self.params.get('ignoreerrors', False):
 463             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 464                 exc_info = sys.exc_info()[1].exc_info
 465             else:
 466                 exc_info = sys.exc_info()
 467             raise DownloadError(message, exc_info)
 468         self._download_retcode = 1
 469
 470     def report_warning(self, message):
 471         '''
 472         Print the message to stderr, it will be prefixed with 'WARNING:'
 473         If stderr is a tty file the 'WARNING:' will be colored
 474         '''
 475         if self.params.get('logger') is not None:
 476             self.params['logger'].warning(message)
 477         else:
 478             if self.params.get('no_warnings'):
 479                 return
 480             if self._err_file.isatty() and os.name != 'nt':
 481                 _msg_header = '\033[0;33mWARNING:\033[0m'
 482             else:
 483                 _msg_header = 'WARNING:'
 484             warning_message = '%s %s' % (_msg_header, message)
 485             self.to_stderr(warning_message)
 486
 487     def report_error(self, message, tb=None):
 488         '''
 489         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 490         in red if stderr is a tty file.
 491         '''
 492         if self._err_file.isatty() and os.name != 'nt':
 493             _msg_header = '\033[0;31mERROR:\033[0m'
 494         else:
 495             _msg_header = 'ERROR:'
 496         error_message = '%s %s' % (_msg_header, message)
 497         self.trouble(error_message, tb)
 498
 499     def report_file_already_downloaded(self, file_name):
 500         """Report file has already been fully downloaded."""
 501         try:
 502             self.to_screen('[download] %s has already been downloaded' % file_name)
 503         except UnicodeEncodeError:
 504             self.to_screen('[download] The file has already been downloaded')
 505
 506     def prepare_filename(self, info_dict):
 507         """Generate the output filename."""
 508         try:
 509             template_dict = dict(info_dict)
 510
 511             template_dict['epoch'] = int(time.time())
 512             autonumber_size = self.params.get('autonumber_size')
 513             if autonumber_size is None:
 514                 autonumber_size = 5
 515             autonumber_templ = '%0' + str(autonumber_size) + 'd'
 516             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 517             if template_dict.get('playlist_index') is not None:
 518                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
 519             if template_dict.get('resolution') is None:
 520                 if template_dict.get('width') and template_dict.get('height'):
 521                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 522                 elif template_dict.get('height'):
 523                     template_dict['resolution'] = '%sp' % template_dict['height']
 524                 elif template_dict.get('width'):
 525                     template_dict['resolution'] = '?x%d' % template_dict['width']
 526
 527             sanitize = lambda k, v: sanitize_filename(
 528                 compat_str(v),
 529                 restricted=self.params.get('restrictfilenames'),
 530                 is_id=(k == 'id'))
 531             template_dict = dict((k, sanitize(k, v))
 532                                  for k, v in template_dict.items()
 533                                  if v is not None)
 534             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 535
 536             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 537             tmpl = compat_expanduser(outtmpl)
 538             filename = tmpl % template_dict
 539             return filename
 540         except ValueError as err:
 541             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 542             return None
 543
 544     def _match_entry(self, info_dict):
 545         """ Returns None iff the file should be downloaded """
 546
 547         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 548         if 'title' in info_dict:
 549             # This can happen when we're just evaluating the playlist
 550             title = info_dict['title']
 551             matchtitle = self.params.get('matchtitle', False)
 552             if matchtitle:
 553                 if not re.search(matchtitle, title, re.IGNORECASE):
 554                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 555             rejecttitle = self.params.get('rejecttitle', False)
 556             if rejecttitle:
 557                 if re.search(rejecttitle, title, re.IGNORECASE):
 558                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 559         date = info_dict.get('upload_date', None)
 560         if date is not None:
 561             dateRange = self.params.get('daterange', DateRange())
 562             if date not in dateRange:
 563                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 564         view_count = info_dict.get('view_count', None)
 565         if view_count is not None:
 566             min_views = self.params.get('min_views')
 567             if min_views is not None and view_count < min_views:
 568                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 569             max_views = self.params.get('max_views')
 570             if max_views is not None and view_count > max_views:
 571                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 572         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
 573             return 'Skipping "%s" because it is age restricted' % title
 574         if self.in_download_archive(info_dict):
 575             return '%s has already been recorded in archive' % video_title
 576         return None
 577
 578     @staticmethod
 579     def add_extra_info(info_dict, extra_info):
 580         '''Set the keys from extra_info in info dict if they are missing'''
 581         for key, value in extra_info.items():
 582             info_dict.setdefault(key, value)
 583
 584     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 585                      process=True):
 586         '''
 587         Returns a list with a dictionary for each video we find.
 588         If 'download', also downloads the videos.
 589         extra_info is a dict containing the extra values to add to each result
 590          '''
 591
 592         if ie_key:
 593             ies = [self.get_info_extractor(ie_key)]
 594         else:
 595             ies = self._ies
 596
 597         for ie in ies:
 598             if not ie.suitable(url):
 599                 continue
 600
 601             if not ie.working():
 602                 self.report_warning('The program functionality for this site has been marked as broken, '
 603                                     'and will probably not work.')
 604
 605             try:
 606                 ie_result = ie.extract(url)
 607                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
 608                     break
 609                 if isinstance(ie_result, list):
 610                     # Backwards compatibility: old IE result format
 611                     ie_result = {
 612                         '_type': 'compat_list',
 613                         'entries': ie_result,
 614                     }
 615                 self.add_default_extra_info(ie_result, ie, url)
 616                 if process:
 617                     return self.process_ie_result(ie_result, download, extra_info)
 618                 else:
 619                     return ie_result
 620             except ExtractorError as de:  # An error we somewhat expected
 621                 self.report_error(compat_str(de), de.format_traceback())
 622                 break
 623             except MaxDownloadsReached:
 624                 raise
 625             except Exception as e:
 626                 if self.params.get('ignoreerrors', False):
 627                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 628                     break
 629                 else:
 630                     raise
 631         else:
 632             self.report_error('no suitable InfoExtractor for URL %s' % url)
 633
 634     def add_default_extra_info(self, ie_result, ie, url):
 635         self.add_extra_info(ie_result, {
 636             'extractor': ie.IE_NAME,
 637             'webpage_url': url,
 638             'webpage_url_basename': url_basename(url),
 639             'extractor_key': ie.ie_key(),
 640         })
 641
 642     def process_ie_result(self, ie_result, download=True, extra_info={}):
 643         """
 644         Take the result of the ie(may be modified) and resolve all unresolved
 645         references (URLs, playlist items).
 646
 647         It will also download the videos if 'download'.
 648         Returns the resolved ie_result.
 649         """
 650
 651         result_type = ie_result.get('_type', 'video')
 652
 653         if result_type in ('url', 'url_transparent'):
 654             extract_flat = self.params.get('extract_flat', False)
 655             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
 656                     extract_flat is True):
 657                 if self.params.get('forcejson', False):
 658                     self.to_stdout(json.dumps(ie_result))
 659                 return ie_result
 660
 661         if result_type == 'video':
 662             self.add_extra_info(ie_result, extra_info)
 663             return self.process_video_result(ie_result, download=download)
 664         elif result_type == 'url':
 665             # We have to add extra_info to the results because it may be
 666             # contained in a playlist
 667             return self.extract_info(ie_result['url'],
 668                                      download,
 669                                      ie_key=ie_result.get('ie_key'),
 670                                      extra_info=extra_info)
 671         elif result_type == 'url_transparent':
 672             # Use the information from the embedding page
 673             info = self.extract_info(
 674                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 675                 extra_info=extra_info, download=False, process=False)
 676
 677             force_properties = dict(
 678                 (k, v) for k, v in ie_result.items() if v is not None)
 679             for f in ('_type', 'url'):
 680                 if f in force_properties:
 681                     del force_properties[f]
 682             new_result = info.copy()
 683             new_result.update(force_properties)
 684
 685             assert new_result.get('_type') != 'url_transparent'
 686
 687             return self.process_ie_result(
 688                 new_result, download=download, extra_info=extra_info)
 689         elif result_type == 'playlist' or result_type == 'multi_video':
 690             # We process each entry in the playlist
 691             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 692             self.to_screen('[download] Downloading playlist: %s' % playlist)
 693
 694             playlist_results = []
 695
 696             playliststart = self.params.get('playliststart', 1) - 1
 697             playlistend = self.params.get('playlistend', None)
 698             # For backwards compatibility, interpret -1 as whole list
 699             if playlistend == -1:
 700                 playlistend = None
 701
 702             ie_entries = ie_result['entries']
 703             if isinstance(ie_entries, list):
 704                 n_all_entries = len(ie_entries)
 705                 entries = ie_entries[playliststart:playlistend]
 706                 n_entries = len(entries)
 707                 self.to_screen(
 708                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
 709                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
 710             elif isinstance(ie_entries, PagedList):
 711                 entries = ie_entries.getslice(
 712                     playliststart, playlistend)
 713                 n_entries = len(entries)
 714                 self.to_screen(
 715                     "[%s] playlist %s: Downloading %d videos" %
 716                     (ie_result['extractor'], playlist, n_entries))
 717             else:  # iterable
 718                 entries = list(itertools.islice(
 719                     ie_entries, playliststart, playlistend))
 720                 n_entries = len(entries)
 721                 self.to_screen(
 722                     "[%s] playlist %s: Downloading %d videos" %
 723                     (ie_result['extractor'], playlist, n_entries))
 724
 725             if self.params.get('playlistreverse', False):
 726                 entries = entries[::-1]
 727
 728             for i, entry in enumerate(entries, 1):
 729                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
 730                 extra = {
 731                     'n_entries': n_entries,
 732                     'playlist': playlist,
 733                     'playlist_id': ie_result.get('id'),
 734                     'playlist_title': ie_result.get('title'),
 735                     'playlist_index': i + playliststart,
 736                     'extractor': ie_result['extractor'],
 737                     'webpage_url': ie_result['webpage_url'],
 738                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
 739                     'extractor_key': ie_result['extractor_key'],
 740                 }
 741
 742                 reason = self._match_entry(entry)
 743                 if reason is not None:
 744                     self.to_screen('[download] ' + reason)
 745                     continue
 746
 747                 entry_result = self.process_ie_result(entry,
 748                                                       download=download,
 749                                                       extra_info=extra)
 750                 playlist_results.append(entry_result)
 751             ie_result['entries'] = playlist_results
 752             return ie_result
 753         elif result_type == 'compat_list':
 754             self.report_warning(
 755                 'Extractor %s returned a compat_list result. '
 756                 'It needs to be updated.' % ie_result.get('extractor'))
 757
 758             def _fixup(r):
 759                 self.add_extra_info(
 760                     r,
 761                     {
 762                         'extractor': ie_result['extractor'],
 763                         'webpage_url': ie_result['webpage_url'],
 764                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
 765                         'extractor_key': ie_result['extractor_key'],
 766                     }
 767                 )
 768                 return r
 769             ie_result['entries'] = [
 770                 self.process_ie_result(_fixup(r), download, extra_info)
 771                 for r in ie_result['entries']
 772             ]
 773             return ie_result
 774         else:
 775             raise Exception('Invalid result type: %s' % result_type)
 776
 777     def _apply_format_filter(self, format_spec, available_formats):
 778         " Returns a tuple of the remaining format_spec and filtered formats "
 779
 780         OPERATORS = {
 781             '<': operator.lt,
 782             '<=': operator.le,
 783             '>': operator.gt,
 784             '>=': operator.ge,
 785             '=': operator.eq,
 786             '!=': operator.ne,
 787         }
 788         operator_rex = re.compile(r'''(?x)\s*\[
 789             (?P<key>width|height|tbr|abr|vbr|filesize)
 790             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
 791             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
 792             \]$
 793             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
 794         m = operator_rex.search(format_spec)
 795         if not m:
 796             raise ValueError('Invalid format specification %r' % format_spec)
 797
 798         try:
 799             comparison_value = int(m.group('value'))
 800         except ValueError:
 801             comparison_value = parse_filesize(m.group('value'))
 802             if comparison_value is None:
 803                 comparison_value = parse_filesize(m.group('value') + 'B')
 804             if comparison_value is None:
 805                 raise ValueError(
 806                     'Invalid value %r in format specification %r' % (
 807                         m.group('value'), format_spec))
 808         op = OPERATORS[m.group('op')]
 809
 810         def _filter(f):
 811             actual_value = f.get(m.group('key'))
 812             if actual_value is None:
 813                 return m.group('none_inclusive')
 814             return op(actual_value, comparison_value)
 815         new_formats = [f for f in available_formats if _filter(f)]
 816
 817         new_format_spec = format_spec[:-len(m.group(0))]
 818         if not new_format_spec:
 819             new_format_spec = 'best'
 820
 821         return (new_format_spec, new_formats)
 822
 823     def select_format(self, format_spec, available_formats):
 824         while format_spec.endswith(']'):
 825             format_spec, available_formats = self._apply_format_filter(
 826                 format_spec, available_formats)
 827         if not available_formats:
 828             return None
 829
 830         if format_spec == 'best' or format_spec is None:
 831             return available_formats[-1]
 832         elif format_spec == 'worst':
 833             return available_formats[0]
 834         elif format_spec == 'bestaudio':
 835             audio_formats = [
 836                 f for f in available_formats
 837                 if f.get('vcodec') == 'none']
 838             if audio_formats:
 839                 return audio_formats[-1]
 840         elif format_spec == 'worstaudio':
 841             audio_formats = [
 842                 f for f in available_formats
 843                 if f.get('vcodec') == 'none']
 844             if audio_formats:
 845                 return audio_formats[0]
 846         elif format_spec == 'bestvideo':
 847             video_formats = [
 848                 f for f in available_formats
 849                 if f.get('acodec') == 'none']
 850             if video_formats:
 851                 return video_formats[-1]
 852         elif format_spec == 'worstvideo':
 853             video_formats = [
 854                 f for f in available_formats
 855                 if f.get('acodec') == 'none']
 856             if video_formats:
 857                 return video_formats[0]
 858         else:
 859             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
 860             if format_spec in extensions:
 861                 filter_f = lambda f: f['ext'] == format_spec
 862             else:
 863                 filter_f = lambda f: f['format_id'] == format_spec
 864             matches = list(filter(filter_f, available_formats))
 865             if matches:
 866                 return matches[-1]
 867         return None
 868
 869     def _calc_headers(self, info_dict):
 870         res = std_headers.copy()
 871
 872         add_headers = info_dict.get('http_headers')
 873         if add_headers:
 874             res.update(add_headers)
 875
 876         cookies = self._calc_cookies(info_dict)
 877         if cookies:
 878             res['Cookie'] = cookies
 879
 880         return res
 881
 882     def _calc_cookies(self, info_dict):
 883         class _PseudoRequest(object):
 884             def __init__(self, url):
 885                 self.url = url
 886                 self.headers = {}
 887                 self.unverifiable = False
 888
 889             def add_unredirected_header(self, k, v):
 890                 self.headers[k] = v
 891
 892             def get_full_url(self):
 893                 return self.url
 894
 895         pr = _PseudoRequest(info_dict['url'])
 896         self.cookiejar.add_cookie_header(pr)
 897         return pr.headers.get('Cookie')
 898
 899     def process_video_result(self, info_dict, download=True):
 900         assert info_dict.get('_type', 'video') == 'video'
 901
 902         if 'id' not in info_dict:
 903             raise ExtractorError('Missing "id" field in extractor result')
 904         if 'title' not in info_dict:
 905             raise ExtractorError('Missing "title" field in extractor result')
 906
 907         if 'playlist' not in info_dict:
 908             # It isn't part of a playlist
 909             info_dict['playlist'] = None
 910             info_dict['playlist_index'] = None
 911
 912         thumbnails = info_dict.get('thumbnails')
 913         if thumbnails:
 914             thumbnails.sort(key=lambda t: (
 915                 t.get('width'), t.get('height'), t.get('url')))
 916             for t in thumbnails:
 917                 if 'width' in t and 'height' in t:
 918                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
 919
 920         if thumbnails and 'thumbnail' not in info_dict:
 921             info_dict['thumbnail'] = thumbnails[-1]['url']
 922
 923         if 'display_id' not in info_dict and 'id' in info_dict:
 924             info_dict['display_id'] = info_dict['id']
 925
 926         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
 927             # Working around negative timestamps in Windows
 928             # (see http://bugs.python.org/issue1646728)
 929             if info_dict['timestamp'] < 0 and os.name == 'nt':
 930                 info_dict['timestamp'] = 0
 931             upload_date = datetime.datetime.utcfromtimestamp(
 932                 info_dict['timestamp'])
 933             info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
 934
 935         # This extractors handle format selection themselves
 936         if info_dict['extractor'] in ['Youku']:
 937             if download:
 938                 self.process_info(info_dict)
 939             return info_dict
 940
 941         # We now pick which formats have to be downloaded
 942         if info_dict.get('formats') is None:
 943             # There's only one format available
 944             formats = [info_dict]
 945         else:
 946             formats = info_dict['formats']
 947
 948         if not formats:
 949             raise ExtractorError('No video formats found!')
 950
 951         # We check that all the formats have the format and format_id fields
 952         for i, format in enumerate(formats):
 953             if 'url' not in format:
 954                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
 955
 956             if format.get('format_id') is None:
 957                 format['format_id'] = compat_str(i)
 958             if format.get('format') is None:
 959                 format['format'] = '{id} - {res}{note}'.format(
 960                     id=format['format_id'],
 961                     res=self.format_resolution(format),
 962                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
 963                 )
 964             # Automatically determine file extension if missing
 965             if 'ext' not in format:
 966                 format['ext'] = determine_ext(format['url']).lower()
 967             # Add HTTP headers, so that external programs can use them from the
 968             # json output
 969             full_format_info = info_dict.copy()
 970             full_format_info.update(format)
 971             format['http_headers'] = self._calc_headers(full_format_info)
 972
 973         format_limit = self.params.get('format_limit', None)
 974         if format_limit:
 975             formats = list(takewhile_inclusive(
 976                 lambda f: f['format_id'] != format_limit, formats
 977             ))
 978
 979         # TODO Central sorting goes here
 980
 981         if formats[0] is not info_dict:
 982             # only set the 'formats' fields if the original info_dict list them
 983             # otherwise we end up with a circular reference, the first (and unique)
 984             # element in the 'formats' field in info_dict is info_dict itself,
 985             # wich can't be exported to json
 986             info_dict['formats'] = formats
 987         if self.params.get('listformats', None):
 988             self.list_formats(info_dict)
 989             return
 990
 991         req_format = self.params.get('format')
 992         if req_format is None:
 993             req_format = 'best'
 994         formats_to_download = []
 995         # The -1 is for supporting YoutubeIE
 996         if req_format in ('-1', 'all'):
 997             formats_to_download = formats
 998         else:
 999             for rfstr in req_format.split(','):
1000                 # We can accept formats requested in the format: 34/5/best, we pick
1001                 # the first that is available, starting from left
1002                 req_formats = rfstr.split('/')
1003                 for rf in req_formats:
1004                     if re.match(r'.+?\+.+?', rf) is not None:
1005                         # Two formats have been requested like '137+139'
1006                         format_1, format_2 = rf.split('+')
1007                         formats_info = (self.select_format(format_1, formats),
1008                                         self.select_format(format_2, formats))
1009                         if all(formats_info):
1010                             # The first format must contain the video and the
1011                             # second the audio
1012                             if formats_info[0].get('vcodec') == 'none':
1013                                 self.report_error('The first format must '
1014                                                   'contain the video, try using '
1015                                                   '"-f %s+%s"' % (format_2, format_1))
1016                                 return
1017                             output_ext = (
1018                                 formats_info[0]['ext']
1019                                 if self.params.get('merge_output_format') is None
1020                                 else self.params['merge_output_format'])
1021                             selected_format = {
1022                                 'requested_formats': formats_info,
1023                                 'format': rf,
1024                                 'ext': formats_info[0]['ext'],
1025                                 'width': formats_info[0].get('width'),
1026                                 'height': formats_info[0].get('height'),
1027                                 'resolution': formats_info[0].get('resolution'),
1028                                 'fps': formats_info[0].get('fps'),
1029                                 'vcodec': formats_info[0].get('vcodec'),
1030                                 'vbr': formats_info[0].get('vbr'),
1031                                 'stretched_ratio': formats_info[0].get('stretched_ratio'),
1032                                 'acodec': formats_info[1].get('acodec'),
1033                                 'abr': formats_info[1].get('abr'),
1034                                 'ext': output_ext,
1035                             }
1036                         else:
1037                             selected_format = None
1038                     else:
1039                         selected_format = self.select_format(rf, formats)
1040                     if selected_format is not None:
1041                         formats_to_download.append(selected_format)
1042                         break
1043         if not formats_to_download:
1044             raise ExtractorError('requested format not available',
1045                                  expected=True)
1046
1047         if download:
1048             if len(formats_to_download) > 1:
1049                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1050             for format in formats_to_download:
1051                 new_info = dict(info_dict)
1052                 new_info.update(format)
1053                 self.process_info(new_info)
1054         # We update the info dict with the best quality format (backwards compatibility)
1055         info_dict.update(formats_to_download[-1])
1056         return info_dict
1057
1058     def process_info(self, info_dict):
1059         """Process a single resolved IE result."""
1060
1061         assert info_dict.get('_type', 'video') == 'video'
1062
1063         max_downloads = self.params.get('max_downloads')
1064         if max_downloads is not None:
1065             if self._num_downloads >= int(max_downloads):
1066                 raise MaxDownloadsReached()
1067
1068         info_dict['fulltitle'] = info_dict['title']
1069         if len(info_dict['title']) > 200:
1070             info_dict['title'] = info_dict['title'][:197] + '...'
1071
1072         # Keep for backwards compatibility
1073         info_dict['stitle'] = info_dict['title']
1074
1075         if 'format' not in info_dict:
1076             info_dict['format'] = info_dict['ext']
1077
1078         reason = self._match_entry(info_dict)
1079         if reason is not None:
1080             self.to_screen('[download] ' + reason)
1081             return
1082
1083         self._num_downloads += 1
1084
1085         filename = self.prepare_filename(info_dict)
1086
1087         # Forced printings
1088         if self.params.get('forcetitle', False):
1089             self.to_stdout(info_dict['fulltitle'])
1090         if self.params.get('forceid', False):
1091             self.to_stdout(info_dict['id'])
1092         if self.params.get('forceurl', False):
1093             if info_dict.get('requested_formats') is not None:
1094                 for f in info_dict['requested_formats']:
1095                     self.to_stdout(f['url'] + f.get('play_path', ''))
1096             else:
1097                 # For RTMP URLs, also include the playpath
1098                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1099         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1100             self.to_stdout(info_dict['thumbnail'])
1101         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1102             self.to_stdout(info_dict['description'])
1103         if self.params.get('forcefilename', False) and filename is not None:
1104             self.to_stdout(filename)
1105         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1106             self.to_stdout(formatSeconds(info_dict['duration']))
1107         if self.params.get('forceformat', False):
1108             self.to_stdout(info_dict['format'])
1109         if self.params.get('forcejson', False):
1110             info_dict['_filename'] = filename
1111             self.to_stdout(json.dumps(info_dict))
1112         if self.params.get('dump_single_json', False):
1113             info_dict['_filename'] = filename
1114
1115         # Do nothing else if in simulate mode
1116         if self.params.get('simulate', False):
1117             return
1118
1119         if filename is None:
1120             return
1121
1122         try:
1123             dn = os.path.dirname(encodeFilename(filename))
1124             if dn and not os.path.exists(dn):
1125                 os.makedirs(dn)
1126         except (OSError, IOError) as err:
1127             self.report_error('unable to create directory ' + compat_str(err))
1128             return
1129
1130         if self.params.get('writedescription', False):
1131             descfn = filename + '.description'
1132             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1133                 self.to_screen('[info] Video description is already present')
1134             elif info_dict.get('description') is None:
1135                 self.report_warning('There\'s no description to write.')
1136             else:
1137                 try:
1138                     self.to_screen('[info] Writing video description to: ' + descfn)
1139                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1140                         descfile.write(info_dict['description'])
1141                 except (OSError, IOError):
1142                     self.report_error('Cannot write description file ' + descfn)
1143                     return
1144
1145         if self.params.get('writeannotations', False):
1146             annofn = filename + '.annotations.xml'
1147             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1148                 self.to_screen('[info] Video annotations are already present')
1149             else:
1150                 try:
1151                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1152                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1153                         annofile.write(info_dict['annotations'])
1154                 except (KeyError, TypeError):
1155                     self.report_warning('There are no annotations to write.')
1156                 except (OSError, IOError):
1157                     self.report_error('Cannot write annotations file: ' + annofn)
1158                     return
1159
1160         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1161                                        self.params.get('writeautomaticsub')])
1162
1163         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
1164             # subtitles download errors are already managed as troubles in relevant IE
1165             # that way it will silently go on when used with unsupporting IE
1166             subtitles = info_dict['subtitles']
1167             sub_format = self.params.get('subtitlesformat', 'srt')
1168             for sub_lang in subtitles.keys():
1169                 sub = subtitles[sub_lang]
1170                 if sub is None:
1171                     continue
1172                 try:
1173                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1174                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1175                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1176                     else:
1177                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1178                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1179                             subfile.write(sub)
1180                 except (OSError, IOError):
1181                     self.report_error('Cannot write subtitles file ' + sub_filename)
1182                     return
1183
1184         if self.params.get('writeinfojson', False):
1185             infofn = os.path.splitext(filename)[0] + '.info.json'
1186             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1187                 self.to_screen('[info] Video description metadata is already present')
1188             else:
1189                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1190                 try:
1191                     write_json_file(info_dict, infofn)
1192                 except (OSError, IOError):
1193                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1194                     return
1195
1196         if self.params.get('writethumbnail', False):
1197             if info_dict.get('thumbnail') is not None:
1198                 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
1199                 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
1200                 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1201                     self.to_screen('[%s] %s: Thumbnail is already present' %
1202                                    (info_dict['extractor'], info_dict['id']))
1203                 else:
1204                     self.to_screen('[%s] %s: Downloading thumbnail ...' %
1205                                    (info_dict['extractor'], info_dict['id']))
1206                     try:
1207                         uf = self.urlopen(info_dict['thumbnail'])
1208                         with open(thumb_filename, 'wb') as thumbf:
1209                             shutil.copyfileobj(uf, thumbf)
1210                         self.to_screen('[%s] %s: Writing thumbnail to: %s' %
1211                                        (info_dict['extractor'], info_dict['id'], thumb_filename))
1212                     except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1213                         self.report_warning('Unable to download thumbnail "%s": %s' %
1214                                             (info_dict['thumbnail'], compat_str(err)))
1215
1216         if not self.params.get('skip_download', False):
1217             try:
1218                 def dl(name, info):
1219                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1220                     for ph in self._progress_hooks:
1221                         fd.add_progress_hook(ph)
1222                     if self.params.get('verbose'):
1223                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1224                     return fd.download(name, info)
1225                 if info_dict.get('requested_formats') is not None:
1226                     downloaded = []
1227                     success = True
1228                     merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1229                     if not merger._executable:
1230                         postprocessors = []
1231                         self.report_warning('You have requested multiple '
1232                                             'formats but ffmpeg or avconv are not installed.'
1233                                             ' The formats won\'t be merged')
1234                     else:
1235                         postprocessors = [merger]
1236                     for f in info_dict['requested_formats']:
1237                         new_info = dict(info_dict)
1238                         new_info.update(f)
1239                         fname = self.prepare_filename(new_info)
1240                         fname = prepend_extension(fname, 'f%s' % f['format_id'])
1241                         downloaded.append(fname)
1242                         partial_success = dl(fname, new_info)
1243                         success = success and partial_success
1244                     info_dict['__postprocessors'] = postprocessors
1245                     info_dict['__files_to_merge'] = downloaded
1246                 else:
1247                     # Just a single file
1248                     success = dl(filename, info_dict)
1249             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1250                 self.report_error('unable to download video data: %s' % str(err))
1251                 return
1252             except (OSError, IOError) as err:
1253                 raise UnavailableVideoError(err)
1254             except (ContentTooShortError, ) as err:
1255                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1256                 return
1257
1258             if success:
1259                 # Fixup content
1260                 fixup_policy = self.params.get('fixup')
1261                 if fixup_policy is None:
1262                     fixup_policy = 'detect_or_warn'
1263
1264                 stretched_ratio = info_dict.get('stretched_ratio')
1265                 if stretched_ratio is not None and stretched_ratio != 1:
1266                     if fixup_policy == 'warn':
1267                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1268                             info_dict['id'], stretched_ratio))
1269                     elif fixup_policy == 'detect_or_warn':
1270                         stretched_pp = FFmpegFixupStretchedPP(self)
1271                         if stretched_pp.available:
1272                             info_dict.setdefault('__postprocessors', [])
1273                             info_dict['__postprocessors'].append(stretched_pp)
1274                         else:
1275                             self.report_warning(
1276                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1277                                     info_dict['id'], stretched_ratio))
1278                     else:
1279                         assert fixup_policy in ('ignore', 'never')
1280
1281                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1282                     if fixup_policy == 'warn':
1283                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1284                             info_dict['id']))
1285                     elif fixup_policy == 'detect_or_warn':
1286                         fixup_pp = FFmpegFixupM4aPP(self)
1287                         if fixup_pp.available:
1288                             info_dict.setdefault('__postprocessors', [])
1289                             info_dict['__postprocessors'].append(fixup_pp)
1290                         else:
1291                             self.report_warning(
1292                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1293                                     info_dict['id']))
1294                     else:
1295                         assert fixup_policy in ('ignore', 'never')
1296
1297                 try:
1298                     self.post_process(filename, info_dict)
1299                 except (PostProcessingError) as err:
1300                     self.report_error('postprocessing: %s' % str(err))
1301                     return
1302                 self.record_download_archive(info_dict)
1303
1304     def download(self, url_list):
1305         """Download a given list of URLs."""
1306         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1307         if (len(url_list) > 1 and
1308                 '%' not in outtmpl
1309                 and self.params.get('max_downloads') != 1):
1310             raise SameFileError(outtmpl)
1311
1312         for url in url_list:
1313             try:
1314                 # It also downloads the videos
1315                 res = self.extract_info(url)
1316             except UnavailableVideoError:
1317                 self.report_error('unable to download video')
1318             except MaxDownloadsReached:
1319                 self.to_screen('[info] Maximum number of downloaded files reached.')
1320                 raise
1321             else:
1322                 if self.params.get('dump_single_json', False):
1323                     self.to_stdout(json.dumps(res))
1324
1325         return self._download_retcode
1326
1327     def download_with_info_file(self, info_filename):
1328         with io.open(info_filename, 'r', encoding='utf-8') as f:
1329             info = json.load(f)
1330         try:
1331             self.process_ie_result(info, download=True)
1332         except DownloadError:
1333             webpage_url = info.get('webpage_url')
1334             if webpage_url is not None:
1335                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1336                 return self.download([webpage_url])
1337             else:
1338                 raise
1339         return self._download_retcode
1340
1341     def post_process(self, filename, ie_info):
1342         """Run all the postprocessors on the given file."""
1343         info = dict(ie_info)
1344         info['filepath'] = filename
1345         pps_chain = []
1346         if ie_info.get('__postprocessors') is not None:
1347             pps_chain.extend(ie_info['__postprocessors'])
1348         pps_chain.extend(self._pps)
1349         for pp in pps_chain:
1350             keep_video = None
1351             old_filename = info['filepath']
1352             try:
1353                 keep_video_wish, info = pp.run(info)
1354                 if keep_video_wish is not None:
1355                     if keep_video_wish:
1356                         keep_video = keep_video_wish
1357                     elif keep_video is None:
1358                         # No clear decision yet, let IE decide
1359                         keep_video = keep_video_wish
1360             except PostProcessingError as e:
1361                 self.report_error(e.msg)
1362             if keep_video is False and not self.params.get('keepvideo', False):
1363                 try:
1364                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1365                     os.remove(encodeFilename(old_filename))
1366                 except (IOError, OSError):
1367                     self.report_warning('Unable to remove downloaded video file')
1368
1369     def _make_archive_id(self, info_dict):
1370         # Future-proof against any change in case
1371         # and backwards compatibility with prior versions
1372         extractor = info_dict.get('extractor_key')
1373         if extractor is None:
1374             if 'id' in info_dict:
1375                 extractor = info_dict.get('ie_key')  # key in a playlist
1376         if extractor is None:
1377             return None  # Incomplete video information
1378         return extractor.lower() + ' ' + info_dict['id']
1379
1380     def in_download_archive(self, info_dict):
1381         fn = self.params.get('download_archive')
1382         if fn is None:
1383             return False
1384
1385         vid_id = self._make_archive_id(info_dict)
1386         if vid_id is None:
1387             return False  # Incomplete video information
1388
1389         try:
1390             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1391                 for line in archive_file:
1392                     if line.strip() == vid_id:
1393                         return True
1394         except IOError as ioe:
1395             if ioe.errno != errno.ENOENT:
1396                 raise
1397         return False
1398
1399     def record_download_archive(self, info_dict):
1400         fn = self.params.get('download_archive')
1401         if fn is None:
1402             return
1403         vid_id = self._make_archive_id(info_dict)
1404         assert vid_id
1405         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1406             archive_file.write(vid_id + '\n')
1407
1408     @staticmethod
1409     def format_resolution(format, default='unknown'):
1410         if format.get('vcodec') == 'none':
1411             return 'audio only'
1412         if format.get('resolution') is not None:
1413             return format['resolution']
1414         if format.get('height') is not None:
1415             if format.get('width') is not None:
1416                 res = '%sx%s' % (format['width'], format['height'])
1417             else:
1418                 res = '%sp' % format['height']
1419         elif format.get('width') is not None:
1420             res = '?x%d' % format['width']
1421         else:
1422             res = default
1423         return res
1424
1425     def _format_note(self, fdict):
1426         res = ''
1427         if fdict.get('ext') in ['f4f', 'f4m']:
1428             res += '(unsupported) '
1429         if fdict.get('format_note') is not None:
1430             res += fdict['format_note'] + ' '
1431         if fdict.get('tbr') is not None:
1432             res += '%4dk ' % fdict['tbr']
1433         if fdict.get('container') is not None:
1434             if res:
1435                 res += ', '
1436             res += '%s container' % fdict['container']
1437         if (fdict.get('vcodec') is not None and
1438                 fdict.get('vcodec') != 'none'):
1439             if res:
1440                 res += ', '
1441             res += fdict['vcodec']
1442             if fdict.get('vbr') is not None:
1443                 res += '@'
1444         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1445             res += 'video@'
1446         if fdict.get('vbr') is not None:
1447             res += '%4dk' % fdict['vbr']
1448         if fdict.get('fps') is not None:
1449             res += ', %sfps' % fdict['fps']
1450         if fdict.get('acodec') is not None:
1451             if res:
1452                 res += ', '
1453             if fdict['acodec'] == 'none':
1454                 res += 'video only'
1455             else:
1456                 res += '%-5s' % fdict['acodec']
1457         elif fdict.get('abr') is not None:
1458             if res:
1459                 res += ', '
1460             res += 'audio'
1461         if fdict.get('abr') is not None:
1462             res += '@%3dk' % fdict['abr']
1463         if fdict.get('asr') is not None:
1464             res += ' (%5dHz)' % fdict['asr']
1465         if fdict.get('filesize') is not None:
1466             if res:
1467                 res += ', '
1468             res += format_bytes(fdict['filesize'])
1469         elif fdict.get('filesize_approx') is not None:
1470             if res:
1471                 res += ', '
1472             res += '~' + format_bytes(fdict['filesize_approx'])
1473         return res
1474
1475     def list_formats(self, info_dict):
1476         def line(format, idlen=20):
1477             return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1478                 format['format_id'],
1479                 format['ext'],
1480                 self.format_resolution(format),
1481                 self._format_note(format),
1482             ))
1483
1484         formats = info_dict.get('formats', [info_dict])
1485         idlen = max(len('format code'),
1486                     max(len(f['format_id']) for f in formats))
1487         formats_s = [
1488             line(f, idlen) for f in formats
1489             if f.get('preference') is None or f['preference'] >= -1000]
1490         if len(formats) > 1:
1491             formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1492             formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1493
1494         header_line = line({
1495             'format_id': 'format code', 'ext': 'extension',
1496             'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1497         self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1498                        (info_dict['id'], header_line, '\n'.join(formats_s)))
1499
1500     def urlopen(self, req):
1501         """ Start an HTTP download """
1502
1503         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1504         # always respected by websites, some tend to give out URLs with non percent-encoded
1505         # non-ASCII characters (see telemb.py, ard.py [#3412])
1506         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1507         # To work around aforementioned issue we will replace request's original URL with
1508         # percent-encoded one
1509         req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
1510         url = req if req_is_string else req.get_full_url()
1511         url_escaped = escape_url(url)
1512
1513         # Substitute URL if any change after escaping
1514         if url != url_escaped:
1515             if req_is_string:
1516                 req = url_escaped
1517             else:
1518                 req = compat_urllib_request.Request(
1519                     url_escaped, data=req.data, headers=req.headers,
1520                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1521
1522         return self._opener.open(req, timeout=self._socket_timeout)
1523
1524     def print_debug_header(self):
1525         if not self.params.get('verbose'):
1526             return
1527
1528         if type('') is not compat_str:
1529             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1530             self.report_warning(
1531                 'Your Python is broken! Update to a newer and supported version')
1532
1533         stdout_encoding = getattr(
1534             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1535         encoding_str = (
1536             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1537                 locale.getpreferredencoding(),
1538                 sys.getfilesystemencoding(),
1539                 stdout_encoding,
1540                 self.get_encoding()))
1541         write_string(encoding_str, encoding=None)
1542
1543         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1544         try:
1545             sp = subprocess.Popen(
1546                 ['git', 'rev-parse', '--short', 'HEAD'],
1547                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1548                 cwd=os.path.dirname(os.path.abspath(__file__)))
1549             out, err = sp.communicate()
1550             out = out.decode().strip()
1551             if re.match('[0-9a-f]+', out):
1552                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1553         except:
1554             try:
1555                 sys.exc_clear()
1556             except:
1557                 pass
1558         self._write_string('[debug] Python version %s - %s\n' % (
1559             platform.python_version(), platform_name()))
1560
1561         exe_versions = FFmpegPostProcessor.get_versions()
1562         exe_versions['rtmpdump'] = rtmpdump_version()
1563         exe_str = ', '.join(
1564             '%s %s' % (exe, v)
1565             for exe, v in sorted(exe_versions.items())
1566             if v
1567         )
1568         if not exe_str:
1569             exe_str = 'none'
1570         self._write_string('[debug] exe versions: %s\n' % exe_str)
1571
1572         proxy_map = {}
1573         for handler in self._opener.handlers:
1574             if hasattr(handler, 'proxies'):
1575                 proxy_map.update(handler.proxies)
1576         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1577
1578         if self.params.get('call_home', False):
1579             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1580             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1581             latest_version = self.urlopen(
1582                 'https://yt-dl.org/latest/version').read().decode('utf-8')
1583             if version_tuple(latest_version) > version_tuple(__version__):
1584                 self.report_warning(
1585                     'You are using an outdated version (newest version: %s)! '
1586                     'See https://yt-dl.org/update if you need help updating.' %
1587                     latest_version)
1588
1589     def _setup_opener(self):
1590         timeout_val = self.params.get('socket_timeout')
1591         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1592
1593         opts_cookiefile = self.params.get('cookiefile')
1594         opts_proxy = self.params.get('proxy')
1595
1596         if opts_cookiefile is None:
1597             self.cookiejar = compat_cookiejar.CookieJar()
1598         else:
1599             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1600                 opts_cookiefile)
1601             if os.access(opts_cookiefile, os.R_OK):
1602                 self.cookiejar.load()
1603
1604         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1605             self.cookiejar)
1606         if opts_proxy is not None:
1607             if opts_proxy == '':
1608                 proxies = {}
1609             else:
1610                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1611         else:
1612             proxies = compat_urllib_request.getproxies()
1613             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1614             if 'http' in proxies and 'https' not in proxies:
1615                 proxies['https'] = proxies['http']
1616         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1617
1618         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1619         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1620         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1621         opener = compat_urllib_request.build_opener(
1622             https_handler, proxy_handler, cookie_processor, ydlh)
1623         # Delete the default user-agent header, which would otherwise apply in
1624         # cases where our custom HTTP handler doesn't come into play
1625         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1626         opener.addheaders = []
1627         self._opener = opener
1628
1629     def encode(self, s):
1630         if isinstance(s, bytes):
1631             return s  # Already encoded
1632
1633         try:
1634             return s.encode(self.get_encoding())
1635         except UnicodeEncodeError as err:
1636             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1637             raise
1638
1639     def get_encoding(self):
1640         encoding = self.params.get('encoding')
1641         if encoding is None:
1642             encoding = preferredencoding()
1643         return encoding