[YoutubeDL] Add generic video filtering (Fixes #4916)
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import datetime
8 import errno
9 import io
10 import itertools
11 import json
12 import locale
13 import operator
14 import os
15 import platform
16 import re
17 import shutil
18 import subprocess
19 import socket
20 import sys
21 import time
22 import traceback
23
24 if os.name == 'nt':
25     import ctypes
26
27 from .compat import (
28     compat_basestring,
29     compat_cookiejar,
30     compat_expanduser,
31     compat_http_client,
32     compat_kwargs,
33     compat_str,
34     compat_urllib_error,
35     compat_urllib_request,
36 )
37 from .utils import (
38     escape_url,
39     ContentTooShortError,
40     date_from_str,
41     DateRange,
42     DEFAULT_OUTTMPL,
43     determine_ext,
44     DownloadError,
45     encodeFilename,
46     ExtractorError,
47     format_bytes,
48     formatSeconds,
49     get_term_width,
50     locked_file,
51     make_HTTPS_handler,
52     MaxDownloadsReached,
53     PagedList,
54     parse_filesize,
55     PostProcessingError,
56     platform_name,
57     preferredencoding,
58     render_table,
59     SameFileError,
60     sanitize_filename,
61     std_headers,
62     subtitles_filename,
63     takewhile_inclusive,
64     UnavailableVideoError,
65     url_basename,
66     version_tuple,
67     write_json_file,
68     write_string,
69     YoutubeDLHandler,
70     prepend_extension,
71     args_to_str,
72     age_restricted,
73 )
74 from .cache import Cache
75 from .extractor import get_info_extractor, gen_extractors
76 from .downloader import get_suitable_downloader
77 from .downloader.rtmp import rtmpdump_version
78 from .postprocessor import (
79     FFmpegFixupM4aPP,
80     FFmpegFixupStretchedPP,
81     FFmpegMergerPP,
82     FFmpegPostProcessor,
83     get_postprocessor,
84 )
85 from .version import __version__
86
87
88 class YoutubeDL(object):
89     """YoutubeDL class.
90
91     YoutubeDL objects are the ones responsible for downloading the
92     actual video file and writing it to disk if the user has requested
93     it, among some other tasks. In most cases there should be one per
94     program. As, given a video URL, the downloader doesn't know how to
95     extract all the needed information, task that InfoExtractors do, it
96     has to pass the URL to one of them.
97
98     For this, YoutubeDL objects have a method that allows
99     InfoExtractors to be registered in a given order. When it is passed
100     a URL, the YoutubeDL object handles it to the first InfoExtractor it
101     finds that reports being able to handle it. The InfoExtractor extracts
102     all the information about the video or videos the URL refers to, and
103     YoutubeDL process the extracted information, possibly using a File
104     Downloader to download the video.
105
106     YoutubeDL objects accept a lot of parameters. In order not to saturate
107     the object constructor with arguments, it receives a dictionary of
108     options instead. These options are available through the params
109     attribute for the InfoExtractors to use. The YoutubeDL also
110     registers itself as the downloader in charge for the InfoExtractors
111     that are added to it, so this is a "mutual registration".
112
113     Available options:
114
115     username:          Username for authentication purposes.
116     password:          Password for authentication purposes.
117     videopassword:     Password for accessing a video.
118     usenetrc:          Use netrc for authentication instead.
119     verbose:           Print additional info to stdout.
120     quiet:             Do not print messages to stdout.
121     no_warnings:       Do not print out anything for warnings.
122     forceurl:          Force printing final URL.
123     forcetitle:        Force printing title.
124     forceid:           Force printing ID.
125     forcethumbnail:    Force printing thumbnail URL.
126     forcedescription:  Force printing description.
127     forcefilename:     Force printing final filename.
128     forceduration:     Force printing duration.
129     forcejson:         Force printing info_dict as JSON.
130     dump_single_json:  Force printing the info_dict of the whole playlist
131                        (or video) as a single JSON line.
132     simulate:          Do not download the video files.
133     format:            Video format code. See options.py for more information.
134     format_limit:      Highest quality format to try.
135     outtmpl:           Template for output names.
136     restrictfilenames: Do not allow "&" and spaces in file names
137     ignoreerrors:      Do not stop on download errors.
138     nooverwrites:      Prevent overwriting files.
139     playliststart:     Playlist item to start at.
140     playlistend:       Playlist item to end at.
141     playlist_items:    Specific indices of playlist to download.
142     playlistreverse:   Download playlist items in reverse order.
143     matchtitle:        Download only matching titles.
144     rejecttitle:       Reject downloads for matching titles.
145     logger:            Log messages to a logging.Logger instance.
146     logtostderr:       Log messages to stderr instead of stdout.
147     writedescription:  Write the video description to a .description file
148     writeinfojson:     Write the video description to a .info.json file
149     writeannotations:  Write the video annotations to a .annotations.xml file
150     writethumbnail:    Write the thumbnail image to a file
151     write_all_thumbnails:  Write all thumbnail formats to files
152     writesubtitles:    Write the video subtitles to a file
153     writeautomaticsub: Write the automatic subtitles to a file
154     allsubtitles:      Downloads all the subtitles of the video
155                        (requires writesubtitles or writeautomaticsub)
156     listsubtitles:     Lists all available subtitles for the video
157     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
158     subtitleslangs:    List of languages of the subtitles to download
159     keepvideo:         Keep the video file after post-processing
160     daterange:         A DateRange object, download only if the upload_date is in the range.
161     skip_download:     Skip the actual download of the video file
162     cachedir:          Location of the cache files in the filesystem.
163                        False to disable filesystem cache.
164     noplaylist:        Download single video instead of a playlist if in doubt.
165     age_limit:         An integer representing the user's age in years.
166                        Unsuitable videos for the given age are skipped.
167     min_views:         An integer representing the minimum view count the video
168                        must have in order to not be skipped.
169                        Videos without view count information are always
170                        downloaded. None for no limit.
171     max_views:         An integer representing the maximum view count.
172                        Videos that are more popular than that are not
173                        downloaded.
174                        Videos without view count information are always
175                        downloaded. None for no limit.
176     download_archive:  File name of a file where all downloads are recorded.
177                        Videos already present in the file are not downloaded
178                        again.
179     cookiefile:        File name where cookies should be read from and dumped to.
180     nocheckcertificate:Do not verify SSL certificates
181     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
182                        At the moment, this is only supported by YouTube.
183     proxy:             URL of the proxy server to use
184     socket_timeout:    Time to wait for unresponsive hosts, in seconds
185     bidi_workaround:   Work around buggy terminals without bidirectional text
186                        support, using fribidi
187     debug_printtraffic:Print out sent and received HTTP traffic
188     include_ads:       Download ads as well
189     default_search:    Prepend this string if an input url is not valid.
190                        'auto' for elaborate guessing
191     encoding:          Use this encoding instead of the system-specified.
192     extract_flat:      Do not resolve URLs, return the immediate result.
193                        Pass in 'in_playlist' to only show this behavior for
194                        playlist items.
195     postprocessors:    A list of dictionaries, each with an entry
196                        * key:  The name of the postprocessor. See
197                                youtube_dl/postprocessor/__init__.py for a list.
198                        as well as any further keyword arguments for the
199                        postprocessor.
200     progress_hooks:    A list of functions that get called on download
201                        progress, with a dictionary with the entries
202                        * status: One of "downloading" and "finished".
203                                  Check this first and ignore unknown values.
204
205                        If status is one of "downloading" or "finished", the
206                        following properties may also be present:
207                        * filename: The final filename (always present)
208                        * downloaded_bytes: Bytes on disk
209                        * total_bytes: Size of the whole file, None if unknown
210                        * tmpfilename: The filename we're currently writing to
211                        * eta: The estimated time in seconds, None if unknown
212                        * speed: The download speed in bytes/second, None if
213                                 unknown
214
215                        Progress hooks are guaranteed to be called at least once
216                        (with status "finished") if the download is successful.
217     merge_output_format: Extension to use when merging formats.
218     fixup:             Automatically correct known faults of the file.
219                        One of:
220                        - "never": do nothing
221                        - "warn": only emit a warning
222                        - "detect_or_warn": check whether we can do anything
223                                            about it, warn otherwise (default)
224     source_address:    (Experimental) Client-side IP address to bind to.
225     call_home:         Boolean, true iff we are allowed to contact the
226                        youtube-dl servers for debugging.
227     sleep_interval:    Number of seconds to sleep before each download.
228     external_downloader:  Executable of the external downloader to call.
229     listformats:       Print an overview of available video formats and exit.
230     list_thumbnails:   Print a table of all thumbnails and exit.
231     match_filter:      A function that gets called with the info_dict of
232                        every video.
233                        If it returns a message, the video is ignored.
234                        If it returns None, the video is downloaded.
235                        match_filter_func in utils.py is one example for this.
236
237
238     The following parameters are not used by YoutubeDL itself, they are used by
239     the FileDownloader:
240     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
241     noresizebuffer, retries, continuedl, noprogress, consoletitle,
242     xattr_set_filesize.
243
244     The following options are used by the post processors:
245     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
246                        otherwise prefer avconv.
247     exec_cmd:          Arbitrary command to run after downloading
248     """
249
250     params = None
251     _ies = []
252     _pps = []
253     _download_retcode = None
254     _num_downloads = None
255     _screen_file = None
256
257     def __init__(self, params=None, auto_init=True):
258         """Create a FileDownloader object with the given options."""
259         if params is None:
260             params = {}
261         self._ies = []
262         self._ies_instances = {}
263         self._pps = []
264         self._progress_hooks = []
265         self._download_retcode = 0
266         self._num_downloads = 0
267         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
268         self._err_file = sys.stderr
269         self.params = params
270         self.cache = Cache(self)
271
272         if params.get('bidi_workaround', False):
273             try:
274                 import pty
275                 master, slave = pty.openpty()
276                 width = get_term_width()
277                 if width is None:
278                     width_args = []
279                 else:
280                     width_args = ['-w', str(width)]
281                 sp_kwargs = dict(
282                     stdin=subprocess.PIPE,
283                     stdout=slave,
284                     stderr=self._err_file)
285                 try:
286                     self._output_process = subprocess.Popen(
287                         ['bidiv'] + width_args, **sp_kwargs
288                     )
289                 except OSError:
290                     self._output_process = subprocess.Popen(
291                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
292                 self._output_channel = os.fdopen(master, 'rb')
293             except OSError as ose:
294                 if ose.errno == 2:
295                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
296                 else:
297                     raise
298
299         if (sys.version_info >= (3,) and sys.platform != 'win32' and
300                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
301                 and not params.get('restrictfilenames', False)):
302             # On Python 3, the Unicode filesystem API will throw errors (#1474)
303             self.report_warning(
304                 'Assuming --restrict-filenames since file system encoding '
305                 'cannot encode all characters. '
306                 'Set the LC_ALL environment variable to fix this.')
307             self.params['restrictfilenames'] = True
308
309         if '%(stitle)s' in self.params.get('outtmpl', ''):
310             self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
311
312         self._setup_opener()
313
314         if auto_init:
315             self.print_debug_header()
316             self.add_default_info_extractors()
317
318         for pp_def_raw in self.params.get('postprocessors', []):
319             pp_class = get_postprocessor(pp_def_raw['key'])
320             pp_def = dict(pp_def_raw)
321             del pp_def['key']
322             pp = pp_class(self, **compat_kwargs(pp_def))
323             self.add_post_processor(pp)
324
325         for ph in self.params.get('progress_hooks', []):
326             self.add_progress_hook(ph)
327
328     def warn_if_short_id(self, argv):
329         # short YouTube ID starting with dash?
330         idxs = [
331             i for i, a in enumerate(argv)
332             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
333         if idxs:
334             correct_argv = (
335                 ['youtube-dl'] +
336                 [a for i, a in enumerate(argv) if i not in idxs] +
337                 ['--'] + [argv[i] for i in idxs]
338             )
339             self.report_warning(
340                 'Long argument string detected. '
341                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
342                 args_to_str(correct_argv))
343
344     def add_info_extractor(self, ie):
345         """Add an InfoExtractor object to the end of the list."""
346         self._ies.append(ie)
347         self._ies_instances[ie.ie_key()] = ie
348         ie.set_downloader(self)
349
350     def get_info_extractor(self, ie_key):
351         """
352         Get an instance of an IE with name ie_key, it will try to get one from
353         the _ies list, if there's no instance it will create a new one and add
354         it to the extractor list.
355         """
356         ie = self._ies_instances.get(ie_key)
357         if ie is None:
358             ie = get_info_extractor(ie_key)()
359             self.add_info_extractor(ie)
360         return ie
361
362     def add_default_info_extractors(self):
363         """
364         Add the InfoExtractors returned by gen_extractors to the end of the list
365         """
366         for ie in gen_extractors():
367             self.add_info_extractor(ie)
368
369     def add_post_processor(self, pp):
370         """Add a PostProcessor object to the end of the chain."""
371         self._pps.append(pp)
372         pp.set_downloader(self)
373
374     def add_progress_hook(self, ph):
375         """Add the progress hook (currently only for the file downloader)"""
376         self._progress_hooks.append(ph)
377
378     def _bidi_workaround(self, message):
379         if not hasattr(self, '_output_channel'):
380             return message
381
382         assert hasattr(self, '_output_process')
383         assert isinstance(message, compat_str)
384         line_count = message.count('\n') + 1
385         self._output_process.stdin.write((message + '\n').encode('utf-8'))
386         self._output_process.stdin.flush()
387         res = ''.join(self._output_channel.readline().decode('utf-8')
388                       for _ in range(line_count))
389         return res[:-len('\n')]
390
391     def to_screen(self, message, skip_eol=False):
392         """Print message to stdout if not in quiet mode."""
393         return self.to_stdout(message, skip_eol, check_quiet=True)
394
395     def _write_string(self, s, out=None):
396         write_string(s, out=out, encoding=self.params.get('encoding'))
397
398     def to_stdout(self, message, skip_eol=False, check_quiet=False):
399         """Print message to stdout if not in quiet mode."""
400         if self.params.get('logger'):
401             self.params['logger'].debug(message)
402         elif not check_quiet or not self.params.get('quiet', False):
403             message = self._bidi_workaround(message)
404             terminator = ['\n', ''][skip_eol]
405             output = message + terminator
406
407             self._write_string(output, self._screen_file)
408
409     def to_stderr(self, message):
410         """Print message to stderr."""
411         assert isinstance(message, compat_str)
412         if self.params.get('logger'):
413             self.params['logger'].error(message)
414         else:
415             message = self._bidi_workaround(message)
416             output = message + '\n'
417             self._write_string(output, self._err_file)
418
419     def to_console_title(self, message):
420         if not self.params.get('consoletitle', False):
421             return
422         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
423             # c_wchar_p() might not be necessary if `message` is
424             # already of type unicode()
425             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
426         elif 'TERM' in os.environ:
427             self._write_string('\033]0;%s\007' % message, self._screen_file)
428
429     def save_console_title(self):
430         if not self.params.get('consoletitle', False):
431             return
432         if 'TERM' in os.environ:
433             # Save the title on stack
434             self._write_string('\033[22;0t', self._screen_file)
435
436     def restore_console_title(self):
437         if not self.params.get('consoletitle', False):
438             return
439         if 'TERM' in os.environ:
440             # Restore the title from stack
441             self._write_string('\033[23;0t', self._screen_file)
442
443     def __enter__(self):
444         self.save_console_title()
445         return self
446
447     def __exit__(self, *args):
448         self.restore_console_title()
449
450         if self.params.get('cookiefile') is not None:
451             self.cookiejar.save()
452
453     def trouble(self, message=None, tb=None):
454         """Determine action to take when a download problem appears.
455
456         Depending on if the downloader has been configured to ignore
457         download errors or not, this method may throw an exception or
458         not when errors are found, after printing the message.
459
460         tb, if given, is additional traceback information.
461         """
462         if message is not None:
463             self.to_stderr(message)
464         if self.params.get('verbose'):
465             if tb is None:
466                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
467                     tb = ''
468                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
469                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
470                     tb += compat_str(traceback.format_exc())
471                 else:
472                     tb_data = traceback.format_list(traceback.extract_stack())
473                     tb = ''.join(tb_data)
474             self.to_stderr(tb)
475         if not self.params.get('ignoreerrors', False):
476             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
477                 exc_info = sys.exc_info()[1].exc_info
478             else:
479                 exc_info = sys.exc_info()
480             raise DownloadError(message, exc_info)
481         self._download_retcode = 1
482
483     def report_warning(self, message):
484         '''
485         Print the message to stderr, it will be prefixed with 'WARNING:'
486         If stderr is a tty file the 'WARNING:' will be colored
487         '''
488         if self.params.get('logger') is not None:
489             self.params['logger'].warning(message)
490         else:
491             if self.params.get('no_warnings'):
492                 return
493             if self._err_file.isatty() and os.name != 'nt':
494                 _msg_header = '\033[0;33mWARNING:\033[0m'
495             else:
496                 _msg_header = 'WARNING:'
497             warning_message = '%s %s' % (_msg_header, message)
498             self.to_stderr(warning_message)
499
500     def report_error(self, message, tb=None):
501         '''
502         Do the same as trouble, but prefixes the message with 'ERROR:', colored
503         in red if stderr is a tty file.
504         '''
505         if self._err_file.isatty() and os.name != 'nt':
506             _msg_header = '\033[0;31mERROR:\033[0m'
507         else:
508             _msg_header = 'ERROR:'
509         error_message = '%s %s' % (_msg_header, message)
510         self.trouble(error_message, tb)
511
512     def report_file_already_downloaded(self, file_name):
513         """Report file has already been fully downloaded."""
514         try:
515             self.to_screen('[download] %s has already been downloaded' % file_name)
516         except UnicodeEncodeError:
517             self.to_screen('[download] The file has already been downloaded')
518
519     def prepare_filename(self, info_dict):
520         """Generate the output filename."""
521         try:
522             template_dict = dict(info_dict)
523
524             template_dict['epoch'] = int(time.time())
525             autonumber_size = self.params.get('autonumber_size')
526             if autonumber_size is None:
527                 autonumber_size = 5
528             autonumber_templ = '%0' + str(autonumber_size) + 'd'
529             template_dict['autonumber'] = autonumber_templ % self._num_downloads
530             if template_dict.get('playlist_index') is not None:
531                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
532             if template_dict.get('resolution') is None:
533                 if template_dict.get('width') and template_dict.get('height'):
534                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
535                 elif template_dict.get('height'):
536                     template_dict['resolution'] = '%sp' % template_dict['height']
537                 elif template_dict.get('width'):
538                     template_dict['resolution'] = '?x%d' % template_dict['width']
539
540             sanitize = lambda k, v: sanitize_filename(
541                 compat_str(v),
542                 restricted=self.params.get('restrictfilenames'),
543                 is_id=(k == 'id'))
544             template_dict = dict((k, sanitize(k, v))
545                                  for k, v in template_dict.items()
546                                  if v is not None)
547             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
548
549             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
550             tmpl = compat_expanduser(outtmpl)
551             filename = tmpl % template_dict
552             # Temporary fix for #4787
553             # 'Treat' all problem characters by passing filename through preferredencoding
554             # to workaround encoding issues with subprocess on python2 @ Windows
555             if sys.version_info < (3, 0) and sys.platform == 'win32':
556                 filename = encodeFilename(filename, True).decode(preferredencoding())
557             return filename
558         except ValueError as err:
559             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
560             return None
561
562     def _match_entry(self, info_dict):
563         """ Returns None iff the file should be downloaded """
564
565         video_title = info_dict.get('title', info_dict.get('id', 'video'))
566         if 'title' in info_dict:
567             # This can happen when we're just evaluating the playlist
568             title = info_dict['title']
569             matchtitle = self.params.get('matchtitle', False)
570             if matchtitle:
571                 if not re.search(matchtitle, title, re.IGNORECASE):
572                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
573             rejecttitle = self.params.get('rejecttitle', False)
574             if rejecttitle:
575                 if re.search(rejecttitle, title, re.IGNORECASE):
576                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
577         date = info_dict.get('upload_date', None)
578         if date is not None:
579             dateRange = self.params.get('daterange', DateRange())
580             if date not in dateRange:
581                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
582         view_count = info_dict.get('view_count', None)
583         if view_count is not None:
584             min_views = self.params.get('min_views')
585             if min_views is not None and view_count < min_views:
586                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
587             max_views = self.params.get('max_views')
588             if max_views is not None and view_count > max_views:
589                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
590         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
591             return 'Skipping "%s" because it is age restricted' % video_title
592         if self.in_download_archive(info_dict):
593             return '%s has already been recorded in archive' % video_title
594
595         match_filter = self.params.get('match_filter')
596         if match_filter is not None:
597             ret = match_filter(info_dict)
598             if ret is not None:
599                 return ret
600
601         return None
602
603     @staticmethod
604     def add_extra_info(info_dict, extra_info):
605         '''Set the keys from extra_info in info dict if they are missing'''
606         for key, value in extra_info.items():
607             info_dict.setdefault(key, value)
608
609     def extract_info(self, url, download=True, ie_key=None, extra_info={},
610                      process=True):
611         '''
612         Returns a list with a dictionary for each video we find.
613         If 'download', also downloads the videos.
614         extra_info is a dict containing the extra values to add to each result
615          '''
616
617         if ie_key:
618             ies = [self.get_info_extractor(ie_key)]
619         else:
620             ies = self._ies
621
622         for ie in ies:
623             if not ie.suitable(url):
624                 continue
625
626             if not ie.working():
627                 self.report_warning('The program functionality for this site has been marked as broken, '
628                                     'and will probably not work.')
629
630             try:
631                 ie_result = ie.extract(url)
632                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
633                     break
634                 if isinstance(ie_result, list):
635                     # Backwards compatibility: old IE result format
636                     ie_result = {
637                         '_type': 'compat_list',
638                         'entries': ie_result,
639                     }
640                 self.add_default_extra_info(ie_result, ie, url)
641                 if process:
642                     return self.process_ie_result(ie_result, download, extra_info)
643                 else:
644                     return ie_result
645             except ExtractorError as de:  # An error we somewhat expected
646                 self.report_error(compat_str(de), de.format_traceback())
647                 break
648             except MaxDownloadsReached:
649                 raise
650             except Exception as e:
651                 if self.params.get('ignoreerrors', False):
652                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
653                     break
654                 else:
655                     raise
656         else:
657             self.report_error('no suitable InfoExtractor for URL %s' % url)
658
659     def add_default_extra_info(self, ie_result, ie, url):
660         self.add_extra_info(ie_result, {
661             'extractor': ie.IE_NAME,
662             'webpage_url': url,
663             'webpage_url_basename': url_basename(url),
664             'extractor_key': ie.ie_key(),
665         })
666
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.

        Dispatches on ie_result['_type'] (default 'video'):
        'video', 'url', 'url_transparent', 'playlist'/'multi_video',
        'compat_list'. extra_info is merged into each resolved entry.
        """
        # NOTE(review): extra_info has a mutable default ({}); it is only
        # read in this method, but callers should not rely on it being fresh.

        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            # With --flat-playlist (or extract_flat='in_playlist' while inside
            # a playlist), do not resolve the URL any further.
            extract_flat = self.params.get('extract_flat', False)
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                    extract_flat is True):
                if self.params.get('forcejson', False):
                    self.to_stdout(json.dumps(ie_result))
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # Non-None fields from the embedding result override the
            # extracted info, except '_type' and 'url' which must come from
            # the freshly extracted result.
            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            assert new_result.get('_type') != 'url_transparent'

            # Recurse to handle whatever type the inner extraction produced.
            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist' or result_type == 'multi_video':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # --playlist-start is 1-based on the command line; 0-based here.
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

            # --playlist-items, e.g. '1-3,7,10-13' -> generator of 1-based
            # indices; takes precedence over playliststart/playlistend below.
            playlistitems_str = self.params.get('playlist_items', None)
            playlistitems = None
            if playlistitems_str is not None:
                def iter_playlistitems(format):
                    for string_segment in format.split(','):
                        if '-' in string_segment:
                            start, end = string_segment.split('-')
                            for item in range(int(start), int(end) + 1):
                                yield int(item)
                        else:
                            yield int(string_segment)
                playlistitems = iter_playlistitems(playlistitems_str)

            # Entries may be a plain list, a PagedList, or any iterable;
            # each case is sliced with the least possible work.
            ie_entries = ie_result['entries']
            if isinstance(ie_entries, list):
                n_all_entries = len(ie_entries)
                if playlistitems:
                    entries = [ie_entries[i - 1] for i in playlistitems]
                else:
                    entries = ie_entries[playliststart:playlistend]
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            elif isinstance(ie_entries, PagedList):
                if playlistitems:
                    entries = []
                    for item in playlistitems:
                        entries.extend(ie_entries.getslice(
                            item - 1, item
                        ))
                else:
                    entries = ie_entries.getslice(
                        playliststart, playlistend)
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))
            else:  # iterable
                if playlistitems:
                    # Must materialize to support random access by index.
                    entry_list = list(ie_entries)
                    entries = [entry_list[i - 1] for i in playlistitems]
                else:
                    entries = list(itertools.islice(
                        ie_entries, playliststart, playlistend))
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))

            if self.params.get('playlistreverse', False):
                entries = entries[::-1]

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                # Playlist context attached to every entry.
                extra = {
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_id': ie_result.get('id'),
                    'playlist_title': ie_result.get('title'),
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

                # Apply the generic filters (--match-title, date range, etc.)
                # before doing any per-entry work; reason is a human-readable
                # skip message or None.
                reason = self._match_entry(entry)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                    continue

                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            return ie_result
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            def _fixup(r):
                # Attach the playlist-level metadata to each legacy entry.
                self.add_extra_info(
                    r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    }
                )
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
828
    def _apply_format_filter(self, format_spec, available_formats):
        """Strip one trailing '[key<op>value]' filter from format_spec and
        apply it to available_formats.

        Returns a tuple (remaining_format_spec, filtered_formats). If the
        whole spec was just the filter, the remaining spec defaults to
        'best'. Raises ValueError on an unparsable filter.
        """

        # Numeric comparisons, e.g. [height<=480] or [filesize>10M].
        OPERATORS = {
            '<': operator.lt,
            '<=': operator.le,
            '>': operator.gt,
            '>=': operator.ge,
            '=': operator.eq,
            '!=': operator.ne,
        }
        # A trailing '?' after the operator ("none_inclusive") means formats
        # lacking the key pass the filter instead of being dropped.
        operator_rex = re.compile(r'''(?x)\s*\[
            (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
            \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
            \]$
            ''' % '|'.join(map(re.escape, OPERATORS.keys())))
        m = operator_rex.search(format_spec)
        if m:
            try:
                comparison_value = int(m.group('value'))
            except ValueError:
                # Not a plain integer: try to parse it as a size, first as
                # given ('10M'), then with an implicit 'B' suffix ('10Mi').
                comparison_value = parse_filesize(m.group('value'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('value') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid value %r in format specification %r' % (
                            m.group('value'), format_spec))
            op = OPERATORS[m.group('op')]

        if not m:
            # String comparisons, e.g. [ext=mp4] or [vcodec!=none].
            STR_OPERATORS = {
                '=': operator.eq,
                '!=': operator.ne,
            }
            str_operator_rex = re.compile(r'''(?x)\s*\[
                \s*(?P<key>ext|acodec|vcodec|container|protocol)
                \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
                \s*(?P<value>[a-zA-Z0-9_-]+)
                \s*\]$
                ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
            m = str_operator_rex.search(format_spec)
            if m:
                comparison_value = m.group('value')
                op = STR_OPERATORS[m.group('op')]

        if not m:
            raise ValueError('Invalid format specification %r' % format_spec)

        def _filter(f):
            # Closes over m/op/comparison_value from whichever branch matched.
            actual_value = f.get(m.group('key'))
            if actual_value is None:
                # Truthy matched text when '?' was given, None otherwise.
                return m.group('none_inclusive')
            return op(actual_value, comparison_value)
        new_formats = [f for f in available_formats if _filter(f)]

        # Both regexes are anchored with $, so m.group(0) is exactly the
        # filter suffix; cut it off to get the remaining spec.
        new_format_spec = format_spec[:-len(m.group(0))]
        if not new_format_spec:
            new_format_spec = 'best'

        return (new_format_spec, new_formats)
891
892     def select_format(self, format_spec, available_formats):
893         while format_spec.endswith(']'):
894             format_spec, available_formats = self._apply_format_filter(
895                 format_spec, available_formats)
896         if not available_formats:
897             return None
898
899         if format_spec == 'best' or format_spec is None:
900             return available_formats[-1]
901         elif format_spec == 'worst':
902             return available_formats[0]
903         elif format_spec == 'bestaudio':
904             audio_formats = [
905                 f for f in available_formats
906                 if f.get('vcodec') == 'none']
907             if audio_formats:
908                 return audio_formats[-1]
909         elif format_spec == 'worstaudio':
910             audio_formats = [
911                 f for f in available_formats
912                 if f.get('vcodec') == 'none']
913             if audio_formats:
914                 return audio_formats[0]
915         elif format_spec == 'bestvideo':
916             video_formats = [
917                 f for f in available_formats
918                 if f.get('acodec') == 'none']
919             if video_formats:
920                 return video_formats[-1]
921         elif format_spec == 'worstvideo':
922             video_formats = [
923                 f for f in available_formats
924                 if f.get('acodec') == 'none']
925             if video_formats:
926                 return video_formats[0]
927         else:
928             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
929             if format_spec in extensions:
930                 filter_f = lambda f: f['ext'] == format_spec
931             else:
932                 filter_f = lambda f: f['format_id'] == format_spec
933             matches = list(filter(filter_f, available_formats))
934             if matches:
935                 return matches[-1]
936         return None
937
938     def _calc_headers(self, info_dict):
939         res = std_headers.copy()
940
941         add_headers = info_dict.get('http_headers')
942         if add_headers:
943             res.update(add_headers)
944
945         cookies = self._calc_cookies(info_dict)
946         if cookies:
947             res['Cookie'] = cookies
948
949         return res
950
    def _calc_cookies(self, info_dict):
        """Return the Cookie header value the cookiejar would send for
        info_dict['url'], or None if no cookies apply."""
        class _PseudoRequest(object):
            # Minimal duck-typed stand-in for a urllib request object: it
            # implements just the interface cookiejar.add_cookie_header()
            # uses, so the header can be computed without a real request.
            # NOTE(review): presumably sufficient for the compat cookiejar
            # in use; verify against the cookielib implementation.
            def __init__(self, url):
                self.url = url
                self.headers = {}
                self.unverifiable = False

            def add_unredirected_header(self, k, v):
                # add_cookie_header() stores the computed Cookie header here.
                self.headers[k] = v

            def get_full_url(self):
                return self.url

            def is_unverifiable(self):
                return self.unverifiable

            def has_header(self, h):
                return h in self.headers

            def get_header(self, h, default=None):
                return self.headers.get(h, default)

        pr = _PseudoRequest(info_dict['url'])
        self.cookiejar.add_cookie_header(pr)
        # None when the jar added no Cookie header for this URL.
        return pr.headers.get('Cookie')
976
    def process_video_result(self, info_dict, download=True):
        """Normalize a single-video result, select the requested format(s)
        and, if download is true, hand each selected format to
        process_info().

        Mutates info_dict in place (thumbnails, upload_date, formats, ...)
        and returns it updated with the best selected format.  Raises
        ExtractorError when mandatory fields are missing, no formats were
        found, or the requested format is unavailable.  Returns None early
        when only listing formats/thumbnails.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        # Normalize thumbnails: promote a single 'thumbnail' into the
        # 'thumbnails' list, sort worst-to-best, and fill in resolution/id.
        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if thumbnails:
            thumbnails.sort(key=lambda t: (
                t.get('preference'), t.get('width'), t.get('height'),
                t.get('id'), t.get('url')))
            for i, t in enumerate(thumbnails):
                if 'width' in t and 'height' in t:
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                if t.get('id') is None:
                    t['id'] = '%d' % i

        if thumbnails and 'thumbnail' not in info_dict:
            # Last entry is the best thumbnail after the sort above.
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        # Derive upload_date (YYYYMMDD) from a UNIX timestamp when absent.
        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Working around negative timestamps in Windows
            # (see http://bugs.python.org/issue1646728)
            if info_dict['timestamp'] < 0 and os.name == 'nt':
                info_dict['timestamp'] = 0
            upload_date = datetime.datetime.utcfromtimestamp(
                info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

        # These extractors handle format selection themselves
        if info_dict['extractor'] in ['Youku']:
            if download:
                self.process_info(info_dict)
            return info_dict

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            raise ExtractorError('No video formats found!')

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)

            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url']).lower()
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)

        # --format-limit: drop everything better than the given format_id
        # (keeps formats up to and including the limit).
        format_limit = self.params.get('format_limit', None)
        if format_limit:
            formats = list(takewhile_inclusive(
                lambda f: f['format_id'] != format_limit, formats
            ))

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' field if the original info_dict lists
            # them; otherwise we end up with a circular reference, the first
            # (and unique) element in the 'formats' field in info_dict being
            # info_dict itself, which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats'):
            self.list_formats(info_dict)
            return
        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            req_format = 'best'
        formats_to_download = []
        # The -1 is for supporting YoutubeIE
        if req_format in ('-1', 'all'):
            formats_to_download = formats
        else:
            # Comma-separated groups each yield one download; within a group,
            # '/'-separated alternatives are tried left to right.
            for rfstr in req_format.split(','):
                # We can accept formats requested in the format: 34/5/best, we pick
                # the first that is available, starting from left
                req_formats = rfstr.split('/')
                for rf in req_formats:
                    if re.match(r'.+?\+.+?', rf) is not None:
                        # Two formats have been requested like '137+139'
                        format_1, format_2 = rf.split('+')
                        formats_info = (self.select_format(format_1, formats),
                                        self.select_format(format_2, formats))
                        if all(formats_info):
                            # The first format must contain the video and the
                            # second the audio
                            if formats_info[0].get('vcodec') == 'none':
                                self.report_error('The first format must '
                                                  'contain the video, try using '
                                                  '"-f %s+%s"' % (format_2, format_1))
                                return
                            output_ext = (
                                formats_info[0]['ext']
                                if self.params.get('merge_output_format') is None
                                else self.params['merge_output_format'])
                            # Synthesize a merged pseudo-format: video
                            # properties from the first, audio from the second.
                            selected_format = {
                                'requested_formats': formats_info,
                                'format': '%s+%s' % (formats_info[0].get('format'),
                                                     formats_info[1].get('format')),
                                'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                        formats_info[1].get('format_id')),
                                'width': formats_info[0].get('width'),
                                'height': formats_info[0].get('height'),
                                'resolution': formats_info[0].get('resolution'),
                                'fps': formats_info[0].get('fps'),
                                'vcodec': formats_info[0].get('vcodec'),
                                'vbr': formats_info[0].get('vbr'),
                                'stretched_ratio': formats_info[0].get('stretched_ratio'),
                                'acodec': formats_info[1].get('acodec'),
                                'abr': formats_info[1].get('abr'),
                                'ext': output_ext,
                            }
                        else:
                            selected_format = None
                    else:
                        selected_format = self.select_format(rf, formats)
                    if selected_format is not None:
                        formats_to_download.append(selected_format)
                        break
        if not formats_to_download:
            raise ExtractorError('requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
1147
1148     def process_info(self, info_dict):
1149         """Process a single resolved IE result."""
1150
1151         assert info_dict.get('_type', 'video') == 'video'
1152
1153         max_downloads = self.params.get('max_downloads')
1154         if max_downloads is not None:
1155             if self._num_downloads >= int(max_downloads):
1156                 raise MaxDownloadsReached()
1157
1158         info_dict['fulltitle'] = info_dict['title']
1159         if len(info_dict['title']) > 200:
1160             info_dict['title'] = info_dict['title'][:197] + '...'
1161
1162         # Keep for backwards compatibility
1163         info_dict['stitle'] = info_dict['title']
1164
1165         if 'format' not in info_dict:
1166             info_dict['format'] = info_dict['ext']
1167
1168         reason = self._match_entry(info_dict)
1169         if reason is not None:
1170             self.to_screen('[download] ' + reason)
1171             return
1172
1173         self._num_downloads += 1
1174
1175         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1176
1177         # Forced printings
1178         if self.params.get('forcetitle', False):
1179             self.to_stdout(info_dict['fulltitle'])
1180         if self.params.get('forceid', False):
1181             self.to_stdout(info_dict['id'])
1182         if self.params.get('forceurl', False):
1183             if info_dict.get('requested_formats') is not None:
1184                 for f in info_dict['requested_formats']:
1185                     self.to_stdout(f['url'] + f.get('play_path', ''))
1186             else:
1187                 # For RTMP URLs, also include the playpath
1188                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1189         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1190             self.to_stdout(info_dict['thumbnail'])
1191         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1192             self.to_stdout(info_dict['description'])
1193         if self.params.get('forcefilename', False) and filename is not None:
1194             self.to_stdout(filename)
1195         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1196             self.to_stdout(formatSeconds(info_dict['duration']))
1197         if self.params.get('forceformat', False):
1198             self.to_stdout(info_dict['format'])
1199         if self.params.get('forcejson', False):
1200             self.to_stdout(json.dumps(info_dict))
1201
1202         # Do nothing else if in simulate mode
1203         if self.params.get('simulate', False):
1204             return
1205
1206         if filename is None:
1207             return
1208
1209         try:
1210             dn = os.path.dirname(encodeFilename(filename))
1211             if dn and not os.path.exists(dn):
1212                 os.makedirs(dn)
1213         except (OSError, IOError) as err:
1214             self.report_error('unable to create directory ' + compat_str(err))
1215             return
1216
1217         if self.params.get('writedescription', False):
1218             descfn = filename + '.description'
1219             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1220                 self.to_screen('[info] Video description is already present')
1221             elif info_dict.get('description') is None:
1222                 self.report_warning('There\'s no description to write.')
1223             else:
1224                 try:
1225                     self.to_screen('[info] Writing video description to: ' + descfn)
1226                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1227                         descfile.write(info_dict['description'])
1228                 except (OSError, IOError):
1229                     self.report_error('Cannot write description file ' + descfn)
1230                     return
1231
1232         if self.params.get('writeannotations', False):
1233             annofn = filename + '.annotations.xml'
1234             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1235                 self.to_screen('[info] Video annotations are already present')
1236             else:
1237                 try:
1238                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1239                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1240                         annofile.write(info_dict['annotations'])
1241                 except (KeyError, TypeError):
1242                     self.report_warning('There are no annotations to write.')
1243                 except (OSError, IOError):
1244                     self.report_error('Cannot write annotations file: ' + annofn)
1245                     return
1246
1247         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1248                                        self.params.get('writeautomaticsub')])
1249
1250         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
1251             # subtitles download errors are already managed as troubles in relevant IE
1252             # that way it will silently go on when used with unsupporting IE
1253             subtitles = info_dict['subtitles']
1254             sub_format = self.params.get('subtitlesformat', 'srt')
1255             for sub_lang in subtitles.keys():
1256                 sub = subtitles[sub_lang]
1257                 if sub is None:
1258                     continue
1259                 try:
1260                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1261                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1262                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1263                     else:
1264                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1265                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1266                             subfile.write(sub)
1267                 except (OSError, IOError):
1268                     self.report_error('Cannot write subtitles file ' + sub_filename)
1269                     return
1270
1271         if self.params.get('writeinfojson', False):
1272             infofn = os.path.splitext(filename)[0] + '.info.json'
1273             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1274                 self.to_screen('[info] Video description metadata is already present')
1275             else:
1276                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1277                 try:
1278                     write_json_file(info_dict, infofn)
1279                 except (OSError, IOError):
1280                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1281                     return
1282
1283         self._write_thumbnails(info_dict, filename)
1284
1285         if not self.params.get('skip_download', False):
1286             try:
1287                 def dl(name, info):
1288                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1289                     for ph in self._progress_hooks:
1290                         fd.add_progress_hook(ph)
1291                     if self.params.get('verbose'):
1292                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1293                     return fd.download(name, info)
1294
1295                 if info_dict.get('requested_formats') is not None:
1296                     downloaded = []
1297                     success = True
1298                     merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1299                     if not merger._executable:
1300                         postprocessors = []
1301                         self.report_warning('You have requested multiple '
1302                                             'formats but ffmpeg or avconv are not installed.'
1303                                             ' The formats won\'t be merged')
1304                     else:
1305                         postprocessors = [merger]
1306                     for f in info_dict['requested_formats']:
1307                         new_info = dict(info_dict)
1308                         new_info.update(f)
1309                         fname = self.prepare_filename(new_info)
1310                         fname = prepend_extension(fname, 'f%s' % f['format_id'])
1311                         downloaded.append(fname)
1312                         partial_success = dl(fname, new_info)
1313                         success = success and partial_success
1314                     info_dict['__postprocessors'] = postprocessors
1315                     info_dict['__files_to_merge'] = downloaded
1316                 else:
1317                     # Just a single file
1318                     success = dl(filename, info_dict)
1319             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1320                 self.report_error('unable to download video data: %s' % str(err))
1321                 return
1322             except (OSError, IOError) as err:
1323                 raise UnavailableVideoError(err)
1324             except (ContentTooShortError, ) as err:
1325                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1326                 return
1327
1328             if success:
1329                 # Fixup content
1330                 fixup_policy = self.params.get('fixup')
1331                 if fixup_policy is None:
1332                     fixup_policy = 'detect_or_warn'
1333
1334                 stretched_ratio = info_dict.get('stretched_ratio')
1335                 if stretched_ratio is not None and stretched_ratio != 1:
1336                     if fixup_policy == 'warn':
1337                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1338                             info_dict['id'], stretched_ratio))
1339                     elif fixup_policy == 'detect_or_warn':
1340                         stretched_pp = FFmpegFixupStretchedPP(self)
1341                         if stretched_pp.available:
1342                             info_dict.setdefault('__postprocessors', [])
1343                             info_dict['__postprocessors'].append(stretched_pp)
1344                         else:
1345                             self.report_warning(
1346                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1347                                     info_dict['id'], stretched_ratio))
1348                     else:
1349                         assert fixup_policy in ('ignore', 'never')
1350
1351                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1352                     if fixup_policy == 'warn':
1353                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1354                             info_dict['id']))
1355                     elif fixup_policy == 'detect_or_warn':
1356                         fixup_pp = FFmpegFixupM4aPP(self)
1357                         if fixup_pp.available:
1358                             info_dict.setdefault('__postprocessors', [])
1359                             info_dict['__postprocessors'].append(fixup_pp)
1360                         else:
1361                             self.report_warning(
1362                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1363                                     info_dict['id']))
1364                     else:
1365                         assert fixup_policy in ('ignore', 'never')
1366
1367                 try:
1368                     self.post_process(filename, info_dict)
1369                 except (PostProcessingError) as err:
1370                     self.report_error('postprocessing: %s' % str(err))
1371                     return
1372                 self.record_download_archive(info_dict)
1373
1374     def download(self, url_list):
1375         """Download a given list of URLs."""
1376         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1377         if (len(url_list) > 1 and
1378                 '%' not in outtmpl
1379                 and self.params.get('max_downloads') != 1):
1380             raise SameFileError(outtmpl)
1381
1382         for url in url_list:
1383             try:
1384                 # It also downloads the videos
1385                 res = self.extract_info(url)
1386             except UnavailableVideoError:
1387                 self.report_error('unable to download video')
1388             except MaxDownloadsReached:
1389                 self.to_screen('[info] Maximum number of downloaded files reached.')
1390                 raise
1391             else:
1392                 if self.params.get('dump_single_json', False):
1393                     self.to_stdout(json.dumps(res))
1394
1395         return self._download_retcode
1396
1397     def download_with_info_file(self, info_filename):
1398         with io.open(info_filename, 'r', encoding='utf-8') as f:
1399             info = json.load(f)
1400         try:
1401             self.process_ie_result(info, download=True)
1402         except DownloadError:
1403             webpage_url = info.get('webpage_url')
1404             if webpage_url is not None:
1405                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1406                 return self.download([webpage_url])
1407             else:
1408                 raise
1409         return self._download_retcode
1410
1411     def post_process(self, filename, ie_info):
1412         """Run all the postprocessors on the given file."""
1413         info = dict(ie_info)
1414         info['filepath'] = filename
1415         pps_chain = []
1416         if ie_info.get('__postprocessors') is not None:
1417             pps_chain.extend(ie_info['__postprocessors'])
1418         pps_chain.extend(self._pps)
1419         for pp in pps_chain:
1420             keep_video = None
1421             old_filename = info['filepath']
1422             try:
1423                 keep_video_wish, info = pp.run(info)
1424                 if keep_video_wish is not None:
1425                     if keep_video_wish:
1426                         keep_video = keep_video_wish
1427                     elif keep_video is None:
1428                         # No clear decision yet, let IE decide
1429                         keep_video = keep_video_wish
1430             except PostProcessingError as e:
1431                 self.report_error(e.msg)
1432             if keep_video is False and not self.params.get('keepvideo', False):
1433                 try:
1434                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1435                     os.remove(encodeFilename(old_filename))
1436                 except (IOError, OSError):
1437                     self.report_warning('Unable to remove downloaded video file')
1438
1439     def _make_archive_id(self, info_dict):
1440         # Future-proof against any change in case
1441         # and backwards compatibility with prior versions
1442         extractor = info_dict.get('extractor_key')
1443         if extractor is None:
1444             if 'id' in info_dict:
1445                 extractor = info_dict.get('ie_key')  # key in a playlist
1446         if extractor is None:
1447             return None  # Incomplete video information
1448         return extractor.lower() + ' ' + info_dict['id']
1449
1450     def in_download_archive(self, info_dict):
1451         fn = self.params.get('download_archive')
1452         if fn is None:
1453             return False
1454
1455         vid_id = self._make_archive_id(info_dict)
1456         if vid_id is None:
1457             return False  # Incomplete video information
1458
1459         try:
1460             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1461                 for line in archive_file:
1462                     if line.strip() == vid_id:
1463                         return True
1464         except IOError as ioe:
1465             if ioe.errno != errno.ENOENT:
1466                 raise
1467         return False
1468
1469     def record_download_archive(self, info_dict):
1470         fn = self.params.get('download_archive')
1471         if fn is None:
1472             return
1473         vid_id = self._make_archive_id(info_dict)
1474         assert vid_id
1475         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1476             archive_file.write(vid_id + '\n')
1477
1478     @staticmethod
1479     def format_resolution(format, default='unknown'):
1480         if format.get('vcodec') == 'none':
1481             return 'audio only'
1482         if format.get('resolution') is not None:
1483             return format['resolution']
1484         if format.get('height') is not None:
1485             if format.get('width') is not None:
1486                 res = '%sx%s' % (format['width'], format['height'])
1487             else:
1488                 res = '%sp' % format['height']
1489         elif format.get('width') is not None:
1490             res = '?x%d' % format['width']
1491         else:
1492             res = default
1493         return res
1494
1495     def _format_note(self, fdict):
1496         res = ''
1497         if fdict.get('ext') in ['f4f', 'f4m']:
1498             res += '(unsupported) '
1499         if fdict.get('format_note') is not None:
1500             res += fdict['format_note'] + ' '
1501         if fdict.get('tbr') is not None:
1502             res += '%4dk ' % fdict['tbr']
1503         if fdict.get('container') is not None:
1504             if res:
1505                 res += ', '
1506             res += '%s container' % fdict['container']
1507         if (fdict.get('vcodec') is not None and
1508                 fdict.get('vcodec') != 'none'):
1509             if res:
1510                 res += ', '
1511             res += fdict['vcodec']
1512             if fdict.get('vbr') is not None:
1513                 res += '@'
1514         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1515             res += 'video@'
1516         if fdict.get('vbr') is not None:
1517             res += '%4dk' % fdict['vbr']
1518         if fdict.get('fps') is not None:
1519             res += ', %sfps' % fdict['fps']
1520         if fdict.get('acodec') is not None:
1521             if res:
1522                 res += ', '
1523             if fdict['acodec'] == 'none':
1524                 res += 'video only'
1525             else:
1526                 res += '%-5s' % fdict['acodec']
1527         elif fdict.get('abr') is not None:
1528             if res:
1529                 res += ', '
1530             res += 'audio'
1531         if fdict.get('abr') is not None:
1532             res += '@%3dk' % fdict['abr']
1533         if fdict.get('asr') is not None:
1534             res += ' (%5dHz)' % fdict['asr']
1535         if fdict.get('filesize') is not None:
1536             if res:
1537                 res += ', '
1538             res += format_bytes(fdict['filesize'])
1539         elif fdict.get('filesize_approx') is not None:
1540             if res:
1541                 res += ', '
1542             res += '~' + format_bytes(fdict['filesize_approx'])
1543         return res
1544
1545     def list_formats(self, info_dict):
1546         def line(format, idlen=20):
1547             return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1548                 format['format_id'],
1549                 format['ext'],
1550                 self.format_resolution(format),
1551                 self._format_note(format),
1552             ))
1553
1554         formats = info_dict.get('formats', [info_dict])
1555         idlen = max(len('format code'),
1556                     max(len(f['format_id']) for f in formats))
1557         formats_s = [
1558             line(f, idlen) for f in formats
1559             if f.get('preference') is None or f['preference'] >= -1000]
1560         if len(formats) > 1:
1561             formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1562
1563         header_line = line({
1564             'format_id': 'format code', 'ext': 'extension',
1565             'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1566         self.to_screen(
1567             '[info] Available formats for %s:\n%s\n%s' %
1568             (info_dict['id'], header_line, '\n'.join(formats_s)))
1569
1570     def list_thumbnails(self, info_dict):
1571         thumbnails = info_dict.get('thumbnails')
1572         if not thumbnails:
1573             tn_url = info_dict.get('thumbnail')
1574             if tn_url:
1575                 thumbnails = [{'id': '0', 'url': tn_url}]
1576             else:
1577                 self.to_screen(
1578                     '[info] No thumbnails present for %s' % info_dict['id'])
1579                 return
1580
1581         self.to_screen(
1582             '[info] Thumbnails for %s:' % info_dict['id'])
1583         self.to_screen(render_table(
1584             ['ID', 'width', 'height', 'URL'],
1585             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1586
1587     def urlopen(self, req):
1588         """ Start an HTTP download """
1589
1590         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1591         # always respected by websites, some tend to give out URLs with non percent-encoded
1592         # non-ASCII characters (see telemb.py, ard.py [#3412])
1593         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1594         # To work around aforementioned issue we will replace request's original URL with
1595         # percent-encoded one
1596         req_is_string = isinstance(req, compat_basestring)
1597         url = req if req_is_string else req.get_full_url()
1598         url_escaped = escape_url(url)
1599
1600         # Substitute URL if any change after escaping
1601         if url != url_escaped:
1602             if req_is_string:
1603                 req = url_escaped
1604             else:
1605                 req = compat_urllib_request.Request(
1606                     url_escaped, data=req.data, headers=req.headers,
1607                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1608
1609         return self._opener.open(req, timeout=self._socket_timeout)
1610
1611     def print_debug_header(self):
1612         if not self.params.get('verbose'):
1613             return
1614
1615         if type('') is not compat_str:
1616             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1617             self.report_warning(
1618                 'Your Python is broken! Update to a newer and supported version')
1619
1620         stdout_encoding = getattr(
1621             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1622         encoding_str = (
1623             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1624                 locale.getpreferredencoding(),
1625                 sys.getfilesystemencoding(),
1626                 stdout_encoding,
1627                 self.get_encoding()))
1628         write_string(encoding_str, encoding=None)
1629
1630         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1631         try:
1632             sp = subprocess.Popen(
1633                 ['git', 'rev-parse', '--short', 'HEAD'],
1634                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1635                 cwd=os.path.dirname(os.path.abspath(__file__)))
1636             out, err = sp.communicate()
1637             out = out.decode().strip()
1638             if re.match('[0-9a-f]+', out):
1639                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1640         except:
1641             try:
1642                 sys.exc_clear()
1643             except:
1644                 pass
1645         self._write_string('[debug] Python version %s - %s\n' % (
1646             platform.python_version(), platform_name()))
1647
1648         exe_versions = FFmpegPostProcessor.get_versions()
1649         exe_versions['rtmpdump'] = rtmpdump_version()
1650         exe_str = ', '.join(
1651             '%s %s' % (exe, v)
1652             for exe, v in sorted(exe_versions.items())
1653             if v
1654         )
1655         if not exe_str:
1656             exe_str = 'none'
1657         self._write_string('[debug] exe versions: %s\n' % exe_str)
1658
1659         proxy_map = {}
1660         for handler in self._opener.handlers:
1661             if hasattr(handler, 'proxies'):
1662                 proxy_map.update(handler.proxies)
1663         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1664
1665         if self.params.get('call_home', False):
1666             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1667             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1668             latest_version = self.urlopen(
1669                 'https://yt-dl.org/latest/version').read().decode('utf-8')
1670             if version_tuple(latest_version) > version_tuple(__version__):
1671                 self.report_warning(
1672                     'You are using an outdated version (newest version: %s)! '
1673                     'See https://yt-dl.org/update if you need help updating.' %
1674                     latest_version)
1675
1676     def _setup_opener(self):
1677         timeout_val = self.params.get('socket_timeout')
1678         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1679
1680         opts_cookiefile = self.params.get('cookiefile')
1681         opts_proxy = self.params.get('proxy')
1682
1683         if opts_cookiefile is None:
1684             self.cookiejar = compat_cookiejar.CookieJar()
1685         else:
1686             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1687                 opts_cookiefile)
1688             if os.access(opts_cookiefile, os.R_OK):
1689                 self.cookiejar.load()
1690
1691         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1692             self.cookiejar)
1693         if opts_proxy is not None:
1694             if opts_proxy == '':
1695                 proxies = {}
1696             else:
1697                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1698         else:
1699             proxies = compat_urllib_request.getproxies()
1700             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1701             if 'http' in proxies and 'https' not in proxies:
1702                 proxies['https'] = proxies['http']
1703         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1704
1705         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1706         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1707         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1708         opener = compat_urllib_request.build_opener(
1709             https_handler, proxy_handler, cookie_processor, ydlh)
1710         # Delete the default user-agent header, which would otherwise apply in
1711         # cases where our custom HTTP handler doesn't come into play
1712         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1713         opener.addheaders = []
1714         self._opener = opener
1715
1716     def encode(self, s):
1717         if isinstance(s, bytes):
1718             return s  # Already encoded
1719
1720         try:
1721             return s.encode(self.get_encoding())
1722         except UnicodeEncodeError as err:
1723             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1724             raise
1725
1726     def get_encoding(self):
1727         encoding = self.params.get('encoding')
1728         if encoding is None:
1729             encoding = preferredencoding()
1730         return encoding
1731
1732     def _write_thumbnails(self, info_dict, filename):
1733         if self.params.get('writethumbnail', False):
1734             thumbnails = info_dict.get('thumbnails')
1735             if thumbnails:
1736                 thumbnails = [thumbnails[-1]]
1737         elif self.params.get('write_all_thumbnails', False):
1738             thumbnails = info_dict.get('thumbnails')
1739         else:
1740             return
1741
1742         if not thumbnails:
1743             # No thumbnails present, so return immediately
1744             return
1745
1746         for t in thumbnails:
1747             thumb_ext = determine_ext(t['url'], 'jpg')
1748             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
1749             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
1750             thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
1751
1752             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1753                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
1754                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1755             else:
1756                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
1757                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1758                 try:
1759                     uf = self.urlopen(t['url'])
1760                     with open(thumb_filename, 'wb') as thumbf:
1761                         shutil.copyfileobj(uf, thumbf)
1762                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
1763                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
1764                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1765                     self.report_warning('Unable to download thumbnail "%s": %s' %
1766                                         (t['url'], compat_str(err)))