[downloader] Add --hls-prefer-native to use the native HLS downloader (#4966)
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import datetime
8 import errno
9 import io
10 import itertools
11 import json
12 import locale
13 import operator
14 import os
15 import platform
16 import re
17 import shutil
18 import subprocess
19 import socket
20 import sys
21 import time
22 import traceback
23
24 if os.name == 'nt':
25     import ctypes
26
27 from .compat import (
28     compat_basestring,
29     compat_cookiejar,
30     compat_expanduser,
31     compat_http_client,
32     compat_kwargs,
33     compat_str,
34     compat_urllib_error,
35     compat_urllib_request,
36 )
37 from .utils import (
38     escape_url,
39     ContentTooShortError,
40     date_from_str,
41     DateRange,
42     DEFAULT_OUTTMPL,
43     determine_ext,
44     DownloadError,
45     encodeFilename,
46     ExtractorError,
47     format_bytes,
48     formatSeconds,
49     get_term_width,
50     locked_file,
51     make_HTTPS_handler,
52     MaxDownloadsReached,
53     PagedList,
54     parse_filesize,
55     PostProcessingError,
56     platform_name,
57     preferredencoding,
58     render_table,
59     SameFileError,
60     sanitize_filename,
61     std_headers,
62     subtitles_filename,
63     takewhile_inclusive,
64     UnavailableVideoError,
65     url_basename,
66     version_tuple,
67     write_json_file,
68     write_string,
69     YoutubeDLHandler,
70     prepend_extension,
71     args_to_str,
72     age_restricted,
73 )
74 from .cache import Cache
75 from .extractor import get_info_extractor, gen_extractors
76 from .downloader import get_suitable_downloader
77 from .downloader.rtmp import rtmpdump_version
78 from .postprocessor import (
79     FFmpegFixupM4aPP,
80     FFmpegFixupStretchedPP,
81     FFmpegMergerPP,
82     FFmpegPostProcessor,
83     get_postprocessor,
84 )
85 from .version import __version__
86
87
class YoutubeDL(object):
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information (a task that InfoExtractors do),
    it has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forceid:           Force printing ID.
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    forcefilename:     Force printing final filename.
    forceduration:     Force printing duration.
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    simulate:          Do not download the video files.
    format:            Video format code. See options.py for more information.
    format_limit:      Highest quality format to try.
    outtmpl:           Template for output names.
    restrictfilenames: Do not allow "&" and spaces in file names
    ignoreerrors:      Do not stop on download errors.
    nooverwrites:      Prevent overwriting files.
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    playlist_items:    Specific indices of playlist to download.
    playlistreverse:   Download playlist items in reverse order.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video description to a .info.json file
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    write_all_thumbnails:  Write all thumbnail formats to files
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatic subtitles to a file
    allsubtitles:      Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
    subtitleslangs:    List of languages of the subtitles to download
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
    cookiefile:        File name where cookies should be read from and dumped to.
    nocheckcertificate:Do not verify SSL certificates
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       At the moment, this is only supported by YouTube.
    proxy:             URL of the proxy server to use
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic:Print out sent and received HTTP traffic
    include_ads:       Download ads as well
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Do not resolve URLs, return the immediate result.
                       Pass in 'in_playlist' to only show this behavior for
                       playlist items.
    postprocessors:    A list of dictionaries, each with an entry
                       * key:  The name of the postprocessor. See
                               youtube_dl/postprocessor/__init__.py for a list.
                       as well as any further keyword arguments for the
                       postprocessor.
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading" and "finished".
                                 Check this first and ignore unknown values.

                       If status is one of "downloading" or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * tmpfilename: The filename we're currently writing to
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                                unknown

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
    merge_output_format: Extension to use when merging formats.
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                                           about it, warn otherwise (default)
    source_address:    (Experimental) Client-side IP address to bind to.
    call_home:         Boolean, true iff we are allowed to contact the
                       youtube-dl servers for debugging.
    sleep_interval:    Number of seconds to sleep before each download.
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called with the info_dict of
                       every video.
                       If it returns a message, the video is ignored.
                       If it returns None, the video is downloaded.
                       match_filter_func in utils.py is one example for this.
    no_color:          Do not emit color codes in output.

    The following options determine which downloader is picked:
    external_downloader: Executable of the external downloader to call.
                       None or unset for standard (built-in) downloader.
    hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.

    The following parameters are not used by YoutubeDL itself, they are used by
    the FileDownloader:
    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
    noresizebuffer, retries, continuedl, noprogress, consoletitle,
    xattr_set_filesize.

    The following options are used by the post processors:
    prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
                       otherwise prefer avconv.
    exec_cmd:          Arbitrary command to run after downloading
    """

    # Class-level placeholders; the real per-instance values are assigned
    # in __init__.
    params = None              # option dictionary supplied by the caller
    _ies = []                  # registered InfoExtractor instances, in order
    _pps = []                  # registered PostProcessor chain
    _download_retcode = None   # exit code accumulated across downloads
    _num_downloads = None      # counter backing the %(autonumber)s template
    _screen_file = None        # stream used for normal (non-error) output
260
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.

        params:    dictionary of options (see the class docstring);
                   defaults to an empty dict.
        auto_init: when True, print the debug header and register all
                   default InfoExtractors immediately.
        """
        if params is None:
            params = {}
        self._ies = []
        self._ies_instances = {}
        self._pps = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Boolean-index trick: False -> stdout, True -> stderr.
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = params
        self.cache = Cache(self)

        if params.get('bidi_workaround', False):
            try:
                import pty
                # Run an external bidi filter; its output is read back
                # through a pty so the helper believes it writes to a terminal.
                master, slave = pty.openpty()
                width = get_term_width()
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    # Prefer 'bidiv'; fall back to 'fribidi' when not installed.
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == 2:
                    # errno 2 == ENOENT: neither helper executable was found.
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.version_info >= (3,) and sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # On Python 3, the Unicode filesystem API will throw errors (#1474)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if '%(stitle)s' in self.params.get('outtmpl', ''):
            self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

        self._setup_opener()

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        # Instantiate configured postprocessors: 'key' selects the class,
        # the remaining dict entries become keyword arguments.
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            del pp_def['key']
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)
331
332     def warn_if_short_id(self, argv):
333         # short YouTube ID starting with dash?
334         idxs = [
335             i for i, a in enumerate(argv)
336             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
337         if idxs:
338             correct_argv = (
339                 ['youtube-dl'] +
340                 [a for i, a in enumerate(argv) if i not in idxs] +
341                 ['--'] + [argv[i] for i in idxs]
342             )
343             self.report_warning(
344                 'Long argument string detected. '
345                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
346                 args_to_str(correct_argv))
347
348     def add_info_extractor(self, ie):
349         """Add an InfoExtractor object to the end of the list."""
350         self._ies.append(ie)
351         self._ies_instances[ie.ie_key()] = ie
352         ie.set_downloader(self)
353
354     def get_info_extractor(self, ie_key):
355         """
356         Get an instance of an IE with name ie_key, it will try to get one from
357         the _ies list, if there's no instance it will create a new one and add
358         it to the extractor list.
359         """
360         ie = self._ies_instances.get(ie_key)
361         if ie is None:
362             ie = get_info_extractor(ie_key)()
363             self.add_info_extractor(ie)
364         return ie
365
366     def add_default_info_extractors(self):
367         """
368         Add the InfoExtractors returned by gen_extractors to the end of the list
369         """
370         for ie in gen_extractors():
371             self.add_info_extractor(ie)
372
373     def add_post_processor(self, pp):
374         """Add a PostProcessor object to the end of the chain."""
375         self._pps.append(pp)
376         pp.set_downloader(self)
377
378     def add_progress_hook(self, ph):
379         """Add the progress hook (currently only for the file downloader)"""
380         self._progress_hooks.append(ph)
381
382     def _bidi_workaround(self, message):
383         if not hasattr(self, '_output_channel'):
384             return message
385
386         assert hasattr(self, '_output_process')
387         assert isinstance(message, compat_str)
388         line_count = message.count('\n') + 1
389         self._output_process.stdin.write((message + '\n').encode('utf-8'))
390         self._output_process.stdin.flush()
391         res = ''.join(self._output_channel.readline().decode('utf-8')
392                       for _ in range(line_count))
393         return res[:-len('\n')]
394
395     def to_screen(self, message, skip_eol=False):
396         """Print message to stdout if not in quiet mode."""
397         return self.to_stdout(message, skip_eol, check_quiet=True)
398
399     def _write_string(self, s, out=None):
400         write_string(s, out=out, encoding=self.params.get('encoding'))
401
402     def to_stdout(self, message, skip_eol=False, check_quiet=False):
403         """Print message to stdout if not in quiet mode."""
404         if self.params.get('logger'):
405             self.params['logger'].debug(message)
406         elif not check_quiet or not self.params.get('quiet', False):
407             message = self._bidi_workaround(message)
408             terminator = ['\n', ''][skip_eol]
409             output = message + terminator
410
411             self._write_string(output, self._screen_file)
412
413     def to_stderr(self, message):
414         """Print message to stderr."""
415         assert isinstance(message, compat_str)
416         if self.params.get('logger'):
417             self.params['logger'].error(message)
418         else:
419             message = self._bidi_workaround(message)
420             output = message + '\n'
421             self._write_string(output, self._err_file)
422
423     def to_console_title(self, message):
424         if not self.params.get('consoletitle', False):
425             return
426         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
427             # c_wchar_p() might not be necessary if `message` is
428             # already of type unicode()
429             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
430         elif 'TERM' in os.environ:
431             self._write_string('\033]0;%s\007' % message, self._screen_file)
432
433     def save_console_title(self):
434         if not self.params.get('consoletitle', False):
435             return
436         if 'TERM' in os.environ:
437             # Save the title on stack
438             self._write_string('\033[22;0t', self._screen_file)
439
440     def restore_console_title(self):
441         if not self.params.get('consoletitle', False):
442             return
443         if 'TERM' in os.environ:
444             # Restore the title from stack
445             self._write_string('\033[23;0t', self._screen_file)
446
447     def __enter__(self):
448         self.save_console_title()
449         return self
450
451     def __exit__(self, *args):
452         self.restore_console_title()
453
454         if self.params.get('cookiefile') is not None:
455             self.cookiejar.save()
456
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # Exceptions that wrap another error expose the original
                    # via an 'exc_info' attribute; show that traceback first.
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: show the current call stack.
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Re-raise with the most specific exc_info available, preferring
            # the wrapped original cause over the outer exception.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1
486
487     def report_warning(self, message):
488         '''
489         Print the message to stderr, it will be prefixed with 'WARNING:'
490         If stderr is a tty file the 'WARNING:' will be colored
491         '''
492         if self.params.get('logger') is not None:
493             self.params['logger'].warning(message)
494         else:
495             if self.params.get('no_warnings'):
496                 return
497             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
498                 _msg_header = '\033[0;33mWARNING:\033[0m'
499             else:
500                 _msg_header = 'WARNING:'
501             warning_message = '%s %s' % (_msg_header, message)
502             self.to_stderr(warning_message)
503
504     def report_error(self, message, tb=None):
505         '''
506         Do the same as trouble, but prefixes the message with 'ERROR:', colored
507         in red if stderr is a tty file.
508         '''
509         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
510             _msg_header = '\033[0;31mERROR:\033[0m'
511         else:
512             _msg_header = 'ERROR:'
513         error_message = '%s %s' % (_msg_header, message)
514         self.trouble(error_message, tb)
515
516     def report_file_already_downloaded(self, file_name):
517         """Report file has already been fully downloaded."""
518         try:
519             self.to_screen('[download] %s has already been downloaded' % file_name)
520         except UnicodeEncodeError:
521             self.to_screen('[download] The file has already been downloaded')
522
    def prepare_filename(self, info_dict):
        """Generate the output filename by expanding the outtmpl template
        with (a sanitized copy of) *info_dict*; returns None after
        reporting when the template is invalid."""
        try:
            template_dict = dict(info_dict)

            template_dict['epoch'] = int(time.time())
            autonumber_size = self.params.get('autonumber_size')
            if autonumber_size is None:
                autonumber_size = 5
            autonumber_templ = '%0' + str(autonumber_size) + 'd'
            template_dict['autonumber'] = autonumber_templ % self._num_downloads
            if template_dict.get('playlist_index') is not None:
                # Zero-pad the index to the width of the playlist length.
                template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
            if template_dict.get('resolution') is None:
                # Derive a human-readable resolution string from width/height.
                if template_dict.get('width') and template_dict.get('height'):
                    template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
                elif template_dict.get('height'):
                    template_dict['resolution'] = '%sp' % template_dict['height']
                elif template_dict.get('width'):
                    template_dict['resolution'] = '?x%d' % template_dict['width']

            # Make every value filesystem-safe; 'id' fields get the gentler
            # is_id sanitization rules.
            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id'))
            template_dict = dict((k, sanitize(k, v))
                                 for k, v in template_dict.items()
                                 if v is not None)
            # Fields missing from the dict render as 'NA' instead of raising.
            template_dict = collections.defaultdict(lambda: 'NA', template_dict)

            outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
            tmpl = compat_expanduser(outtmpl)
            filename = tmpl % template_dict
            # Temporary fix for #4787
            # 'Treat' all problem characters by passing filename through preferredencoding
            # to workaround encoding issues with subprocess on python2 @ Windows
            if sys.version_info < (3, 0) and sys.platform == 'win32':
                filename = encodeFilename(filename, True).decode(preferredencoding())
            return filename
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None
565
566     def _match_entry(self, info_dict, incomplete):
567         """ Returns None iff the file should be downloaded """
568
569         video_title = info_dict.get('title', info_dict.get('id', 'video'))
570         if 'title' in info_dict:
571             # This can happen when we're just evaluating the playlist
572             title = info_dict['title']
573             matchtitle = self.params.get('matchtitle', False)
574             if matchtitle:
575                 if not re.search(matchtitle, title, re.IGNORECASE):
576                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
577             rejecttitle = self.params.get('rejecttitle', False)
578             if rejecttitle:
579                 if re.search(rejecttitle, title, re.IGNORECASE):
580                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
581         date = info_dict.get('upload_date', None)
582         if date is not None:
583             dateRange = self.params.get('daterange', DateRange())
584             if date not in dateRange:
585                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
586         view_count = info_dict.get('view_count', None)
587         if view_count is not None:
588             min_views = self.params.get('min_views')
589             if min_views is not None and view_count < min_views:
590                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
591             max_views = self.params.get('max_views')
592             if max_views is not None and view_count > max_views:
593                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
594         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
595             return 'Skipping "%s" because it is age restricted' % video_title
596         if self.in_download_archive(info_dict):
597             return '%s has already been recorded in archive' % video_title
598
599         if not incomplete:
600             match_filter = self.params.get('match_filter')
601             if match_filter is not None:
602                 ret = match_filter(info_dict)
603                 if ret is not None:
604                     return ret
605
606         return None
607
608     @staticmethod
609     def add_extra_info(info_dict, extra_info):
610         '''Set the keys from extra_info in info dict if they are missing'''
611         for key, value in extra_info.items():
612             info_dict.setdefault(key, value)
613
614     def extract_info(self, url, download=True, ie_key=None, extra_info={},
615                      process=True):
616         '''
617         Returns a list with a dictionary for each video we find.
618         If 'download', also downloads the videos.
619         extra_info is a dict containing the extra values to add to each result
620          '''
621
622         if ie_key:
623             ies = [self.get_info_extractor(ie_key)]
624         else:
625             ies = self._ies
626
627         for ie in ies:
628             if not ie.suitable(url):
629                 continue
630
631             if not ie.working():
632                 self.report_warning('The program functionality for this site has been marked as broken, '
633                                     'and will probably not work.')
634
635             try:
636                 ie_result = ie.extract(url)
637                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
638                     break
639                 if isinstance(ie_result, list):
640                     # Backwards compatibility: old IE result format
641                     ie_result = {
642                         '_type': 'compat_list',
643                         'entries': ie_result,
644                     }
645                 self.add_default_extra_info(ie_result, ie, url)
646                 if process:
647                     return self.process_ie_result(ie_result, download, extra_info)
648                 else:
649                     return ie_result
650             except ExtractorError as de:  # An error we somewhat expected
651                 self.report_error(compat_str(de), de.format_traceback())
652                 break
653             except MaxDownloadsReached:
654                 raise
655             except Exception as e:
656                 if self.params.get('ignoreerrors', False):
657                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
658                     break
659                 else:
660                     raise
661         else:
662             self.report_error('no suitable InfoExtractor for URL %s' % url)
663
664     def add_default_extra_info(self, ie_result, ie, url):
665         self.add_extra_info(ie_result, {
666             'extractor': ie.IE_NAME,
667             'webpage_url': url,
668             'webpage_url_basename': url_basename(url),
669             'extractor_key': ie.ie_key(),
670         })
671
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """
        # NOTE(review): extra_info uses a mutable default argument; within this
        # method it is only read and passed along (never mutated), so the
        # shared default dict is harmless here.

        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            extract_flat = self.params.get('extract_flat', False)
            # 'in_playlist' flattens only entries coming from a playlist
            # (detected via extra_info); True flattens unconditionally.
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                    extract_flat is True):
                if self.params.get('forcejson', False):
                    self.to_stdout(json.dumps(ie_result))
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # Non-None fields from the embedding result override the freshly
            # extracted ones, except '_type' and 'url' which must come from
            # the new extraction (otherwise we would recurse forever).
            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            assert new_result.get('_type') != 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist' or result_type == 'multi_video':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # --playlist-start is 1-based on the command line; convert to a
            # 0-based slice index here.
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

            playlistitems_str = self.params.get('playlist_items', None)
            playlistitems = None
            if playlistitems_str is not None:
                def iter_playlistitems(format):
                    # Expand a "1-3,7" style spec into individual 1-based indices.
                    for string_segment in format.split(','):
                        if '-' in string_segment:
                            start, end = string_segment.split('-')
                            for item in range(int(start), int(end) + 1):
                                yield int(item)
                        else:
                            yield int(string_segment)
                playlistitems = iter_playlistitems(playlistitems_str)

            ie_entries = ie_result['entries']
            if isinstance(ie_entries, list):
                n_all_entries = len(ie_entries)
                if playlistitems:
                    entries = [ie_entries[i - 1] for i in playlistitems]
                else:
                    entries = ie_entries[playliststart:playlistend]
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            elif isinstance(ie_entries, PagedList):
                # PagedList is fetched lazily; request only the needed slices
                # instead of materializing the whole playlist.
                if playlistitems:
                    entries = []
                    for item in playlistitems:
                        entries.extend(ie_entries.getslice(
                            item - 1, item
                        ))
                else:
                    entries = ie_entries.getslice(
                        playliststart, playlistend)
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))
            else:  # iterable
                if playlistitems:
                    # Arbitrary iterables have no random access; materialize
                    # first so specific items can be picked.
                    entry_list = list(ie_entries)
                    entries = [entry_list[i - 1] for i in playlistitems]
                else:
                    entries = list(itertools.islice(
                        ie_entries, playliststart, playlistend))
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))

            if self.params.get('playlistreverse', False):
                entries = entries[::-1]

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                # Metadata inherited by every entry of this playlist.
                extra = {
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_id': ie_result.get('id'),
                    'playlist_title': ie_result.get('title'),
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

                # Skip entries rejected by _match_entry (reason is a
                # human-readable message) before doing any further work.
                reason = self._match_entry(entry, incomplete=True)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                    continue

                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            return ie_result
        elif result_type == 'compat_list':
            # Legacy extractors returning a bare list of results.
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            def _fixup(r):
                # Propagate the parent result's metadata onto each entry.
                self.add_extra_info(
                    r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    }
                )
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
833
834     def _apply_format_filter(self, format_spec, available_formats):
835         " Returns a tuple of the remaining format_spec and filtered formats "
836
837         OPERATORS = {
838             '<': operator.lt,
839             '<=': operator.le,
840             '>': operator.gt,
841             '>=': operator.ge,
842             '=': operator.eq,
843             '!=': operator.ne,
844         }
845         operator_rex = re.compile(r'''(?x)\s*\[
846             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
847             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
848             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
849             \]$
850             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
851         m = operator_rex.search(format_spec)
852         if m:
853             try:
854                 comparison_value = int(m.group('value'))
855             except ValueError:
856                 comparison_value = parse_filesize(m.group('value'))
857                 if comparison_value is None:
858                     comparison_value = parse_filesize(m.group('value') + 'B')
859                 if comparison_value is None:
860                     raise ValueError(
861                         'Invalid value %r in format specification %r' % (
862                             m.group('value'), format_spec))
863             op = OPERATORS[m.group('op')]
864
865         if not m:
866             STR_OPERATORS = {
867                 '=': operator.eq,
868                 '!=': operator.ne,
869             }
870             str_operator_rex = re.compile(r'''(?x)\s*\[
871                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
872                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
873                 \s*(?P<value>[a-zA-Z0-9_-]+)
874                 \s*\]$
875                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
876             m = str_operator_rex.search(format_spec)
877             if m:
878                 comparison_value = m.group('value')
879                 op = STR_OPERATORS[m.group('op')]
880
881         if not m:
882             raise ValueError('Invalid format specification %r' % format_spec)
883
884         def _filter(f):
885             actual_value = f.get(m.group('key'))
886             if actual_value is None:
887                 return m.group('none_inclusive')
888             return op(actual_value, comparison_value)
889         new_formats = [f for f in available_formats if _filter(f)]
890
891         new_format_spec = format_spec[:-len(m.group(0))]
892         if not new_format_spec:
893             new_format_spec = 'best'
894
895         return (new_format_spec, new_formats)
896
897     def select_format(self, format_spec, available_formats):
898         while format_spec.endswith(']'):
899             format_spec, available_formats = self._apply_format_filter(
900                 format_spec, available_formats)
901         if not available_formats:
902             return None
903
904         if format_spec == 'best' or format_spec is None:
905             return available_formats[-1]
906         elif format_spec == 'worst':
907             return available_formats[0]
908         elif format_spec == 'bestaudio':
909             audio_formats = [
910                 f for f in available_formats
911                 if f.get('vcodec') == 'none']
912             if audio_formats:
913                 return audio_formats[-1]
914         elif format_spec == 'worstaudio':
915             audio_formats = [
916                 f for f in available_formats
917                 if f.get('vcodec') == 'none']
918             if audio_formats:
919                 return audio_formats[0]
920         elif format_spec == 'bestvideo':
921             video_formats = [
922                 f for f in available_formats
923                 if f.get('acodec') == 'none']
924             if video_formats:
925                 return video_formats[-1]
926         elif format_spec == 'worstvideo':
927             video_formats = [
928                 f for f in available_formats
929                 if f.get('acodec') == 'none']
930             if video_formats:
931                 return video_formats[0]
932         else:
933             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
934             if format_spec in extensions:
935                 filter_f = lambda f: f['ext'] == format_spec
936             else:
937                 filter_f = lambda f: f['format_id'] == format_spec
938             matches = list(filter(filter_f, available_formats))
939             if matches:
940                 return matches[-1]
941         return None
942
943     def _calc_headers(self, info_dict):
944         res = std_headers.copy()
945
946         add_headers = info_dict.get('http_headers')
947         if add_headers:
948             res.update(add_headers)
949
950         cookies = self._calc_cookies(info_dict)
951         if cookies:
952             res['Cookie'] = cookies
953
954         return res
955
956     def _calc_cookies(self, info_dict):
957         class _PseudoRequest(object):
958             def __init__(self, url):
959                 self.url = url
960                 self.headers = {}
961                 self.unverifiable = False
962
963             def add_unredirected_header(self, k, v):
964                 self.headers[k] = v
965
966             def get_full_url(self):
967                 return self.url
968
969             def is_unverifiable(self):
970                 return self.unverifiable
971
972             def has_header(self, h):
973                 return h in self.headers
974
975             def get_header(self, h, default=None):
976                 return self.headers.get(h, default)
977
978         pr = _PseudoRequest(info_dict['url'])
979         self.cookiejar.add_cookie_header(pr)
980         return pr.headers.get('Cookie')
981
    def process_video_result(self, info_dict, download=True):
        """Post-process a single 'video'-type extraction result.

        Fills in defaults (thumbnails, upload_date, display_id, per-format
        metadata), selects the requested format(s), and, if 'download' is
        true, hands each chosen format to process_info().  Returns the
        (mutated) info_dict, updated with the best selected format.
        Raises ExtractorError on missing mandatory fields or when no format
        matches the request.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            # Promote a single 'thumbnail' value to the list-based form.
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if thumbnails:
            # Sort ascending so [-1] is the preferred thumbnail.
            thumbnails.sort(key=lambda t: (
                t.get('preference'), t.get('width'), t.get('height'),
                t.get('id'), t.get('url')))
            for i, t in enumerate(thumbnails):
                if 'width' in t and 'height' in t:
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                if t.get('id') is None:
                    t['id'] = '%d' % i

        if thumbnails and 'thumbnail' not in info_dict:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Working around negative timestamps in Windows
            # (see http://bugs.python.org/issue1646728)
            if info_dict['timestamp'] < 0 and os.name == 'nt':
                info_dict['timestamp'] = 0
            upload_date = datetime.datetime.utcfromtimestamp(
                info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

        # These extractors handle format selection themselves
        if info_dict['extractor'] in ['Youku']:
            if download:
                self.process_info(info_dict)
            return info_dict

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            raise ExtractorError('No video formats found!')

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)

            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url']).lower()
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)

        format_limit = self.params.get('format_limit', None)
        if format_limit:
            # Keep formats up to and including the one whose id matches the
            # limit (takewhile_inclusive keeps the first failing element too).
            formats = list(takewhile_inclusive(
                lambda f: f['format_id'] != format_limit, formats
            ))

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats'):
            self.list_formats(info_dict)
            return
        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            req_format = 'best'
        formats_to_download = []
        # The -1 is for supporting YoutubeIE
        if req_format in ('-1', 'all'):
            formats_to_download = formats
        else:
            for rfstr in req_format.split(','):
                # We can accept formats requested in the format: 34/5/best, we pick
                # the first that is available, starting from left
                req_formats = rfstr.split('/')
                for rf in req_formats:
                    if re.match(r'.+?\+.+?', rf) is not None:
                        # Two formats have been requested like '137+139'
                        format_1, format_2 = rf.split('+')
                        formats_info = (self.select_format(format_1, formats),
                                        self.select_format(format_2, formats))
                        if all(formats_info):
                            # The first format must contain the video and the
                            # second the audio
                            if formats_info[0].get('vcodec') == 'none':
                                self.report_error('The first format must '
                                                  'contain the video, try using '
                                                  '"-f %s+%s"' % (format_2, format_1))
                                return
                            # Output container: the video track's extension
                            # unless merge_output_format overrides it.
                            output_ext = (
                                formats_info[0]['ext']
                                if self.params.get('merge_output_format') is None
                                else self.params['merge_output_format'])
                            # Synthesize a combined format dict: video fields
                            # from the first track, audio fields from the second.
                            selected_format = {
                                'requested_formats': formats_info,
                                'format': '%s+%s' % (formats_info[0].get('format'),
                                                     formats_info[1].get('format')),
                                'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                        formats_info[1].get('format_id')),
                                'width': formats_info[0].get('width'),
                                'height': formats_info[0].get('height'),
                                'resolution': formats_info[0].get('resolution'),
                                'fps': formats_info[0].get('fps'),
                                'vcodec': formats_info[0].get('vcodec'),
                                'vbr': formats_info[0].get('vbr'),
                                'stretched_ratio': formats_info[0].get('stretched_ratio'),
                                'acodec': formats_info[1].get('acodec'),
                                'abr': formats_info[1].get('abr'),
                                'ext': output_ext,
                            }
                        else:
                            selected_format = None
                    else:
                        selected_format = self.select_format(rf, formats)
                    if selected_format is not None:
                        formats_to_download.append(selected_format)
                        break
        if not formats_to_download:
            raise ExtractorError('requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
1152
1153     def process_info(self, info_dict):
1154         """Process a single resolved IE result."""
1155
1156         assert info_dict.get('_type', 'video') == 'video'
1157
1158         max_downloads = self.params.get('max_downloads')
1159         if max_downloads is not None:
1160             if self._num_downloads >= int(max_downloads):
1161                 raise MaxDownloadsReached()
1162
1163         info_dict['fulltitle'] = info_dict['title']
1164         if len(info_dict['title']) > 200:
1165             info_dict['title'] = info_dict['title'][:197] + '...'
1166
1167         # Keep for backwards compatibility
1168         info_dict['stitle'] = info_dict['title']
1169
1170         if 'format' not in info_dict:
1171             info_dict['format'] = info_dict['ext']
1172
1173         reason = self._match_entry(info_dict, incomplete=False)
1174         if reason is not None:
1175             self.to_screen('[download] ' + reason)
1176             return
1177
1178         self._num_downloads += 1
1179
1180         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1181
1182         # Forced printings
1183         if self.params.get('forcetitle', False):
1184             self.to_stdout(info_dict['fulltitle'])
1185         if self.params.get('forceid', False):
1186             self.to_stdout(info_dict['id'])
1187         if self.params.get('forceurl', False):
1188             if info_dict.get('requested_formats') is not None:
1189                 for f in info_dict['requested_formats']:
1190                     self.to_stdout(f['url'] + f.get('play_path', ''))
1191             else:
1192                 # For RTMP URLs, also include the playpath
1193                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1194         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1195             self.to_stdout(info_dict['thumbnail'])
1196         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1197             self.to_stdout(info_dict['description'])
1198         if self.params.get('forcefilename', False) and filename is not None:
1199             self.to_stdout(filename)
1200         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1201             self.to_stdout(formatSeconds(info_dict['duration']))
1202         if self.params.get('forceformat', False):
1203             self.to_stdout(info_dict['format'])
1204         if self.params.get('forcejson', False):
1205             self.to_stdout(json.dumps(info_dict))
1206
1207         # Do nothing else if in simulate mode
1208         if self.params.get('simulate', False):
1209             return
1210
1211         if filename is None:
1212             return
1213
1214         try:
1215             dn = os.path.dirname(encodeFilename(filename))
1216             if dn and not os.path.exists(dn):
1217                 os.makedirs(dn)
1218         except (OSError, IOError) as err:
1219             self.report_error('unable to create directory ' + compat_str(err))
1220             return
1221
1222         if self.params.get('writedescription', False):
1223             descfn = filename + '.description'
1224             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1225                 self.to_screen('[info] Video description is already present')
1226             elif info_dict.get('description') is None:
1227                 self.report_warning('There\'s no description to write.')
1228             else:
1229                 try:
1230                     self.to_screen('[info] Writing video description to: ' + descfn)
1231                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1232                         descfile.write(info_dict['description'])
1233                 except (OSError, IOError):
1234                     self.report_error('Cannot write description file ' + descfn)
1235                     return
1236
1237         if self.params.get('writeannotations', False):
1238             annofn = filename + '.annotations.xml'
1239             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1240                 self.to_screen('[info] Video annotations are already present')
1241             else:
1242                 try:
1243                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1244                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1245                         annofile.write(info_dict['annotations'])
1246                 except (KeyError, TypeError):
1247                     self.report_warning('There are no annotations to write.')
1248                 except (OSError, IOError):
1249                     self.report_error('Cannot write annotations file: ' + annofn)
1250                     return
1251
1252         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1253                                        self.params.get('writeautomaticsub')])
1254
1255         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
1256             # subtitles download errors are already managed as troubles in relevant IE
1257             # that way it will silently go on when used with unsupporting IE
1258             subtitles = info_dict['subtitles']
1259             sub_format = self.params.get('subtitlesformat', 'srt')
1260             for sub_lang in subtitles.keys():
1261                 sub = subtitles[sub_lang]
1262                 if sub is None:
1263                     continue
1264                 try:
1265                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1266                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1267                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1268                     else:
1269                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1270                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1271                             subfile.write(sub)
1272                 except (OSError, IOError):
1273                     self.report_error('Cannot write subtitles file ' + sub_filename)
1274                     return
1275
1276         if self.params.get('writeinfojson', False):
1277             infofn = os.path.splitext(filename)[0] + '.info.json'
1278             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1279                 self.to_screen('[info] Video description metadata is already present')
1280             else:
1281                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1282                 try:
1283                     write_json_file(info_dict, infofn)
1284                 except (OSError, IOError):
1285                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1286                     return
1287
1288         self._write_thumbnails(info_dict, filename)
1289
1290         if not self.params.get('skip_download', False):
1291             try:
1292                 def dl(name, info):
1293                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1294                     for ph in self._progress_hooks:
1295                         fd.add_progress_hook(ph)
1296                     if self.params.get('verbose'):
1297                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1298                     return fd.download(name, info)
1299
1300                 if info_dict.get('requested_formats') is not None:
1301                     downloaded = []
1302                     success = True
1303                     merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1304                     if not merger.available():
1305                         postprocessors = []
1306                         self.report_warning('You have requested multiple '
1307                                             'formats but ffmpeg or avconv are not installed.'
1308                                             ' The formats won\'t be merged')
1309                     else:
1310                         postprocessors = [merger]
1311                     for f in info_dict['requested_formats']:
1312                         new_info = dict(info_dict)
1313                         new_info.update(f)
1314                         fname = self.prepare_filename(new_info)
1315                         fname = prepend_extension(fname, 'f%s' % f['format_id'])
1316                         downloaded.append(fname)
1317                         partial_success = dl(fname, new_info)
1318                         success = success and partial_success
1319                     info_dict['__postprocessors'] = postprocessors
1320                     info_dict['__files_to_merge'] = downloaded
1321                 else:
1322                     # Just a single file
1323                     success = dl(filename, info_dict)
1324             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1325                 self.report_error('unable to download video data: %s' % str(err))
1326                 return
1327             except (OSError, IOError) as err:
1328                 raise UnavailableVideoError(err)
1329             except (ContentTooShortError, ) as err:
1330                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1331                 return
1332
1333             if success:
1334                 # Fixup content
1335                 fixup_policy = self.params.get('fixup')
1336                 if fixup_policy is None:
1337                     fixup_policy = 'detect_or_warn'
1338
1339                 stretched_ratio = info_dict.get('stretched_ratio')
1340                 if stretched_ratio is not None and stretched_ratio != 1:
1341                     if fixup_policy == 'warn':
1342                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1343                             info_dict['id'], stretched_ratio))
1344                     elif fixup_policy == 'detect_or_warn':
1345                         stretched_pp = FFmpegFixupStretchedPP(self)
1346                         if stretched_pp.available:
1347                             info_dict.setdefault('__postprocessors', [])
1348                             info_dict['__postprocessors'].append(stretched_pp)
1349                         else:
1350                             self.report_warning(
1351                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1352                                     info_dict['id'], stretched_ratio))
1353                     else:
1354                         assert fixup_policy in ('ignore', 'never')
1355
1356                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1357                     if fixup_policy == 'warn':
1358                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1359                             info_dict['id']))
1360                     elif fixup_policy == 'detect_or_warn':
1361                         fixup_pp = FFmpegFixupM4aPP(self)
1362                         if fixup_pp.available:
1363                             info_dict.setdefault('__postprocessors', [])
1364                             info_dict['__postprocessors'].append(fixup_pp)
1365                         else:
1366                             self.report_warning(
1367                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1368                                     info_dict['id']))
1369                     else:
1370                         assert fixup_policy in ('ignore', 'never')
1371
1372                 try:
1373                     self.post_process(filename, info_dict)
1374                 except (PostProcessingError) as err:
1375                     self.report_error('postprocessing: %s' % str(err))
1376                     return
1377                 self.record_download_archive(info_dict)
1378
1379     def download(self, url_list):
1380         """Download a given list of URLs."""
1381         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1382         if (len(url_list) > 1 and
1383                 '%' not in outtmpl
1384                 and self.params.get('max_downloads') != 1):
1385             raise SameFileError(outtmpl)
1386
1387         for url in url_list:
1388             try:
1389                 # It also downloads the videos
1390                 res = self.extract_info(url)
1391             except UnavailableVideoError:
1392                 self.report_error('unable to download video')
1393             except MaxDownloadsReached:
1394                 self.to_screen('[info] Maximum number of downloaded files reached.')
1395                 raise
1396             else:
1397                 if self.params.get('dump_single_json', False):
1398                     self.to_stdout(json.dumps(res))
1399
1400         return self._download_retcode
1401
1402     def download_with_info_file(self, info_filename):
1403         with io.open(info_filename, 'r', encoding='utf-8') as f:
1404             info = json.load(f)
1405         try:
1406             self.process_ie_result(info, download=True)
1407         except DownloadError:
1408             webpage_url = info.get('webpage_url')
1409             if webpage_url is not None:
1410                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1411                 return self.download([webpage_url])
1412             else:
1413                 raise
1414         return self._download_retcode
1415
    def post_process(self, filename, ie_info):
        """Run all the postprocessors on the given file."""
        # Work on a copy so the caller's info dict keys are not clobbered.
        info = dict(ie_info)
        info['filepath'] = filename
        pps_chain = []
        # Per-video postprocessors (merger, fixups) run before the globally
        # registered ones in self._pps.
        if ie_info.get('__postprocessors') is not None:
            pps_chain.extend(ie_info['__postprocessors'])
        pps_chain.extend(self._pps)
        for pp in pps_chain:
            keep_video = None
            old_filename = info['filepath']
            try:
                # pp.run returns (keep_video_wish, updated_info); the updated
                # info (possibly with a new 'filepath') feeds the next pp.
                keep_video_wish, info = pp.run(info)
                if keep_video_wish is not None:
                    if keep_video_wish:
                        keep_video = keep_video_wish
                    elif keep_video is None:
                        # No clear decision yet, let IE decide
                        # NOTE(review): keep_video is reset to None at the top
                        # of every iteration, so this elif always fires when
                        # the wish is falsy — the net effect is a plain
                        # assignment; confirm whether cross-pp accumulation
                        # was intended.
                        keep_video = keep_video_wish
            except PostProcessingError as e:
                self.report_error(e.msg)
            # Delete the input file as soon as any pp asked for it (unless -k).
            if keep_video is False and not self.params.get('keepvideo', False):
                try:
                    self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
                    os.remove(encodeFilename(old_filename))
                except (IOError, OSError):
                    self.report_warning('Unable to remove downloaded video file')
1443
1444     def _make_archive_id(self, info_dict):
1445         # Future-proof against any change in case
1446         # and backwards compatibility with prior versions
1447         extractor = info_dict.get('extractor_key')
1448         if extractor is None:
1449             if 'id' in info_dict:
1450                 extractor = info_dict.get('ie_key')  # key in a playlist
1451         if extractor is None:
1452             return None  # Incomplete video information
1453         return extractor.lower() + ' ' + info_dict['id']
1454
1455     def in_download_archive(self, info_dict):
1456         fn = self.params.get('download_archive')
1457         if fn is None:
1458             return False
1459
1460         vid_id = self._make_archive_id(info_dict)
1461         if vid_id is None:
1462             return False  # Incomplete video information
1463
1464         try:
1465             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1466                 for line in archive_file:
1467                     if line.strip() == vid_id:
1468                         return True
1469         except IOError as ioe:
1470             if ioe.errno != errno.ENOENT:
1471                 raise
1472         return False
1473
1474     def record_download_archive(self, info_dict):
1475         fn = self.params.get('download_archive')
1476         if fn is None:
1477             return
1478         vid_id = self._make_archive_id(info_dict)
1479         assert vid_id
1480         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1481             archive_file.write(vid_id + '\n')
1482
1483     @staticmethod
1484     def format_resolution(format, default='unknown'):
1485         if format.get('vcodec') == 'none':
1486             return 'audio only'
1487         if format.get('resolution') is not None:
1488             return format['resolution']
1489         if format.get('height') is not None:
1490             if format.get('width') is not None:
1491                 res = '%sx%s' % (format['width'], format['height'])
1492             else:
1493                 res = '%sp' % format['height']
1494         elif format.get('width') is not None:
1495             res = '?x%d' % format['width']
1496         else:
1497             res = default
1498         return res
1499
1500     def _format_note(self, fdict):
1501         res = ''
1502         if fdict.get('ext') in ['f4f', 'f4m']:
1503             res += '(unsupported) '
1504         if fdict.get('format_note') is not None:
1505             res += fdict['format_note'] + ' '
1506         if fdict.get('tbr') is not None:
1507             res += '%4dk ' % fdict['tbr']
1508         if fdict.get('container') is not None:
1509             if res:
1510                 res += ', '
1511             res += '%s container' % fdict['container']
1512         if (fdict.get('vcodec') is not None and
1513                 fdict.get('vcodec') != 'none'):
1514             if res:
1515                 res += ', '
1516             res += fdict['vcodec']
1517             if fdict.get('vbr') is not None:
1518                 res += '@'
1519         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1520             res += 'video@'
1521         if fdict.get('vbr') is not None:
1522             res += '%4dk' % fdict['vbr']
1523         if fdict.get('fps') is not None:
1524             res += ', %sfps' % fdict['fps']
1525         if fdict.get('acodec') is not None:
1526             if res:
1527                 res += ', '
1528             if fdict['acodec'] == 'none':
1529                 res += 'video only'
1530             else:
1531                 res += '%-5s' % fdict['acodec']
1532         elif fdict.get('abr') is not None:
1533             if res:
1534                 res += ', '
1535             res += 'audio'
1536         if fdict.get('abr') is not None:
1537             res += '@%3dk' % fdict['abr']
1538         if fdict.get('asr') is not None:
1539             res += ' (%5dHz)' % fdict['asr']
1540         if fdict.get('filesize') is not None:
1541             if res:
1542                 res += ', '
1543             res += format_bytes(fdict['filesize'])
1544         elif fdict.get('filesize_approx') is not None:
1545             if res:
1546                 res += ', '
1547             res += '~' + format_bytes(fdict['filesize_approx'])
1548         return res
1549
1550     def list_formats(self, info_dict):
1551         def line(format, idlen=20):
1552             return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1553                 format['format_id'],
1554                 format['ext'],
1555                 self.format_resolution(format),
1556                 self._format_note(format),
1557             ))
1558
1559         formats = info_dict.get('formats', [info_dict])
1560         idlen = max(len('format code'),
1561                     max(len(f['format_id']) for f in formats))
1562         formats_s = [
1563             line(f, idlen) for f in formats
1564             if f.get('preference') is None or f['preference'] >= -1000]
1565         if len(formats) > 1:
1566             formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1567
1568         header_line = line({
1569             'format_id': 'format code', 'ext': 'extension',
1570             'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1571         self.to_screen(
1572             '[info] Available formats for %s:\n%s\n%s' %
1573             (info_dict['id'], header_line, '\n'.join(formats_s)))
1574
1575     def list_thumbnails(self, info_dict):
1576         thumbnails = info_dict.get('thumbnails')
1577         if not thumbnails:
1578             tn_url = info_dict.get('thumbnail')
1579             if tn_url:
1580                 thumbnails = [{'id': '0', 'url': tn_url}]
1581             else:
1582                 self.to_screen(
1583                     '[info] No thumbnails present for %s' % info_dict['id'])
1584                 return
1585
1586         self.to_screen(
1587             '[info] Thumbnails for %s:' % info_dict['id'])
1588         self.to_screen(render_table(
1589             ['ID', 'width', 'height', 'URL'],
1590             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1591
1592     def urlopen(self, req):
1593         """ Start an HTTP download """
1594
1595         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1596         # always respected by websites, some tend to give out URLs with non percent-encoded
1597         # non-ASCII characters (see telemb.py, ard.py [#3412])
1598         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1599         # To work around aforementioned issue we will replace request's original URL with
1600         # percent-encoded one
1601         req_is_string = isinstance(req, compat_basestring)
1602         url = req if req_is_string else req.get_full_url()
1603         url_escaped = escape_url(url)
1604
1605         # Substitute URL if any change after escaping
1606         if url != url_escaped:
1607             if req_is_string:
1608                 req = url_escaped
1609             else:
1610                 req = compat_urllib_request.Request(
1611                     url_escaped, data=req.data, headers=req.headers,
1612                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1613
1614         return self._opener.open(req, timeout=self._socket_timeout)
1615
1616     def print_debug_header(self):
1617         if not self.params.get('verbose'):
1618             return
1619
1620         if type('') is not compat_str:
1621             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1622             self.report_warning(
1623                 'Your Python is broken! Update to a newer and supported version')
1624
1625         stdout_encoding = getattr(
1626             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1627         encoding_str = (
1628             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1629                 locale.getpreferredencoding(),
1630                 sys.getfilesystemencoding(),
1631                 stdout_encoding,
1632                 self.get_encoding()))
1633         write_string(encoding_str, encoding=None)
1634
1635         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1636         try:
1637             sp = subprocess.Popen(
1638                 ['git', 'rev-parse', '--short', 'HEAD'],
1639                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1640                 cwd=os.path.dirname(os.path.abspath(__file__)))
1641             out, err = sp.communicate()
1642             out = out.decode().strip()
1643             if re.match('[0-9a-f]+', out):
1644                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1645         except:
1646             try:
1647                 sys.exc_clear()
1648             except:
1649                 pass
1650         self._write_string('[debug] Python version %s - %s\n' % (
1651             platform.python_version(), platform_name()))
1652
1653         exe_versions = FFmpegPostProcessor.get_versions(self)
1654         exe_versions['rtmpdump'] = rtmpdump_version()
1655         exe_str = ', '.join(
1656             '%s %s' % (exe, v)
1657             for exe, v in sorted(exe_versions.items())
1658             if v
1659         )
1660         if not exe_str:
1661             exe_str = 'none'
1662         self._write_string('[debug] exe versions: %s\n' % exe_str)
1663
1664         proxy_map = {}
1665         for handler in self._opener.handlers:
1666             if hasattr(handler, 'proxies'):
1667                 proxy_map.update(handler.proxies)
1668         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1669
1670         if self.params.get('call_home', False):
1671             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1672             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1673             latest_version = self.urlopen(
1674                 'https://yt-dl.org/latest/version').read().decode('utf-8')
1675             if version_tuple(latest_version) > version_tuple(__version__):
1676                 self.report_warning(
1677                     'You are using an outdated version (newest version: %s)! '
1678                     'See https://yt-dl.org/update if you need help updating.' %
1679                     latest_version)
1680
1681     def _setup_opener(self):
1682         timeout_val = self.params.get('socket_timeout')
1683         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1684
1685         opts_cookiefile = self.params.get('cookiefile')
1686         opts_proxy = self.params.get('proxy')
1687
1688         if opts_cookiefile is None:
1689             self.cookiejar = compat_cookiejar.CookieJar()
1690         else:
1691             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1692                 opts_cookiefile)
1693             if os.access(opts_cookiefile, os.R_OK):
1694                 self.cookiejar.load()
1695
1696         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1697             self.cookiejar)
1698         if opts_proxy is not None:
1699             if opts_proxy == '':
1700                 proxies = {}
1701             else:
1702                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1703         else:
1704             proxies = compat_urllib_request.getproxies()
1705             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1706             if 'http' in proxies and 'https' not in proxies:
1707                 proxies['https'] = proxies['http']
1708         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1709
1710         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1711         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1712         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1713         opener = compat_urllib_request.build_opener(
1714             https_handler, proxy_handler, cookie_processor, ydlh)
1715         # Delete the default user-agent header, which would otherwise apply in
1716         # cases where our custom HTTP handler doesn't come into play
1717         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1718         opener.addheaders = []
1719         self._opener = opener
1720
1721     def encode(self, s):
1722         if isinstance(s, bytes):
1723             return s  # Already encoded
1724
1725         try:
1726             return s.encode(self.get_encoding())
1727         except UnicodeEncodeError as err:
1728             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1729             raise
1730
1731     def get_encoding(self):
1732         encoding = self.params.get('encoding')
1733         if encoding is None:
1734             encoding = preferredencoding()
1735         return encoding
1736
1737     def _write_thumbnails(self, info_dict, filename):
1738         if self.params.get('writethumbnail', False):
1739             thumbnails = info_dict.get('thumbnails')
1740             if thumbnails:
1741                 thumbnails = [thumbnails[-1]]
1742         elif self.params.get('write_all_thumbnails', False):
1743             thumbnails = info_dict.get('thumbnails')
1744         else:
1745             return
1746
1747         if not thumbnails:
1748             # No thumbnails present, so return immediately
1749             return
1750
1751         for t in thumbnails:
1752             thumb_ext = determine_ext(t['url'], 'jpg')
1753             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
1754             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
1755             thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
1756
1757             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1758                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
1759                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1760             else:
1761                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
1762                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1763                 try:
1764                     uf = self.urlopen(t['url'])
1765                     with open(thumb_filename, 'wb') as thumbf:
1766                         shutil.copyfileobj(uf, thumbf)
1767                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
1768                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
1769                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1770                     self.report_warning('Unable to download thumbnail "%s": %s' %
1771                                         (t['url'], compat_str(err)))