[YoutubeDL] Use a progress hook for progress reporting
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import datetime
8 import errno
9 import io
10 import itertools
11 import json
12 import locale
13 import operator
14 import os
15 import platform
16 import re
17 import shutil
18 import subprocess
19 import socket
20 import sys
21 import time
22 import traceback
23
24 if os.name == 'nt':
25     import ctypes
26
27 from .compat import (
28     compat_basestring,
29     compat_cookiejar,
30     compat_expanduser,
31     compat_http_client,
32     compat_kwargs,
33     compat_str,
34     compat_urllib_error,
35     compat_urllib_request,
36 )
37 from .utils import (
38     escape_url,
39     ContentTooShortError,
40     date_from_str,
41     DateRange,
42     DEFAULT_OUTTMPL,
43     determine_ext,
44     DownloadError,
45     encodeFilename,
46     ExtractorError,
47     format_bytes,
48     formatSeconds,
49     get_term_width,
50     locked_file,
51     make_HTTPS_handler,
52     MaxDownloadsReached,
53     PagedList,
54     parse_filesize,
55     PostProcessingError,
56     platform_name,
57     preferredencoding,
58     render_table,
59     SameFileError,
60     sanitize_filename,
61     std_headers,
62     subtitles_filename,
63     takewhile_inclusive,
64     UnavailableVideoError,
65     url_basename,
66     version_tuple,
67     write_json_file,
68     write_string,
69     YoutubeDLHandler,
70     prepend_extension,
71     args_to_str,
72     age_restricted,
73 )
74 from .cache import Cache
75 from .extractor import get_info_extractor, gen_extractors
76 from .downloader import get_suitable_downloader
77 from .downloader.rtmp import rtmpdump_version
78 from .postprocessor import (
79     FFmpegFixupM4aPP,
80     FFmpegFixupStretchedPP,
81     FFmpegMergerPP,
82     FFmpegPostProcessor,
83     get_postprocessor,
84 )
85 from .version import __version__
86
87
class YoutubeDL(object):
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information, a task that InfoExtractors do, it
    has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL process the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forceid:           Force printing ID.
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    forcefilename:     Force printing final filename.
    forceduration:     Force printing duration.
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    simulate:          Do not download the video files.
    format:            Video format code. See options.py for more information.
    format_limit:      Highest quality format to try.
    outtmpl:           Template for output names.
    restrictfilenames: Do not allow "&" and spaces in file names
    ignoreerrors:      Do not stop on download errors.
    nooverwrites:      Prevent overwriting files.
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    playlist_items:    Specific indices of playlist to download.
    playlistreverse:   Download playlist items in reverse order.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video description to a .info.json file
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    write_all_thumbnails:  Write all thumbnail formats to files
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatic subtitles to a file
    allsubtitles:      Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
    subtitleslangs:    List of languages of the subtitles to download
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
    cookiefile:        File name where cookies should be read from and dumped to.
    nocheckcertificate:Do not verify SSL certificates
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       At the moment, this is only supported by YouTube.
    proxy:             URL of the proxy server to use
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic:Print out sent and received HTTP traffic
    include_ads:       Download ads as well
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Do not resolve URLs, return the immediate result.
                       Pass in 'in_playlist' to only show this behavior for
                       playlist items.
    postprocessors:    A list of dictionaries, each with an entry
                       * key:  The name of the postprocessor. See
                               youtube_dl/postprocessor/__init__.py for a list.
                       as well as any further keyword arguments for the
                       postprocessor.
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                                 Check this first and ignore unknown values.

                       If status is one of "downloading", or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                                               None if unavailable.
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                                unknown
                       * fragment_index: The counter of the currently
                                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
    merge_output_format: Extension to use when merging formats.
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                                           about it, warn otherwise (default)
    source_address:    (Experimental) Client-side IP address to bind to.
    call_home:         Boolean, true iff we are allowed to contact the
                       youtube-dl servers for debugging.
    sleep_interval:    Number of seconds to sleep before each download.
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called with the info_dict of
                       every video.
                       If it returns a message, the video is ignored.
                       If it returns None, the video is downloaded.
                       match_filter_func in utils.py is one example for this.
    no_color:          Do not emit color codes in output.

    The following options determine which downloader is picked:
    external_downloader: Executable of the external downloader to call.
                       None or unset for standard (built-in) downloader.
    hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.

    The following parameters are not used by YoutubeDL itself, they are used by
    the FileDownloader:
    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
    noresizebuffer, retries, continuedl, noprogress, consoletitle,
    xattr_set_filesize.

    The following options are used by the post processors:
    prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
                       otherwise prefer avconv.
    exec_cmd:          Arbitrary command to run after downloading
    """

    params = None  # The options dictionary described above
    _ies = []  # Registered InfoExtractors, queried in order
    _pps = []  # Registered PostProcessors, run in order
    _download_retcode = None  # Process exit code reflecting download outcome
    _num_downloads = None  # Counter backing the %(autonumber)s template field
    _screen_file = None  # Stream used for screen output (stdout or stderr)
268     def __init__(self, params=None, auto_init=True):
269         """Create a FileDownloader object with the given options."""
270         if params is None:
271             params = {}
272         self._ies = []
273         self._ies_instances = {}
274         self._pps = []
275         self._progress_hooks = []
276         self._download_retcode = 0
277         self._num_downloads = 0
278         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
279         self._err_file = sys.stderr
280         self.params = params
281         self.cache = Cache(self)
282
283         if params.get('bidi_workaround', False):
284             try:
285                 import pty
286                 master, slave = pty.openpty()
287                 width = get_term_width()
288                 if width is None:
289                     width_args = []
290                 else:
291                     width_args = ['-w', str(width)]
292                 sp_kwargs = dict(
293                     stdin=subprocess.PIPE,
294                     stdout=slave,
295                     stderr=self._err_file)
296                 try:
297                     self._output_process = subprocess.Popen(
298                         ['bidiv'] + width_args, **sp_kwargs
299                     )
300                 except OSError:
301                     self._output_process = subprocess.Popen(
302                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
303                 self._output_channel = os.fdopen(master, 'rb')
304             except OSError as ose:
305                 if ose.errno == 2:
306                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
307                 else:
308                     raise
309
310         if (sys.version_info >= (3,) and sys.platform != 'win32' and
311                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
312                 and not params.get('restrictfilenames', False)):
313             # On Python 3, the Unicode filesystem API will throw errors (#1474)
314             self.report_warning(
315                 'Assuming --restrict-filenames since file system encoding '
316                 'cannot encode all characters. '
317                 'Set the LC_ALL environment variable to fix this.')
318             self.params['restrictfilenames'] = True
319
320         if '%(stitle)s' in self.params.get('outtmpl', ''):
321             self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
322
323         self._setup_opener()
324
325         if auto_init:
326             self.print_debug_header()
327             self.add_default_info_extractors()
328
329         for pp_def_raw in self.params.get('postprocessors', []):
330             pp_class = get_postprocessor(pp_def_raw['key'])
331             pp_def = dict(pp_def_raw)
332             del pp_def['key']
333             pp = pp_class(self, **compat_kwargs(pp_def))
334             self.add_post_processor(pp)
335
336         for ph in self.params.get('progress_hooks', []):
337             self.add_progress_hook(ph)
338
339     def warn_if_short_id(self, argv):
340         # short YouTube ID starting with dash?
341         idxs = [
342             i for i, a in enumerate(argv)
343             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
344         if idxs:
345             correct_argv = (
346                 ['youtube-dl'] +
347                 [a for i, a in enumerate(argv) if i not in idxs] +
348                 ['--'] + [argv[i] for i in idxs]
349             )
350             self.report_warning(
351                 'Long argument string detected. '
352                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
353                 args_to_str(correct_argv))
354
355     def add_info_extractor(self, ie):
356         """Add an InfoExtractor object to the end of the list."""
357         self._ies.append(ie)
358         self._ies_instances[ie.ie_key()] = ie
359         ie.set_downloader(self)
360
361     def get_info_extractor(self, ie_key):
362         """
363         Get an instance of an IE with name ie_key, it will try to get one from
364         the _ies list, if there's no instance it will create a new one and add
365         it to the extractor list.
366         """
367         ie = self._ies_instances.get(ie_key)
368         if ie is None:
369             ie = get_info_extractor(ie_key)()
370             self.add_info_extractor(ie)
371         return ie
372
373     def add_default_info_extractors(self):
374         """
375         Add the InfoExtractors returned by gen_extractors to the end of the list
376         """
377         for ie in gen_extractors():
378             self.add_info_extractor(ie)
379
380     def add_post_processor(self, pp):
381         """Add a PostProcessor object to the end of the chain."""
382         self._pps.append(pp)
383         pp.set_downloader(self)
384
385     def add_progress_hook(self, ph):
386         """Add the progress hook (currently only for the file downloader)"""
387         self._progress_hooks.append(ph)
388
389     def _bidi_workaround(self, message):
390         if not hasattr(self, '_output_channel'):
391             return message
392
393         assert hasattr(self, '_output_process')
394         assert isinstance(message, compat_str)
395         line_count = message.count('\n') + 1
396         self._output_process.stdin.write((message + '\n').encode('utf-8'))
397         self._output_process.stdin.flush()
398         res = ''.join(self._output_channel.readline().decode('utf-8')
399                       for _ in range(line_count))
400         return res[:-len('\n')]
401
402     def to_screen(self, message, skip_eol=False):
403         """Print message to stdout if not in quiet mode."""
404         return self.to_stdout(message, skip_eol, check_quiet=True)
405
406     def _write_string(self, s, out=None):
407         write_string(s, out=out, encoding=self.params.get('encoding'))
408
409     def to_stdout(self, message, skip_eol=False, check_quiet=False):
410         """Print message to stdout if not in quiet mode."""
411         if self.params.get('logger'):
412             self.params['logger'].debug(message)
413         elif not check_quiet or not self.params.get('quiet', False):
414             message = self._bidi_workaround(message)
415             terminator = ['\n', ''][skip_eol]
416             output = message + terminator
417
418             self._write_string(output, self._screen_file)
419
420     def to_stderr(self, message):
421         """Print message to stderr."""
422         assert isinstance(message, compat_str)
423         if self.params.get('logger'):
424             self.params['logger'].error(message)
425         else:
426             message = self._bidi_workaround(message)
427             output = message + '\n'
428             self._write_string(output, self._err_file)
429
430     def to_console_title(self, message):
431         if not self.params.get('consoletitle', False):
432             return
433         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
434             # c_wchar_p() might not be necessary if `message` is
435             # already of type unicode()
436             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
437         elif 'TERM' in os.environ:
438             self._write_string('\033]0;%s\007' % message, self._screen_file)
439
440     def save_console_title(self):
441         if not self.params.get('consoletitle', False):
442             return
443         if 'TERM' in os.environ:
444             # Save the title on stack
445             self._write_string('\033[22;0t', self._screen_file)
446
447     def restore_console_title(self):
448         if not self.params.get('consoletitle', False):
449             return
450         if 'TERM' in os.environ:
451             # Restore the title from stack
452             self._write_string('\033[23;0t', self._screen_file)
453
454     def __enter__(self):
455         self.save_console_title()
456         return self
457
458     def __exit__(self, *args):
459         self.restore_console_title()
460
461         if self.params.get('cookiefile') is not None:
462             self.cookiejar.save()
463
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # Prefer the wrapped exception's own exc_info when the
                    # active exception carries one (e.g. ExtractorError)
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: show where we were called from
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Raise a DownloadError carrying the root-cause exc_info so
            # callers see the original failure, not just the wrapper
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # ignoreerrors: record the failure in the process return code instead
        self._download_retcode = 1
493
494     def report_warning(self, message):
495         '''
496         Print the message to stderr, it will be prefixed with 'WARNING:'
497         If stderr is a tty file the 'WARNING:' will be colored
498         '''
499         if self.params.get('logger') is not None:
500             self.params['logger'].warning(message)
501         else:
502             if self.params.get('no_warnings'):
503                 return
504             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
505                 _msg_header = '\033[0;33mWARNING:\033[0m'
506             else:
507                 _msg_header = 'WARNING:'
508             warning_message = '%s %s' % (_msg_header, message)
509             self.to_stderr(warning_message)
510
511     def report_error(self, message, tb=None):
512         '''
513         Do the same as trouble, but prefixes the message with 'ERROR:', colored
514         in red if stderr is a tty file.
515         '''
516         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
517             _msg_header = '\033[0;31mERROR:\033[0m'
518         else:
519             _msg_header = 'ERROR:'
520         error_message = '%s %s' % (_msg_header, message)
521         self.trouble(error_message, tb)
522
523     def report_file_already_downloaded(self, file_name):
524         """Report file has already been fully downloaded."""
525         try:
526             self.to_screen('[download] %s has already been downloaded' % file_name)
527         except UnicodeEncodeError:
528             self.to_screen('[download] The file has already been downloaded')
529
530     def prepare_filename(self, info_dict):
531         """Generate the output filename."""
532         try:
533             template_dict = dict(info_dict)
534
535             template_dict['epoch'] = int(time.time())
536             autonumber_size = self.params.get('autonumber_size')
537             if autonumber_size is None:
538                 autonumber_size = 5
539             autonumber_templ = '%0' + str(autonumber_size) + 'd'
540             template_dict['autonumber'] = autonumber_templ % self._num_downloads
541             if template_dict.get('playlist_index') is not None:
542                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
543             if template_dict.get('resolution') is None:
544                 if template_dict.get('width') and template_dict.get('height'):
545                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
546                 elif template_dict.get('height'):
547                     template_dict['resolution'] = '%sp' % template_dict['height']
548                 elif template_dict.get('width'):
549                     template_dict['resolution'] = '?x%d' % template_dict['width']
550
551             sanitize = lambda k, v: sanitize_filename(
552                 compat_str(v),
553                 restricted=self.params.get('restrictfilenames'),
554                 is_id=(k == 'id'))
555             template_dict = dict((k, sanitize(k, v))
556                                  for k, v in template_dict.items()
557                                  if v is not None)
558             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
559
560             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
561             tmpl = compat_expanduser(outtmpl)
562             filename = tmpl % template_dict
563             # Temporary fix for #4787
564             # 'Treat' all problem characters by passing filename through preferredencoding
565             # to workaround encoding issues with subprocess on python2 @ Windows
566             if sys.version_info < (3, 0) and sys.platform == 'win32':
567                 filename = encodeFilename(filename, True).decode(preferredencoding())
568             return filename
569         except ValueError as err:
570             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
571             return None
572
573     def _match_entry(self, info_dict, incomplete):
574         """ Returns None iff the file should be downloaded """
575
576         video_title = info_dict.get('title', info_dict.get('id', 'video'))
577         if 'title' in info_dict:
578             # This can happen when we're just evaluating the playlist
579             title = info_dict['title']
580             matchtitle = self.params.get('matchtitle', False)
581             if matchtitle:
582                 if not re.search(matchtitle, title, re.IGNORECASE):
583                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
584             rejecttitle = self.params.get('rejecttitle', False)
585             if rejecttitle:
586                 if re.search(rejecttitle, title, re.IGNORECASE):
587                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
588         date = info_dict.get('upload_date', None)
589         if date is not None:
590             dateRange = self.params.get('daterange', DateRange())
591             if date not in dateRange:
592                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
593         view_count = info_dict.get('view_count', None)
594         if view_count is not None:
595             min_views = self.params.get('min_views')
596             if min_views is not None and view_count < min_views:
597                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
598             max_views = self.params.get('max_views')
599             if max_views is not None and view_count > max_views:
600                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
601         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
602             return 'Skipping "%s" because it is age restricted' % video_title
603         if self.in_download_archive(info_dict):
604             return '%s has already been recorded in archive' % video_title
605
606         if not incomplete:
607             match_filter = self.params.get('match_filter')
608             if match_filter is not None:
609                 ret = match_filter(info_dict)
610                 if ret is not None:
611                     return ret
612
613         return None
614
615     @staticmethod
616     def add_extra_info(info_dict, extra_info):
617         '''Set the keys from extra_info in info dict if they are missing'''
618         for key, value in extra_info.items():
619             info_dict.setdefault(key, value)
620
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
                     process=True):
        '''
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result
        ie_key, if given, restricts extraction to that single InfoExtractor.
        process: when False, return the raw IE result without resolving it.
         '''
        # NOTE(review): extra_info uses a mutable default argument; it is only
        # read here, but confirm no caller mutates the shared default.

        if ie_key:
            ies = [self.get_info_extractor(ie_key)]
        else:
            ies = self._ies

        # Try each registered IE in order; the first one that reports the URL
        # as suitable handles it.
        for ie in ies:
            if not ie.suitable(url):
                continue

            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            try:
                ie_result = ie.extract(url)
                if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                    break
                if isinstance(ie_result, list):
                    # Backwards compatibility: old IE result format
                    ie_result = {
                        '_type': 'compat_list',
                        'entries': ie_result,
                    }
                self.add_default_extra_info(ie_result, ie, url)
                if process:
                    return self.process_ie_result(ie_result, download, extra_info)
                else:
                    return ie_result
            except ExtractorError as de:  # An error we somewhat expected
                self.report_error(compat_str(de), de.format_traceback())
                break
            except MaxDownloadsReached:
                raise
            except Exception as e:
                # With ignoreerrors, report and give up on this URL;
                # otherwise let the exception propagate to the caller
                if self.params.get('ignoreerrors', False):
                    self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                    break
                else:
                    raise
        else:
            # for/else: no IE claimed the URL
            self.report_error('no suitable InfoExtractor for URL %s' % url)
670
671     def add_default_extra_info(self, ie_result, ie, url):
672         self.add_extra_info(ie_result, {
673             'extractor': ie.IE_NAME,
674             'webpage_url': url,
675             'webpage_url_basename': url_basename(url),
676             'extractor_key': ie.ie_key(),
677         })
678
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.

        Dispatches on ie_result['_type'] (defaulting to 'video'):
        'video', 'url', 'url_transparent', 'playlist'/'multi_video' and
        'compat_list' are handled; anything else raises.
        """
        # NOTE(review): extra_info={} is a shared mutable default argument;
        # the dict is never mutated here, but callers should not rely on that.

        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            extract_flat = self.params.get('extract_flat', False)
            # With extract_flat, entries inside a playlist (or everything,
            # when it is True) are returned unresolved.
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                    extract_flat is True):
                if self.params.get('forcejson', False):
                    self.to_stdout(json.dumps(ie_result))
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # Non-None fields from the embedding page override the target
            # page's, except '_type' and 'url' (which would re-trigger
            # URL resolution on the merged result).
            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            assert new_result.get('_type') != 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist' or result_type == 'multi_video':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # playliststart is converted to a 0-based slice start;
            # playlistend stays 1-based/inclusive, which matches slicing
            # because slice ends are exclusive.
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

            playlistitems_str = self.params.get('playlist_items', None)
            playlistitems = None
            if playlistitems_str is not None:
                # Expand e.g. '1-3,7' into the 1-based indices 1, 2, 3, 7.
                # NOTE(review): the parameter name shadows the builtin
                # 'format'; kept as-is to preserve the code byte-for-byte.
                def iter_playlistitems(format):
                    for string_segment in format.split(','):
                        if '-' in string_segment:
                            start, end = string_segment.split('-')
                            for item in range(int(start), int(end) + 1):
                                yield int(item)
                        else:
                            yield int(string_segment)
                playlistitems = iter_playlistitems(playlistitems_str)

            # entries can be a plain list, a PagedList, or any iterable;
            # each branch also reports how many videos will be downloaded.
            ie_entries = ie_result['entries']
            if isinstance(ie_entries, list):
                n_all_entries = len(ie_entries)
                if playlistitems:
                    entries = [ie_entries[i - 1] for i in playlistitems]
                else:
                    entries = ie_entries[playliststart:playlistend]
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            elif isinstance(ie_entries, PagedList):
                if playlistitems:
                    entries = []
                    for item in playlistitems:
                        entries.extend(ie_entries.getslice(
                            item - 1, item
                        ))
                else:
                    entries = ie_entries.getslice(
                        playliststart, playlistend)
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))
            else:  # iterable
                if playlistitems:
                    entry_list = list(ie_entries)
                    entries = [entry_list[i - 1] for i in playlistitems]
                else:
                    entries = list(itertools.islice(
                        ie_entries, playliststart, playlistend))
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))

            if self.params.get('playlistreverse', False):
                entries = entries[::-1]

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                # NOTE(review): playlist_index is derived from playliststart,
                # which looks wrong when --playlist-items or playlistreverse
                # selected non-contiguous/reordered entries — confirm intent.
                extra = {
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_id': ie_result.get('id'),
                    'playlist_title': ie_result.get('title'),
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

                # Skip entries filtered out by --match-title & friends;
                # incomplete=True because the entry is not fully extracted yet.
                reason = self._match_entry(entry, incomplete=True)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                    continue

                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            return ie_result
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            # Fix up each entry in place with the playlist-level metadata
            # before resolving it recursively.
            def _fixup(r):
                self.add_extra_info(
                    r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    }
                )
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
840
841     def _apply_format_filter(self, format_spec, available_formats):
842         " Returns a tuple of the remaining format_spec and filtered formats "
843
844         OPERATORS = {
845             '<': operator.lt,
846             '<=': operator.le,
847             '>': operator.gt,
848             '>=': operator.ge,
849             '=': operator.eq,
850             '!=': operator.ne,
851         }
852         operator_rex = re.compile(r'''(?x)\s*\[
853             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
854             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
855             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
856             \]$
857             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
858         m = operator_rex.search(format_spec)
859         if m:
860             try:
861                 comparison_value = int(m.group('value'))
862             except ValueError:
863                 comparison_value = parse_filesize(m.group('value'))
864                 if comparison_value is None:
865                     comparison_value = parse_filesize(m.group('value') + 'B')
866                 if comparison_value is None:
867                     raise ValueError(
868                         'Invalid value %r in format specification %r' % (
869                             m.group('value'), format_spec))
870             op = OPERATORS[m.group('op')]
871
872         if not m:
873             STR_OPERATORS = {
874                 '=': operator.eq,
875                 '!=': operator.ne,
876             }
877             str_operator_rex = re.compile(r'''(?x)\s*\[
878                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
879                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
880                 \s*(?P<value>[a-zA-Z0-9_-]+)
881                 \s*\]$
882                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
883             m = str_operator_rex.search(format_spec)
884             if m:
885                 comparison_value = m.group('value')
886                 op = STR_OPERATORS[m.group('op')]
887
888         if not m:
889             raise ValueError('Invalid format specification %r' % format_spec)
890
891         def _filter(f):
892             actual_value = f.get(m.group('key'))
893             if actual_value is None:
894                 return m.group('none_inclusive')
895             return op(actual_value, comparison_value)
896         new_formats = [f for f in available_formats if _filter(f)]
897
898         new_format_spec = format_spec[:-len(m.group(0))]
899         if not new_format_spec:
900             new_format_spec = 'best'
901
902         return (new_format_spec, new_formats)
903
904     def select_format(self, format_spec, available_formats):
905         while format_spec.endswith(']'):
906             format_spec, available_formats = self._apply_format_filter(
907                 format_spec, available_formats)
908         if not available_formats:
909             return None
910
911         if format_spec == 'best' or format_spec is None:
912             return available_formats[-1]
913         elif format_spec == 'worst':
914             return available_formats[0]
915         elif format_spec == 'bestaudio':
916             audio_formats = [
917                 f for f in available_formats
918                 if f.get('vcodec') == 'none']
919             if audio_formats:
920                 return audio_formats[-1]
921         elif format_spec == 'worstaudio':
922             audio_formats = [
923                 f for f in available_formats
924                 if f.get('vcodec') == 'none']
925             if audio_formats:
926                 return audio_formats[0]
927         elif format_spec == 'bestvideo':
928             video_formats = [
929                 f for f in available_formats
930                 if f.get('acodec') == 'none']
931             if video_formats:
932                 return video_formats[-1]
933         elif format_spec == 'worstvideo':
934             video_formats = [
935                 f for f in available_formats
936                 if f.get('acodec') == 'none']
937             if video_formats:
938                 return video_formats[0]
939         else:
940             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
941             if format_spec in extensions:
942                 filter_f = lambda f: f['ext'] == format_spec
943             else:
944                 filter_f = lambda f: f['format_id'] == format_spec
945             matches = list(filter(filter_f, available_formats))
946             if matches:
947                 return matches[-1]
948         return None
949
950     def _calc_headers(self, info_dict):
951         res = std_headers.copy()
952
953         add_headers = info_dict.get('http_headers')
954         if add_headers:
955             res.update(add_headers)
956
957         cookies = self._calc_cookies(info_dict)
958         if cookies:
959             res['Cookie'] = cookies
960
961         return res
962
963     def _calc_cookies(self, info_dict):
964         pr = compat_urllib_request.Request(info_dict['url'])
965         self.cookiejar.add_cookie_header(pr)
966         return pr.get_header('Cookie')
967
    def process_video_result(self, info_dict, download=True):
        """Normalize a single resolved video result and select its formats.

        Fills in defaults (playlist fields, thumbnails, display_id,
        upload_date), validates/normalizes the 'formats' list, applies the
        user's format selection ('format' param), and — when 'download' is
        true — hands each selected format to process_info().
        Returns info_dict updated with the best selected format; returns None
        early for --list-formats / --list-thumbnails.
        Raises ExtractorError on missing mandatory fields or when no format
        matches the request.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        # Normalize thumbnails: a lone 'thumbnail' becomes a one-element
        # 'thumbnails' list; the list is sorted (best last) and each entry
        # gets a resolution string and an id.
        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if thumbnails:
            thumbnails.sort(key=lambda t: (
                t.get('preference'), t.get('width'), t.get('height'),
                t.get('id'), t.get('url')))
            for i, t in enumerate(thumbnails):
                if 'width' in t and 'height' in t:
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                if t.get('id') is None:
                    t['id'] = '%d' % i

        if thumbnails and 'thumbnail' not in info_dict:
            # Best thumbnail is last after the sort above
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        # Derive upload_date (YYYYMMDD) from a Unix timestamp when absent
        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Working around negative timestamps in Windows
            # (see http://bugs.python.org/issue1646728)
            if info_dict['timestamp'] < 0 and os.name == 'nt':
                info_dict['timestamp'] = 0
            upload_date = datetime.datetime.utcfromtimestamp(
                info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

        # These extractors handle format selection themselves
        if info_dict['extractor'] in ['Youku']:
            if download:
                self.process_info(info_dict)
            return info_dict

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            raise ExtractorError('No video formats found!')

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)

            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url']).lower()
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)

        # --format-limit: keep formats up to and including the given id
        format_limit = self.params.get('format_limit', None)
        if format_limit:
            formats = list(takewhile_inclusive(
                lambda f: f['format_id'] != format_limit, formats
            ))

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' field if the original info_dict lists them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats'):
            self.list_formats(info_dict)
            return
        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            req_format = 'best'
        formats_to_download = []
        # The -1 is for supporting YoutubeIE
        if req_format in ('-1', 'all'):
            formats_to_download = formats
        else:
            # ',' separates independent downloads; within each, '/' is a
            # fallback chain and '+' requests a video+audio merge.
            for rfstr in req_format.split(','):
                # We can accept formats requested in the format: 34/5/best, we pick
                # the first that is available, starting from left
                req_formats = rfstr.split('/')
                for rf in req_formats:
                    if re.match(r'.+?\+.+?', rf) is not None:
                        # Two formats have been requested like '137+139'
                        format_1, format_2 = rf.split('+')
                        formats_info = (self.select_format(format_1, formats),
                                        self.select_format(format_2, formats))
                        if all(formats_info):
                            # The first format must contain the video and the
                            # second the audio
                            if formats_info[0].get('vcodec') == 'none':
                                self.report_error('The first format must '
                                                  'contain the video, try using '
                                                  '"-f %s+%s"' % (format_2, format_1))
                                return
                            output_ext = (
                                formats_info[0]['ext']
                                if self.params.get('merge_output_format') is None
                                else self.params['merge_output_format'])
                            # Synthesize a merged pseudo-format: video
                            # attributes from the first, audio from the second
                            selected_format = {
                                'requested_formats': formats_info,
                                'format': '%s+%s' % (formats_info[0].get('format'),
                                                     formats_info[1].get('format')),
                                'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                        formats_info[1].get('format_id')),
                                'width': formats_info[0].get('width'),
                                'height': formats_info[0].get('height'),
                                'resolution': formats_info[0].get('resolution'),
                                'fps': formats_info[0].get('fps'),
                                'vcodec': formats_info[0].get('vcodec'),
                                'vbr': formats_info[0].get('vbr'),
                                'stretched_ratio': formats_info[0].get('stretched_ratio'),
                                'acodec': formats_info[1].get('acodec'),
                                'abr': formats_info[1].get('abr'),
                                'ext': output_ext,
                            }
                        else:
                            selected_format = None
                    else:
                        selected_format = self.select_format(rf, formats)
                    if selected_format is not None:
                        formats_to_download.append(selected_format)
                        break
        if not formats_to_download:
            raise ExtractorError('requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
1138
1139     def process_info(self, info_dict):
1140         """Process a single resolved IE result."""
1141
1142         assert info_dict.get('_type', 'video') == 'video'
1143
1144         max_downloads = self.params.get('max_downloads')
1145         if max_downloads is not None:
1146             if self._num_downloads >= int(max_downloads):
1147                 raise MaxDownloadsReached()
1148
1149         info_dict['fulltitle'] = info_dict['title']
1150         if len(info_dict['title']) > 200:
1151             info_dict['title'] = info_dict['title'][:197] + '...'
1152
1153         # Keep for backwards compatibility
1154         info_dict['stitle'] = info_dict['title']
1155
1156         if 'format' not in info_dict:
1157             info_dict['format'] = info_dict['ext']
1158
1159         reason = self._match_entry(info_dict, incomplete=False)
1160         if reason is not None:
1161             self.to_screen('[download] ' + reason)
1162             return
1163
1164         self._num_downloads += 1
1165
1166         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1167
1168         # Forced printings
1169         if self.params.get('forcetitle', False):
1170             self.to_stdout(info_dict['fulltitle'])
1171         if self.params.get('forceid', False):
1172             self.to_stdout(info_dict['id'])
1173         if self.params.get('forceurl', False):
1174             if info_dict.get('requested_formats') is not None:
1175                 for f in info_dict['requested_formats']:
1176                     self.to_stdout(f['url'] + f.get('play_path', ''))
1177             else:
1178                 # For RTMP URLs, also include the playpath
1179                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1180         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1181             self.to_stdout(info_dict['thumbnail'])
1182         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1183             self.to_stdout(info_dict['description'])
1184         if self.params.get('forcefilename', False) and filename is not None:
1185             self.to_stdout(filename)
1186         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1187             self.to_stdout(formatSeconds(info_dict['duration']))
1188         if self.params.get('forceformat', False):
1189             self.to_stdout(info_dict['format'])
1190         if self.params.get('forcejson', False):
1191             self.to_stdout(json.dumps(info_dict))
1192
1193         # Do nothing else if in simulate mode
1194         if self.params.get('simulate', False):
1195             return
1196
1197         if filename is None:
1198             return
1199
1200         try:
1201             dn = os.path.dirname(encodeFilename(filename))
1202             if dn and not os.path.exists(dn):
1203                 os.makedirs(dn)
1204         except (OSError, IOError) as err:
1205             self.report_error('unable to create directory ' + compat_str(err))
1206             return
1207
1208         if self.params.get('writedescription', False):
1209             descfn = filename + '.description'
1210             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1211                 self.to_screen('[info] Video description is already present')
1212             elif info_dict.get('description') is None:
1213                 self.report_warning('There\'s no description to write.')
1214             else:
1215                 try:
1216                     self.to_screen('[info] Writing video description to: ' + descfn)
1217                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1218                         descfile.write(info_dict['description'])
1219                 except (OSError, IOError):
1220                     self.report_error('Cannot write description file ' + descfn)
1221                     return
1222
1223         if self.params.get('writeannotations', False):
1224             annofn = filename + '.annotations.xml'
1225             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1226                 self.to_screen('[info] Video annotations are already present')
1227             else:
1228                 try:
1229                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1230                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1231                         annofile.write(info_dict['annotations'])
1232                 except (KeyError, TypeError):
1233                     self.report_warning('There are no annotations to write.')
1234                 except (OSError, IOError):
1235                     self.report_error('Cannot write annotations file: ' + annofn)
1236                     return
1237
1238         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1239                                        self.params.get('writeautomaticsub')])
1240
1241         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
1242             # subtitles download errors are already managed as troubles in relevant IE
1243             # that way it will silently go on when used with unsupporting IE
1244             subtitles = info_dict['subtitles']
1245             sub_format = self.params.get('subtitlesformat', 'srt')
1246             for sub_lang in subtitles.keys():
1247                 sub = subtitles[sub_lang]
1248                 if sub is None:
1249                     continue
1250                 try:
1251                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1252                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1253                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1254                     else:
1255                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1256                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1257                             subfile.write(sub)
1258                 except (OSError, IOError):
1259                     self.report_error('Cannot write subtitles file ' + sub_filename)
1260                     return
1261
1262         if self.params.get('writeinfojson', False):
1263             infofn = os.path.splitext(filename)[0] + '.info.json'
1264             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1265                 self.to_screen('[info] Video description metadata is already present')
1266             else:
1267                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1268                 try:
1269                     write_json_file(info_dict, infofn)
1270                 except (OSError, IOError):
1271                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1272                     return
1273
1274         self._write_thumbnails(info_dict, filename)
1275
1276         if not self.params.get('skip_download', False):
1277             try:
1278                 def dl(name, info):
1279                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1280                     for ph in self._progress_hooks:
1281                         fd.add_progress_hook(ph)
1282                     if self.params.get('verbose'):
1283                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1284                     return fd.download(name, info)
1285
1286                 if info_dict.get('requested_formats') is not None:
1287                     downloaded = []
1288                     success = True
1289                     merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1290                     if not merger.available:
1291                         postprocessors = []
1292                         self.report_warning('You have requested multiple '
1293                                             'formats but ffmpeg or avconv are not installed.'
1294                                             ' The formats won\'t be merged')
1295                     else:
1296                         postprocessors = [merger]
1297                     for f in info_dict['requested_formats']:
1298                         new_info = dict(info_dict)
1299                         new_info.update(f)
1300                         fname = self.prepare_filename(new_info)
1301                         fname = prepend_extension(fname, 'f%s' % f['format_id'])
1302                         downloaded.append(fname)
1303                         partial_success = dl(fname, new_info)
1304                         success = success and partial_success
1305                     info_dict['__postprocessors'] = postprocessors
1306                     info_dict['__files_to_merge'] = downloaded
1307                 else:
1308                     # Just a single file
1309                     success = dl(filename, info_dict)
1310             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1311                 self.report_error('unable to download video data: %s' % str(err))
1312                 return
1313             except (OSError, IOError) as err:
1314                 raise UnavailableVideoError(err)
1315             except (ContentTooShortError, ) as err:
1316                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1317                 return
1318
1319             if success:
1320                 # Fixup content
1321                 fixup_policy = self.params.get('fixup')
1322                 if fixup_policy is None:
1323                     fixup_policy = 'detect_or_warn'
1324
1325                 stretched_ratio = info_dict.get('stretched_ratio')
1326                 if stretched_ratio is not None and stretched_ratio != 1:
1327                     if fixup_policy == 'warn':
1328                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1329                             info_dict['id'], stretched_ratio))
1330                     elif fixup_policy == 'detect_or_warn':
1331                         stretched_pp = FFmpegFixupStretchedPP(self)
1332                         if stretched_pp.available:
1333                             info_dict.setdefault('__postprocessors', [])
1334                             info_dict['__postprocessors'].append(stretched_pp)
1335                         else:
1336                             self.report_warning(
1337                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1338                                     info_dict['id'], stretched_ratio))
1339                     else:
1340                         assert fixup_policy in ('ignore', 'never')
1341
1342                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1343                     if fixup_policy == 'warn':
1344                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1345                             info_dict['id']))
1346                     elif fixup_policy == 'detect_or_warn':
1347                         fixup_pp = FFmpegFixupM4aPP(self)
1348                         if fixup_pp.available:
1349                             info_dict.setdefault('__postprocessors', [])
1350                             info_dict['__postprocessors'].append(fixup_pp)
1351                         else:
1352                             self.report_warning(
1353                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1354                                     info_dict['id']))
1355                     else:
1356                         assert fixup_policy in ('ignore', 'never')
1357
1358                 try:
1359                     self.post_process(filename, info_dict)
1360                 except (PostProcessingError) as err:
1361                     self.report_error('postprocessing: %s' % str(err))
1362                     return
1363                 self.record_download_archive(info_dict)
1364
1365     def download(self, url_list):
1366         """Download a given list of URLs."""
1367         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1368         if (len(url_list) > 1 and
1369                 '%' not in outtmpl
1370                 and self.params.get('max_downloads') != 1):
1371             raise SameFileError(outtmpl)
1372
1373         for url in url_list:
1374             try:
1375                 # It also downloads the videos
1376                 res = self.extract_info(url)
1377             except UnavailableVideoError:
1378                 self.report_error('unable to download video')
1379             except MaxDownloadsReached:
1380                 self.to_screen('[info] Maximum number of downloaded files reached.')
1381                 raise
1382             else:
1383                 if self.params.get('dump_single_json', False):
1384                     self.to_stdout(json.dumps(res))
1385
1386         return self._download_retcode
1387
1388     def download_with_info_file(self, info_filename):
1389         with io.open(info_filename, 'r', encoding='utf-8') as f:
1390             info = json.load(f)
1391         try:
1392             self.process_ie_result(info, download=True)
1393         except DownloadError:
1394             webpage_url = info.get('webpage_url')
1395             if webpage_url is not None:
1396                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1397                 return self.download([webpage_url])
1398             else:
1399                 raise
1400         return self._download_retcode
1401
    def post_process(self, filename, ie_info):
        """Run all the postprocessors on the given file."""
        # Work on a copy so the caller's info dict is not mutated
        info = dict(ie_info)
        info['filepath'] = filename
        pps_chain = []
        # Per-video postprocessors (e.g. merger/fixup PPs attached under
        # '__postprocessors') run before the user-configured ones in self._pps
        if ie_info.get('__postprocessors') is not None:
            pps_chain.extend(ie_info['__postprocessors'])
        pps_chain.extend(self._pps)
        for pp in pps_chain:
            # NOTE(review): keep_video is reset to None on every iteration,
            # so only the *current* PP's wish decides whether the pre-PP file
            # is deleted after this iteration - preserved as-is.
            keep_video = None
            old_filename = info['filepath']
            try:
                # Each PP may rewrite the info dict (notably 'filepath')
                keep_video_wish, info = pp.run(info)
                if keep_video_wish is not None:
                    if keep_video_wish:
                        keep_video = keep_video_wish
                    elif keep_video is None:
                        # No clear decision yet, let IE decide
                        keep_video = keep_video_wish
            except PostProcessingError as e:
                self.report_error(e.msg)
            # Delete the input file only when the PP explicitly said so and
            # the user did not pass -k/--keep-video
            if keep_video is False and not self.params.get('keepvideo', False):
                try:
                    self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
                    os.remove(encodeFilename(old_filename))
                except (IOError, OSError):
                    # Best-effort cleanup; a leftover file is not fatal
                    self.report_warning('Unable to remove downloaded video file')
1429
1430     def _make_archive_id(self, info_dict):
1431         # Future-proof against any change in case
1432         # and backwards compatibility with prior versions
1433         extractor = info_dict.get('extractor_key')
1434         if extractor is None:
1435             if 'id' in info_dict:
1436                 extractor = info_dict.get('ie_key')  # key in a playlist
1437         if extractor is None:
1438             return None  # Incomplete video information
1439         return extractor.lower() + ' ' + info_dict['id']
1440
1441     def in_download_archive(self, info_dict):
1442         fn = self.params.get('download_archive')
1443         if fn is None:
1444             return False
1445
1446         vid_id = self._make_archive_id(info_dict)
1447         if vid_id is None:
1448             return False  # Incomplete video information
1449
1450         try:
1451             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1452                 for line in archive_file:
1453                     if line.strip() == vid_id:
1454                         return True
1455         except IOError as ioe:
1456             if ioe.errno != errno.ENOENT:
1457                 raise
1458         return False
1459
1460     def record_download_archive(self, info_dict):
1461         fn = self.params.get('download_archive')
1462         if fn is None:
1463             return
1464         vid_id = self._make_archive_id(info_dict)
1465         assert vid_id
1466         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1467             archive_file.write(vid_id + '\n')
1468
1469     @staticmethod
1470     def format_resolution(format, default='unknown'):
1471         if format.get('vcodec') == 'none':
1472             return 'audio only'
1473         if format.get('resolution') is not None:
1474             return format['resolution']
1475         if format.get('height') is not None:
1476             if format.get('width') is not None:
1477                 res = '%sx%s' % (format['width'], format['height'])
1478             else:
1479                 res = '%sp' % format['height']
1480         elif format.get('width') is not None:
1481             res = '?x%d' % format['width']
1482         else:
1483             res = default
1484         return res
1485
1486     def _format_note(self, fdict):
1487         res = ''
1488         if fdict.get('ext') in ['f4f', 'f4m']:
1489             res += '(unsupported) '
1490         if fdict.get('format_note') is not None:
1491             res += fdict['format_note'] + ' '
1492         if fdict.get('tbr') is not None:
1493             res += '%4dk ' % fdict['tbr']
1494         if fdict.get('container') is not None:
1495             if res:
1496                 res += ', '
1497             res += '%s container' % fdict['container']
1498         if (fdict.get('vcodec') is not None and
1499                 fdict.get('vcodec') != 'none'):
1500             if res:
1501                 res += ', '
1502             res += fdict['vcodec']
1503             if fdict.get('vbr') is not None:
1504                 res += '@'
1505         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1506             res += 'video@'
1507         if fdict.get('vbr') is not None:
1508             res += '%4dk' % fdict['vbr']
1509         if fdict.get('fps') is not None:
1510             res += ', %sfps' % fdict['fps']
1511         if fdict.get('acodec') is not None:
1512             if res:
1513                 res += ', '
1514             if fdict['acodec'] == 'none':
1515                 res += 'video only'
1516             else:
1517                 res += '%-5s' % fdict['acodec']
1518         elif fdict.get('abr') is not None:
1519             if res:
1520                 res += ', '
1521             res += 'audio'
1522         if fdict.get('abr') is not None:
1523             res += '@%3dk' % fdict['abr']
1524         if fdict.get('asr') is not None:
1525             res += ' (%5dHz)' % fdict['asr']
1526         if fdict.get('filesize') is not None:
1527             if res:
1528                 res += ', '
1529             res += format_bytes(fdict['filesize'])
1530         elif fdict.get('filesize_approx') is not None:
1531             if res:
1532                 res += ', '
1533             res += '~' + format_bytes(fdict['filesize_approx'])
1534         return res
1535
1536     def list_formats(self, info_dict):
1537         def line(format, idlen=20):
1538             return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1539                 format['format_id'],
1540                 format['ext'],
1541                 self.format_resolution(format),
1542                 self._format_note(format),
1543             ))
1544
1545         formats = info_dict.get('formats', [info_dict])
1546         idlen = max(len('format code'),
1547                     max(len(f['format_id']) for f in formats))
1548         formats_s = [
1549             line(f, idlen) for f in formats
1550             if f.get('preference') is None or f['preference'] >= -1000]
1551         if len(formats) > 1:
1552             formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1553
1554         header_line = line({
1555             'format_id': 'format code', 'ext': 'extension',
1556             'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1557         self.to_screen(
1558             '[info] Available formats for %s:\n%s\n%s' %
1559             (info_dict['id'], header_line, '\n'.join(formats_s)))
1560
1561     def list_thumbnails(self, info_dict):
1562         thumbnails = info_dict.get('thumbnails')
1563         if not thumbnails:
1564             tn_url = info_dict.get('thumbnail')
1565             if tn_url:
1566                 thumbnails = [{'id': '0', 'url': tn_url}]
1567             else:
1568                 self.to_screen(
1569                     '[info] No thumbnails present for %s' % info_dict['id'])
1570                 return
1571
1572         self.to_screen(
1573             '[info] Thumbnails for %s:' % info_dict['id'])
1574         self.to_screen(render_table(
1575             ['ID', 'width', 'height', 'URL'],
1576             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1577
1578     def urlopen(self, req):
1579         """ Start an HTTP download """
1580
1581         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1582         # always respected by websites, some tend to give out URLs with non percent-encoded
1583         # non-ASCII characters (see telemb.py, ard.py [#3412])
1584         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1585         # To work around aforementioned issue we will replace request's original URL with
1586         # percent-encoded one
1587         req_is_string = isinstance(req, compat_basestring)
1588         url = req if req_is_string else req.get_full_url()
1589         url_escaped = escape_url(url)
1590
1591         # Substitute URL if any change after escaping
1592         if url != url_escaped:
1593             if req_is_string:
1594                 req = url_escaped
1595             else:
1596                 req = compat_urllib_request.Request(
1597                     url_escaped, data=req.data, headers=req.headers,
1598                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1599
1600         return self._opener.open(req, timeout=self._socket_timeout)
1601
1602     def print_debug_header(self):
1603         if not self.params.get('verbose'):
1604             return
1605
1606         if type('') is not compat_str:
1607             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1608             self.report_warning(
1609                 'Your Python is broken! Update to a newer and supported version')
1610
1611         stdout_encoding = getattr(
1612             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1613         encoding_str = (
1614             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1615                 locale.getpreferredencoding(),
1616                 sys.getfilesystemencoding(),
1617                 stdout_encoding,
1618                 self.get_encoding()))
1619         write_string(encoding_str, encoding=None)
1620
1621         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1622         try:
1623             sp = subprocess.Popen(
1624                 ['git', 'rev-parse', '--short', 'HEAD'],
1625                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1626                 cwd=os.path.dirname(os.path.abspath(__file__)))
1627             out, err = sp.communicate()
1628             out = out.decode().strip()
1629             if re.match('[0-9a-f]+', out):
1630                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1631         except:
1632             try:
1633                 sys.exc_clear()
1634             except:
1635                 pass
1636         self._write_string('[debug] Python version %s - %s\n' % (
1637             platform.python_version(), platform_name()))
1638
1639         exe_versions = FFmpegPostProcessor.get_versions(self)
1640         exe_versions['rtmpdump'] = rtmpdump_version()
1641         exe_str = ', '.join(
1642             '%s %s' % (exe, v)
1643             for exe, v in sorted(exe_versions.items())
1644             if v
1645         )
1646         if not exe_str:
1647             exe_str = 'none'
1648         self._write_string('[debug] exe versions: %s\n' % exe_str)
1649
1650         proxy_map = {}
1651         for handler in self._opener.handlers:
1652             if hasattr(handler, 'proxies'):
1653                 proxy_map.update(handler.proxies)
1654         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1655
1656         if self.params.get('call_home', False):
1657             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1658             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1659             latest_version = self.urlopen(
1660                 'https://yt-dl.org/latest/version').read().decode('utf-8')
1661             if version_tuple(latest_version) > version_tuple(__version__):
1662                 self.report_warning(
1663                     'You are using an outdated version (newest version: %s)! '
1664                     'See https://yt-dl.org/update if you need help updating.' %
1665                     latest_version)
1666
1667     def _setup_opener(self):
1668         timeout_val = self.params.get('socket_timeout')
1669         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1670
1671         opts_cookiefile = self.params.get('cookiefile')
1672         opts_proxy = self.params.get('proxy')
1673
1674         if opts_cookiefile is None:
1675             self.cookiejar = compat_cookiejar.CookieJar()
1676         else:
1677             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1678                 opts_cookiefile)
1679             if os.access(opts_cookiefile, os.R_OK):
1680                 self.cookiejar.load()
1681
1682         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1683             self.cookiejar)
1684         if opts_proxy is not None:
1685             if opts_proxy == '':
1686                 proxies = {}
1687             else:
1688                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1689         else:
1690             proxies = compat_urllib_request.getproxies()
1691             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1692             if 'http' in proxies and 'https' not in proxies:
1693                 proxies['https'] = proxies['http']
1694         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1695
1696         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1697         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1698         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1699         opener = compat_urllib_request.build_opener(
1700             https_handler, proxy_handler, cookie_processor, ydlh)
1701         # Delete the default user-agent header, which would otherwise apply in
1702         # cases where our custom HTTP handler doesn't come into play
1703         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1704         opener.addheaders = []
1705         self._opener = opener
1706
1707     def encode(self, s):
1708         if isinstance(s, bytes):
1709             return s  # Already encoded
1710
1711         try:
1712             return s.encode(self.get_encoding())
1713         except UnicodeEncodeError as err:
1714             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1715             raise
1716
1717     def get_encoding(self):
1718         encoding = self.params.get('encoding')
1719         if encoding is None:
1720             encoding = preferredencoding()
1721         return encoding
1722
    def _write_thumbnails(self, info_dict, filename):
        # Save video thumbnail(s) next to the media file.
        # 'writethumbnail' saves a single thumbnail, 'write_all_thumbnails'
        # saves every one; with neither option this is a no-op.
        if self.params.get('writethumbnail', False):
            thumbnails = info_dict.get('thumbnails')
            if thumbnails:
                # Takes the last entry - presumably the preferred/best one;
                # NOTE(review): confirm the ordering produced upstream
                thumbnails = [thumbnails[-1]]
        elif self.params.get('write_all_thumbnails', False):
            thumbnails = info_dict.get('thumbnails')
        else:
            return

        if not thumbnails:
            # No thumbnails present, so return immediately
            return

        for t in thumbnails:
            thumb_ext = determine_ext(t['url'], 'jpg')
            # Only disambiguate names/messages when several thumbnails are written
            suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
            thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
            thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext

            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                self.to_screen('[%s] %s: Thumbnail %sis already present' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
            else:
                self.to_screen('[%s] %s: Downloading thumbnail %s...' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
                try:
                    uf = self.urlopen(t['url'])
                    with open(thumb_filename, 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    # A failed thumbnail download is not fatal; warn and continue
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (t['url'], compat_str(err)))