git.bitcoin.ninja Git - youtube-dl/blob - youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import datetime
   8 import errno
   9 import io
  10 import itertools
  11 import json
  12 import locale
  13 import operator
  14 import os
  15 import platform
  16 import re
  17 import shutil
  18 import subprocess
  19 import socket
  20 import sys
  21 import time
  22 import traceback
  23
  24 if os.name == 'nt':
  25     import ctypes
  26
  27 from .compat import (
  28     compat_cookiejar,
  29     compat_expanduser,
  30     compat_http_client,
  31     compat_kwargs,
  32     compat_str,
  33     compat_urllib_error,
  34     compat_urllib_request,
  35 )
  36 from .utils import (
  37     escape_url,
  38     ContentTooShortError,
  39     date_from_str,
  40     DateRange,
  41     DEFAULT_OUTTMPL,
  42     determine_ext,
  43     DownloadError,
  44     encodeFilename,
  45     ExtractorError,
  46     format_bytes,
  47     formatSeconds,
  48     get_term_width,
  49     locked_file,
  50     make_HTTPS_handler,
  51     MaxDownloadsReached,
  52     PagedList,
  53     parse_filesize,
  54     PostProcessingError,
  55     platform_name,
  56     preferredencoding,
  57     render_table,
  58     SameFileError,
  59     sanitize_filename,
  60     std_headers,
  61     subtitles_filename,
  62     takewhile_inclusive,
  63     UnavailableVideoError,
  64     url_basename,
  65     version_tuple,
  66     write_json_file,
  67     write_string,
  68     YoutubeDLHandler,
  69     prepend_extension,
  70     args_to_str,
  71     age_restricted,
  72 )
  73 from .cache import Cache
  74 from .extractor import get_info_extractor, gen_extractors
  75 from .downloader import get_suitable_downloader
  76 from .downloader.rtmp import rtmpdump_version
  77 from .postprocessor import (
  78     FFmpegFixupM4aPP,
  79     FFmpegFixupStretchedPP,
  80     FFmpegMergerPP,
  81     FFmpegPostProcessor,
  82     get_postprocessor,
  83 )
  84 from .version import __version__
  85
  86
  87 class YoutubeDL(object):
  88     """YoutubeDL class.
  89
  90     YoutubeDL objects are the ones responsible of downloading the
  91     actual video file and writing it to disk if the user has requested
  92     it, among some other tasks. In most cases there should be one per
  93     program. As, given a video URL, the downloader doesn't know how to
  94     extract all the needed information, task that InfoExtractors do, it
  95     has to pass the URL to one of them.
  96
  97     For this, YoutubeDL objects have a method that allows
  98     InfoExtractors to be registered in a given order. When it is passed
  99     a URL, the YoutubeDL object handles it to the first InfoExtractor it
 100     finds that reports being able to handle it. The InfoExtractor extracts
 101     all the information about the video or videos the URL refers to, and
 102     YoutubeDL process the extracted information, possibly using a File
 103     Downloader to download the video.
 104
 105     YoutubeDL objects accept a lot of parameters. In order not to saturate
 106     the object constructor with arguments, it receives a dictionary of
 107     options instead. These options are available through the params
 108     attribute for the InfoExtractors to use. The YoutubeDL also
 109     registers itself as the downloader in charge for the InfoExtractors
 110     that are added to it, so this is a "mutual registration".
 111
 112     Available options:
 113
 114     username:          Username for authentication purposes.
 115     password:          Password for authentication purposes.
 116     videopassword:     Password for acces a video.
 117     usenetrc:          Use netrc for authentication instead.
 118     verbose:           Print additional info to stdout.
 119     quiet:             Do not print messages to stdout.
 120     no_warnings:       Do not print out anything for warnings.
 121     forceurl:          Force printing final URL.
 122     forcetitle:        Force printing title.
 123     forceid:           Force printing ID.
 124     forcethumbnail:    Force printing thumbnail URL.
 125     forcedescription:  Force printing description.
 126     forcefilename:     Force printing final filename.
 127     forceduration:     Force printing duration.
 128     forcejson:         Force printing info_dict as JSON.
 129     dump_single_json:  Force printing the info_dict of the whole playlist
 130                        (or video) as a single JSON line.
 131     simulate:          Do not download the video files.
 132     format:            Video format code. See options.py for more information.
 133     format_limit:      Highest quality format to try.
 134     outtmpl:           Template for output names.
 135     restrictfilenames: Do not allow "&" and spaces in file names
 136     ignoreerrors:      Do not stop on download errors.
 137     nooverwrites:      Prevent overwriting files.
 138     playliststart:     Playlist item to start at.
 139     playlistend:       Playlist item to end at.
 140     playlist_items:    Specific indices of playlist to download.
 141     playlistreverse:   Download playlist items in reverse order.
 142     matchtitle:        Download only matching titles.
 143     rejecttitle:       Reject downloads for matching titles.
 144     logger:            Log messages to a logging.Logger instance.
 145     logtostderr:       Log messages to stderr instead of stdout.
 146     writedescription:  Write the video description to a .description file
 147     writeinfojson:     Write the video description to a .info.json file
 148     writeannotations:  Write the video annotations to a .annotations.xml file
 149     writethumbnail:    Write the thumbnail image to a file
 150     write_all_thumbnails:  Write all thumbnail formats to files
 151     writesubtitles:    Write the video subtitles to a file
 152     writeautomaticsub: Write the automatic subtitles to a file
 153     allsubtitles:      Downloads all the subtitles of the video
 154                        (requires writesubtitles or writeautomaticsub)
 155     listsubtitles:     Lists all available subtitles for the video
 156     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
 157     subtitleslangs:    List of languages of the subtitles to download
 158     keepvideo:         Keep the video file after post-processing
 159     daterange:         A DateRange object, download only if the upload_date is in the range.
 160     skip_download:     Skip the actual download of the video file
 161     cachedir:          Location of the cache files in the filesystem.
 162                        False to disable filesystem cache.
 163     noplaylist:        Download single video instead of a playlist if in doubt.
 164     age_limit:         An integer representing the user's age in years.
 165                        Unsuitable videos for the given age are skipped.
 166     min_views:         An integer representing the minimum view count the video
 167                        must have in order to not be skipped.
 168                        Videos without view count information are always
 169                        downloaded. None for no limit.
 170     max_views:         An integer representing the maximum view count.
 171                        Videos that are more popular than that are not
 172                        downloaded.
 173                        Videos without view count information are always
 174                        downloaded. None for no limit.
 175     download_archive:  File name of a file where all downloads are recorded.
 176                        Videos already present in the file are not downloaded
 177                        again.
 178     cookiefile:        File name where cookies should be read from and dumped to.
 179     nocheckcertificate:Do not verify SSL certificates
 180     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 181                        At the moment, this is only supported by YouTube.
 182     proxy:             URL of the proxy server to use
 183     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 184     bidi_workaround:   Work around buggy terminals without bidirectional text
 185                        support, using fridibi
 186     debug_printtraffic:Print out sent and received HTTP traffic
 187     include_ads:       Download ads as well
 188     default_search:    Prepend this string if an input url is not valid.
 189                        'auto' for elaborate guessing
 190     encoding:          Use this encoding instead of the system-specified.
 191     extract_flat:      Do not resolve URLs, return the immediate result.
 192                        Pass in 'in_playlist' to only show this behavior for
 193                        playlist items.
 194     postprocessors:    A list of dictionaries, each with an entry
 195                        * key:  The name of the postprocessor. See
 196                                youtube_dl/postprocessor/__init__.py for a list.
 197                        as well as any further keyword arguments for the
 198                        postprocessor.
 199     progress_hooks:    A list of functions that get called on download
 200                        progress, with a dictionary with the entries
 201                        * status: One of "downloading" and "finished".
 202                                  Check this first and ignore unknown values.
 203
 204                        If status is one of "downloading" or "finished", the
 205                        following properties may also be present:
 206                        * filename: The final filename (always present)
 207                        * downloaded_bytes: Bytes on disk
 208                        * total_bytes: Size of the whole file, None if unknown
 209                        * tmpfilename: The filename we're currently writing to
 210                        * eta: The estimated time in seconds, None if unknown
 211                        * speed: The download speed in bytes/second, None if
 212                                 unknown
 213
 214                        Progress hooks are guaranteed to be called at least once
 215                        (with status "finished") if the download is successful.
 216     merge_output_format: Extension to use when merging formats.
 217     fixup:             Automatically correct known faults of the file.
 218                        One of:
 219                        - "never": do nothing
 220                        - "warn": only emit a warning
 221                        - "detect_or_warn": check whether we can do anything
 222                                            about it, warn otherwise (default)
 223     source_address:    (Experimental) Client-side IP address to bind to.
 224     call_home:         Boolean, true iff we are allowed to contact the
 225                        youtube-dl servers for debugging.
 226     sleep_interval:    Number of seconds to sleep before each download.
 227     external_downloader:  Executable of the external downloader to call.
 228     listformats:       Print an overview of available video formats and exit.
 229     list_thumbnails:   Print a table of all thumbnails and exit.
 230
 231
 232     The following parameters are not used by YoutubeDL itself, they are used by
 233     the FileDownloader:
 234     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 235     noresizebuffer, retries, continuedl, noprogress, consoletitle,
 236     xattr_set_filesize.
 237
 238     The following options are used by the post processors:
 239     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
 240                        otherwise prefer avconv.
 241     exec_cmd:          Arbitrary command to run after downloading
 242     """
 243
 244     params = None
 245     _ies = []
 246     _pps = []
 247     _download_retcode = None
 248     _num_downloads = None
 249     _screen_file = None
 250
 251     def __init__(self, params=None, auto_init=True):
 252         """Create a FileDownloader object with the given options."""
 253         if params is None:
 254             params = {}
 255         self._ies = []
 256         self._ies_instances = {}
 257         self._pps = []
 258         self._progress_hooks = []
 259         self._download_retcode = 0
 260         self._num_downloads = 0
 261         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 262         self._err_file = sys.stderr
 263         self.params = params
 264         self.cache = Cache(self)
 265
 266         if params.get('bidi_workaround', False):
 267             try:
 268                 import pty
 269                 master, slave = pty.openpty()
 270                 width = get_term_width()
 271                 if width is None:
 272                     width_args = []
 273                 else:
 274                     width_args = ['-w', str(width)]
 275                 sp_kwargs = dict(
 276                     stdin=subprocess.PIPE,
 277                     stdout=slave,
 278                     stderr=self._err_file)
 279                 try:
 280                     self._output_process = subprocess.Popen(
 281                         ['bidiv'] + width_args, **sp_kwargs
 282                     )
 283                 except OSError:
 284                     self._output_process = subprocess.Popen(
 285                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 286                 self._output_channel = os.fdopen(master, 'rb')
 287             except OSError as ose:
 288                 if ose.errno == 2:
 289                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 290                 else:
 291                     raise
 292
 293         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 294                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 295                 and not params.get('restrictfilenames', False)):
 296             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 297             self.report_warning(
 298                 'Assuming --restrict-filenames since file system encoding '
 299                 'cannot encode all characters. '
 300                 'Set the LC_ALL environment variable to fix this.')
 301             self.params['restrictfilenames'] = True
 302
 303         if '%(stitle)s' in self.params.get('outtmpl', ''):
 304             self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 305
 306         self._setup_opener()
 307
 308         if auto_init:
 309             self.print_debug_header()
 310             self.add_default_info_extractors()
 311
 312         for pp_def_raw in self.params.get('postprocessors', []):
 313             pp_class = get_postprocessor(pp_def_raw['key'])
 314             pp_def = dict(pp_def_raw)
 315             del pp_def['key']
 316             pp = pp_class(self, **compat_kwargs(pp_def))
 317             self.add_post_processor(pp)
 318
 319         for ph in self.params.get('progress_hooks', []):
 320             self.add_progress_hook(ph)
 321
 322     def warn_if_short_id(self, argv):
 323         # short YouTube ID starting with dash?
 324         idxs = [
 325             i for i, a in enumerate(argv)
 326             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 327         if idxs:
 328             correct_argv = (
 329                 ['youtube-dl'] +
 330                 [a for i, a in enumerate(argv) if i not in idxs] +
 331                 ['--'] + [argv[i] for i in idxs]
 332             )
 333             self.report_warning(
 334                 'Long argument string detected. '
 335                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 336                 args_to_str(correct_argv))
 337
 338     def add_info_extractor(self, ie):
 339         """Add an InfoExtractor object to the end of the list."""
 340         self._ies.append(ie)
 341         self._ies_instances[ie.ie_key()] = ie
 342         ie.set_downloader(self)
 343
 344     def get_info_extractor(self, ie_key):
 345         """
 346         Get an instance of an IE with name ie_key, it will try to get one from
 347         the _ies list, if there's no instance it will create a new one and add
 348         it to the extractor list.
 349         """
 350         ie = self._ies_instances.get(ie_key)
 351         if ie is None:
 352             ie = get_info_extractor(ie_key)()
 353             self.add_info_extractor(ie)
 354         return ie
 355
 356     def add_default_info_extractors(self):
 357         """
 358         Add the InfoExtractors returned by gen_extractors to the end of the list
 359         """
 360         for ie in gen_extractors():
 361             self.add_info_extractor(ie)
 362
 363     def add_post_processor(self, pp):
 364         """Add a PostProcessor object to the end of the chain."""
 365         self._pps.append(pp)
 366         pp.set_downloader(self)
 367
 368     def add_progress_hook(self, ph):
 369         """Add the progress hook (currently only for the file downloader)"""
 370         self._progress_hooks.append(ph)
 371
 372     def _bidi_workaround(self, message):
 373         if not hasattr(self, '_output_channel'):
 374             return message
 375
 376         assert hasattr(self, '_output_process')
 377         assert isinstance(message, compat_str)
 378         line_count = message.count('\n') + 1
 379         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 380         self._output_process.stdin.flush()
 381         res = ''.join(self._output_channel.readline().decode('utf-8')
 382                       for _ in range(line_count))
 383         return res[:-len('\n')]
 384
 385     def to_screen(self, message, skip_eol=False):
 386         """Print message to stdout if not in quiet mode."""
 387         return self.to_stdout(message, skip_eol, check_quiet=True)
 388
 389     def _write_string(self, s, out=None):
 390         write_string(s, out=out, encoding=self.params.get('encoding'))
 391
 392     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 393         """Print message to stdout if not in quiet mode."""
 394         if self.params.get('logger'):
 395             self.params['logger'].debug(message)
 396         elif not check_quiet or not self.params.get('quiet', False):
 397             message = self._bidi_workaround(message)
 398             terminator = ['\n', ''][skip_eol]
 399             output = message + terminator
 400
 401             self._write_string(output, self._screen_file)
 402
 403     def to_stderr(self, message):
 404         """Print message to stderr."""
 405         assert isinstance(message, compat_str)
 406         if self.params.get('logger'):
 407             self.params['logger'].error(message)
 408         else:
 409             message = self._bidi_workaround(message)
 410             output = message + '\n'
 411             self._write_string(output, self._err_file)
 412
 413     def to_console_title(self, message):
 414         if not self.params.get('consoletitle', False):
 415             return
 416         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 417             # c_wchar_p() might not be necessary if `message` is
 418             # already of type unicode()
 419             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 420         elif 'TERM' in os.environ:
 421             self._write_string('\033]0;%s\007' % message, self._screen_file)
 422
 423     def save_console_title(self):
 424         if not self.params.get('consoletitle', False):
 425             return
 426         if 'TERM' in os.environ:
 427             # Save the title on stack
 428             self._write_string('\033[22;0t', self._screen_file)
 429
 430     def restore_console_title(self):
 431         if not self.params.get('consoletitle', False):
 432             return
 433         if 'TERM' in os.environ:
 434             # Restore the title from stack
 435             self._write_string('\033[23;0t', self._screen_file)
 436
 437     def __enter__(self):
 438         self.save_console_title()
 439         return self
 440
 441     def __exit__(self, *args):
 442         self.restore_console_title()
 443
 444         if self.params.get('cookiefile') is not None:
 445             self.cookiejar.save()
 446
 447     def trouble(self, message=None, tb=None):
 448         """Determine action to take when a download problem appears.
 449
 450         Depending on if the downloader has been configured to ignore
 451         download errors or not, this method may throw an exception or
 452         not when errors are found, after printing the message.
 453
 454         tb, if given, is additional traceback information.
 455         """
 456         if message is not None:
 457             self.to_stderr(message)
 458         if self.params.get('verbose'):
 459             if tb is None:
 460                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 461                     tb = ''
 462                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 463                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 464                     tb += compat_str(traceback.format_exc())
 465                 else:
 466                     tb_data = traceback.format_list(traceback.extract_stack())
 467                     tb = ''.join(tb_data)
 468             self.to_stderr(tb)
 469         if not self.params.get('ignoreerrors', False):
 470             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 471                 exc_info = sys.exc_info()[1].exc_info
 472             else:
 473                 exc_info = sys.exc_info()
 474             raise DownloadError(message, exc_info)
 475         self._download_retcode = 1
 476
 477     def report_warning(self, message):
 478         '''
 479         Print the message to stderr, it will be prefixed with 'WARNING:'
 480         If stderr is a tty file the 'WARNING:' will be colored
 481         '''
 482         if self.params.get('logger') is not None:
 483             self.params['logger'].warning(message)
 484         else:
 485             if self.params.get('no_warnings'):
 486                 return
 487             if self._err_file.isatty() and os.name != 'nt':
 488                 _msg_header = '\033[0;33mWARNING:\033[0m'
 489             else:
 490                 _msg_header = 'WARNING:'
 491             warning_message = '%s %s' % (_msg_header, message)
 492             self.to_stderr(warning_message)
 493
 494     def report_error(self, message, tb=None):
 495         '''
 496         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 497         in red if stderr is a tty file.
 498         '''
 499         if self._err_file.isatty() and os.name != 'nt':
 500             _msg_header = '\033[0;31mERROR:\033[0m'
 501         else:
 502             _msg_header = 'ERROR:'
 503         error_message = '%s %s' % (_msg_header, message)
 504         self.trouble(error_message, tb)
 505
 506     def report_file_already_downloaded(self, file_name):
 507         """Report file has already been fully downloaded."""
 508         try:
 509             self.to_screen('[download] %s has already been downloaded' % file_name)
 510         except UnicodeEncodeError:
 511             self.to_screen('[download] The file has already been downloaded')
 512
 513     def prepare_filename(self, info_dict):
 514         """Generate the output filename."""
 515         try:
 516             template_dict = dict(info_dict)
 517
 518             template_dict['epoch'] = int(time.time())
 519             autonumber_size = self.params.get('autonumber_size')
 520             if autonumber_size is None:
 521                 autonumber_size = 5
 522             autonumber_templ = '%0' + str(autonumber_size) + 'd'
 523             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 524             if template_dict.get('playlist_index') is not None:
 525                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
 526             if template_dict.get('resolution') is None:
 527                 if template_dict.get('width') and template_dict.get('height'):
 528                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 529                 elif template_dict.get('height'):
 530                     template_dict['resolution'] = '%sp' % template_dict['height']
 531                 elif template_dict.get('width'):
 532                     template_dict['resolution'] = '?x%d' % template_dict['width']
 533
 534             sanitize = lambda k, v: sanitize_filename(
 535                 compat_str(v),
 536                 restricted=self.params.get('restrictfilenames'),
 537                 is_id=(k == 'id'))
 538             template_dict = dict((k, sanitize(k, v))
 539                                  for k, v in template_dict.items()
 540                                  if v is not None)
 541             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 542
 543             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 544             tmpl = compat_expanduser(outtmpl)
 545             filename = tmpl % template_dict
 546             return filename
 547         except ValueError as err:
 548             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 549             return None
 550
 551     def _match_entry(self, info_dict):
 552         """ Returns None iff the file should be downloaded """
 553
 554         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 555         if 'title' in info_dict:
 556             # This can happen when we're just evaluating the playlist
 557             title = info_dict['title']
 558             matchtitle = self.params.get('matchtitle', False)
 559             if matchtitle:
 560                 if not re.search(matchtitle, title, re.IGNORECASE):
 561                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 562             rejecttitle = self.params.get('rejecttitle', False)
 563             if rejecttitle:
 564                 if re.search(rejecttitle, title, re.IGNORECASE):
 565                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 566         date = info_dict.get('upload_date', None)
 567         if date is not None:
 568             dateRange = self.params.get('daterange', DateRange())
 569             if date not in dateRange:
 570                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 571         view_count = info_dict.get('view_count', None)
 572         if view_count is not None:
 573             min_views = self.params.get('min_views')
 574             if min_views is not None and view_count < min_views:
 575                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 576             max_views = self.params.get('max_views')
 577             if max_views is not None and view_count > max_views:
 578                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 579         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
 580             return 'Skipping "%s" because it is age restricted' % title
 581         if self.in_download_archive(info_dict):
 582             return '%s has already been recorded in archive' % video_title
 583         return None
 584
 585     @staticmethod
 586     def add_extra_info(info_dict, extra_info):
 587         '''Set the keys from extra_info in info dict if they are missing'''
 588         for key, value in extra_info.items():
 589             info_dict.setdefault(key, value)
 590
 591     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 592                      process=True):
 593         '''
 594         Returns a list with a dictionary for each video we find.
 595         If 'download', also downloads the videos.
 596         extra_info is a dict containing the extra values to add to each result
 597          '''
 598
 599         if ie_key:
 600             ies = [self.get_info_extractor(ie_key)]
 601         else:
 602             ies = self._ies
 603
 604         for ie in ies:
 605             if not ie.suitable(url):
 606                 continue
 607
 608             if not ie.working():
 609                 self.report_warning('The program functionality for this site has been marked as broken, '
 610                                     'and will probably not work.')
 611
 612             try:
 613                 ie_result = ie.extract(url)
 614                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
 615                     break
 616                 if isinstance(ie_result, list):
 617                     # Backwards compatibility: old IE result format
 618                     ie_result = {
 619                         '_type': 'compat_list',
 620                         'entries': ie_result,
 621                     }
 622                 self.add_default_extra_info(ie_result, ie, url)
 623                 if process:
 624                     return self.process_ie_result(ie_result, download, extra_info)
 625                 else:
 626                     return ie_result
 627             except ExtractorError as de:  # An error we somewhat expected
 628                 self.report_error(compat_str(de), de.format_traceback())
 629                 break
 630             except MaxDownloadsReached:
 631                 raise
 632             except Exception as e:
 633                 if self.params.get('ignoreerrors', False):
 634                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 635                     break
 636                 else:
 637                     raise
 638         else:
 639             self.report_error('no suitable InfoExtractor for URL %s' % url)
 640
 641     def add_default_extra_info(self, ie_result, ie, url):
 642         self.add_extra_info(ie_result, {
 643             'extractor': ie.IE_NAME,
 644             'webpage_url': url,
 645             'webpage_url_basename': url_basename(url),
 646             'extractor_key': ie.ie_key(),
 647         })
 648
 649     def process_ie_result(self, ie_result, download=True, extra_info={}):
 650         """
 651         Take the result of the ie(may be modified) and resolve all unresolved
 652         references (URLs, playlist items).
 653
 654         It will also download the videos if 'download'.
 655         Returns the resolved ie_result.
 656         """
 657
 658         result_type = ie_result.get('_type', 'video')
 659
 660         if result_type in ('url', 'url_transparent'):
 661             extract_flat = self.params.get('extract_flat', False)
 662             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
 663                     extract_flat is True):
 664                 if self.params.get('forcejson', False):
 665                     self.to_stdout(json.dumps(ie_result))
 666                 return ie_result
 667
 668         if result_type == 'video':
 669             self.add_extra_info(ie_result, extra_info)
 670             return self.process_video_result(ie_result, download=download)
 671         elif result_type == 'url':
 672             # We have to add extra_info to the results because it may be
 673             # contained in a playlist
 674             return self.extract_info(ie_result['url'],
 675                                      download,
 676                                      ie_key=ie_result.get('ie_key'),
 677                                      extra_info=extra_info)
 678         elif result_type == 'url_transparent':
 679             # Use the information from the embedding page
 680             info = self.extract_info(
 681                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 682                 extra_info=extra_info, download=False, process=False)
 683
 684             force_properties = dict(
 685                 (k, v) for k, v in ie_result.items() if v is not None)
 686             for f in ('_type', 'url'):
 687                 if f in force_properties:
 688                     del force_properties[f]
 689             new_result = info.copy()
 690             new_result.update(force_properties)
 691
 692             assert new_result.get('_type') != 'url_transparent'
 693
 694             return self.process_ie_result(
 695                 new_result, download=download, extra_info=extra_info)
 696         elif result_type == 'playlist' or result_type == 'multi_video':
 697             # We process each entry in the playlist
 698             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 699             self.to_screen('[download] Downloading playlist: %s' % playlist)
 700
 701             playlist_results = []
 702
 703             playliststart = self.params.get('playliststart', 1) - 1
 704             playlistend = self.params.get('playlistend', None)
 705             # For backwards compatibility, interpret -1 as whole list
 706             if playlistend == -1:
 707                 playlistend = None
 708
 709             playlistitems_str = self.params.get('playlist_items', None)
 710             playlistitems = None
 711             if playlistitems_str is not None:
 712                 def iter_playlistitems(format):
 713                     for string_segment in format.split(','):
 714                         if '-' in string_segment:
 715                             start, end = string_segment.split('-')
 716                             for item in range(int(start), int(end) + 1):
 717                                 yield int(item)
 718                         else:
 719                             yield int(string_segment)
 720                 playlistitems = iter_playlistitems(playlistitems_str)
 721
 722             ie_entries = ie_result['entries']
 723             if isinstance(ie_entries, list):
 724                 n_all_entries = len(ie_entries)
 725                 if playlistitems:
 726                     entries = [ie_entries[i - 1] for i in playlistitems]
 727                 else:
 728                     entries = ie_entries[playliststart:playlistend]
 729                 n_entries = len(entries)
 730                 self.to_screen(
 731                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
 732                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
 733             elif isinstance(ie_entries, PagedList):
 734                 if playlistitems:
 735                     entries = []
 736                     for item in playlistitems:
 737                         entries.extend(ie_entries.getslice(
 738                             item - 1, item
 739                         ))
 740                 else:
 741                     entries = ie_entries.getslice(
 742                         playliststart, playlistend)
 743                 n_entries = len(entries)
 744                 self.to_screen(
 745                     "[%s] playlist %s: Downloading %d videos" %
 746                     (ie_result['extractor'], playlist, n_entries))
 747             else:  # iterable
 748                 if playlistitems:
 749                     entry_list = list(ie_entries)
 750                     entries = [entry_list[i - 1] for i in playlistitems]
 751                 else:
 752                     entries = list(itertools.islice(
 753                         ie_entries, playliststart, playlistend))
 754                 n_entries = len(entries)
 755                 self.to_screen(
 756                     "[%s] playlist %s: Downloading %d videos" %
 757                     (ie_result['extractor'], playlist, n_entries))
 758
 759             if self.params.get('playlistreverse', False):
 760                 entries = entries[::-1]
 761
 762             for i, entry in enumerate(entries, 1):
 763                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
 764                 extra = {
 765                     'n_entries': n_entries,
 766                     'playlist': playlist,
 767                     'playlist_id': ie_result.get('id'),
 768                     'playlist_title': ie_result.get('title'),
 769                     'playlist_index': i + playliststart,
 770                     'extractor': ie_result['extractor'],
 771                     'webpage_url': ie_result['webpage_url'],
 772                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
 773                     'extractor_key': ie_result['extractor_key'],
 774                 }
 775
 776                 reason = self._match_entry(entry)
 777                 if reason is not None:
 778                     self.to_screen('[download] ' + reason)
 779                     continue
 780
 781                 entry_result = self.process_ie_result(entry,
 782                                                       download=download,
 783                                                       extra_info=extra)
 784                 playlist_results.append(entry_result)
 785             ie_result['entries'] = playlist_results
 786             return ie_result
 787         elif result_type == 'compat_list':
 788             self.report_warning(
 789                 'Extractor %s returned a compat_list result. '
 790                 'It needs to be updated.' % ie_result.get('extractor'))
 791
 792             def _fixup(r):
 793                 self.add_extra_info(
 794                     r,
 795                     {
 796                         'extractor': ie_result['extractor'],
 797                         'webpage_url': ie_result['webpage_url'],
 798                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
 799                         'extractor_key': ie_result['extractor_key'],
 800                     }
 801                 )
 802                 return r
 803             ie_result['entries'] = [
 804                 self.process_ie_result(_fixup(r), download, extra_info)
 805                 for r in ie_result['entries']
 806             ]
 807             return ie_result
 808         else:
 809             raise Exception('Invalid result type: %s' % result_type)
 810
 811     def _apply_format_filter(self, format_spec, available_formats):
 812         " Returns a tuple of the remaining format_spec and filtered formats "
 813
 814         OPERATORS = {
 815             '<': operator.lt,
 816             '<=': operator.le,
 817             '>': operator.gt,
 818             '>=': operator.ge,
 819             '=': operator.eq,
 820             '!=': operator.ne,
 821         }
 822         operator_rex = re.compile(r'''(?x)\s*\[
 823             (?P<key>width|height|tbr|abr|vbr|filesize|fps)
 824             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
 825             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
 826             \]$
 827             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
 828         m = operator_rex.search(format_spec)
 829         if not m:
 830             raise ValueError('Invalid format specification %r' % format_spec)
 831
 832         try:
 833             comparison_value = int(m.group('value'))
 834         except ValueError:
 835             comparison_value = parse_filesize(m.group('value'))
 836             if comparison_value is None:
 837                 comparison_value = parse_filesize(m.group('value') + 'B')
 838             if comparison_value is None:
 839                 raise ValueError(
 840                     'Invalid value %r in format specification %r' % (
 841                         m.group('value'), format_spec))
 842         op = OPERATORS[m.group('op')]
 843
 844         def _filter(f):
 845             actual_value = f.get(m.group('key'))
 846             if actual_value is None:
 847                 return m.group('none_inclusive')
 848             return op(actual_value, comparison_value)
 849         new_formats = [f for f in available_formats if _filter(f)]
 850
 851         new_format_spec = format_spec[:-len(m.group(0))]
 852         if not new_format_spec:
 853             new_format_spec = 'best'
 854
 855         return (new_format_spec, new_formats)
 856
 857     def select_format(self, format_spec, available_formats):
 858         while format_spec.endswith(']'):
 859             format_spec, available_formats = self._apply_format_filter(
 860                 format_spec, available_formats)
 861         if not available_formats:
 862             return None
 863
 864         if format_spec == 'best' or format_spec is None:
 865             return available_formats[-1]
 866         elif format_spec == 'worst':
 867             return available_formats[0]
 868         elif format_spec == 'bestaudio':
 869             audio_formats = [
 870                 f for f in available_formats
 871                 if f.get('vcodec') == 'none']
 872             if audio_formats:
 873                 return audio_formats[-1]
 874         elif format_spec == 'worstaudio':
 875             audio_formats = [
 876                 f for f in available_formats
 877                 if f.get('vcodec') == 'none']
 878             if audio_formats:
 879                 return audio_formats[0]
 880         elif format_spec == 'bestvideo':
 881             video_formats = [
 882                 f for f in available_formats
 883                 if f.get('acodec') == 'none']
 884             if video_formats:
 885                 return video_formats[-1]
 886         elif format_spec == 'worstvideo':
 887             video_formats = [
 888                 f for f in available_formats
 889                 if f.get('acodec') == 'none']
 890             if video_formats:
 891                 return video_formats[0]
 892         else:
 893             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
 894             if format_spec in extensions:
 895                 filter_f = lambda f: f['ext'] == format_spec
 896             else:
 897                 filter_f = lambda f: f['format_id'] == format_spec
 898             matches = list(filter(filter_f, available_formats))
 899             if matches:
 900                 return matches[-1]
 901         return None
 902
 903     def _calc_headers(self, info_dict):
 904         res = std_headers.copy()
 905
 906         add_headers = info_dict.get('http_headers')
 907         if add_headers:
 908             res.update(add_headers)
 909
 910         cookies = self._calc_cookies(info_dict)
 911         if cookies:
 912             res['Cookie'] = cookies
 913
 914         return res
 915
 916     def _calc_cookies(self, info_dict):
 917         class _PseudoRequest(object):
 918             def __init__(self, url):
 919                 self.url = url
 920                 self.headers = {}
 921                 self.unverifiable = False
 922
 923             def add_unredirected_header(self, k, v):
 924                 self.headers[k] = v
 925
 926             def get_full_url(self):
 927                 return self.url
 928
 929             def is_unverifiable(self):
 930                 return self.unverifiable
 931
 932             def has_header(self, h):
 933                 return h in self.headers
 934
 935         pr = _PseudoRequest(info_dict['url'])
 936         self.cookiejar.add_cookie_header(pr)
 937         return pr.headers.get('Cookie')
 938
 939     def process_video_result(self, info_dict, download=True):
 940         assert info_dict.get('_type', 'video') == 'video'
 941
 942         if 'id' not in info_dict:
 943             raise ExtractorError('Missing "id" field in extractor result')
 944         if 'title' not in info_dict:
 945             raise ExtractorError('Missing "title" field in extractor result')
 946
 947         if 'playlist' not in info_dict:
 948             # It isn't part of a playlist
 949             info_dict['playlist'] = None
 950             info_dict['playlist_index'] = None
 951
 952         thumbnails = info_dict.get('thumbnails')
 953         if thumbnails is None:
 954             thumbnail = info_dict.get('thumbnail')
 955             if thumbnail:
 956                 thumbnails = [{'url': thumbnail}]
 957         if thumbnails:
 958             thumbnails.sort(key=lambda t: (
 959                 t.get('preference'), t.get('width'), t.get('height'),
 960                 t.get('id'), t.get('url')))
 961             for t in thumbnails:
 962                 if 'width' in t and 'height' in t:
 963                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
 964
 965         if thumbnails and 'thumbnail' not in info_dict:
 966             info_dict['thumbnail'] = thumbnails[-1]['url']
 967
 968         if 'display_id' not in info_dict and 'id' in info_dict:
 969             info_dict['display_id'] = info_dict['id']
 970
 971         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
 972             # Working around negative timestamps in Windows
 973             # (see http://bugs.python.org/issue1646728)
 974             if info_dict['timestamp'] < 0 and os.name == 'nt':
 975                 info_dict['timestamp'] = 0
 976             upload_date = datetime.datetime.utcfromtimestamp(
 977                 info_dict['timestamp'])
 978             info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
 979
 980         # This extractors handle format selection themselves
 981         if info_dict['extractor'] in ['Youku']:
 982             if download:
 983                 self.process_info(info_dict)
 984             return info_dict
 985
 986         # We now pick which formats have to be downloaded
 987         if info_dict.get('formats') is None:
 988             # There's only one format available
 989             formats = [info_dict]
 990         else:
 991             formats = info_dict['formats']
 992
 993         if not formats:
 994             raise ExtractorError('No video formats found!')
 995
 996         # We check that all the formats have the format and format_id fields
 997         for i, format in enumerate(formats):
 998             if 'url' not in format:
 999                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1000
1001             if format.get('format_id') is None:
1002                 format['format_id'] = compat_str(i)
1003             if format.get('format') is None:
1004                 format['format'] = '{id} - {res}{note}'.format(
1005                     id=format['format_id'],
1006                     res=self.format_resolution(format),
1007                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1008                 )
1009             # Automatically determine file extension if missing
1010             if 'ext' not in format:
1011                 format['ext'] = determine_ext(format['url']).lower()
1012             # Add HTTP headers, so that external programs can use them from the
1013             # json output
1014             full_format_info = info_dict.copy()
1015             full_format_info.update(format)
1016             format['http_headers'] = self._calc_headers(full_format_info)
1017
1018         format_limit = self.params.get('format_limit', None)
1019         if format_limit:
1020             formats = list(takewhile_inclusive(
1021                 lambda f: f['format_id'] != format_limit, formats
1022             ))
1023
1024         # TODO Central sorting goes here
1025
1026         if formats[0] is not info_dict:
1027             # only set the 'formats' fields if the original info_dict list them
1028             # otherwise we end up with a circular reference, the first (and unique)
1029             # element in the 'formats' field in info_dict is info_dict itself,
1030             # wich can't be exported to json
1031             info_dict['formats'] = formats
1032         if self.params.get('listformats'):
1033             self.list_formats(info_dict)
1034             return
1035         if self.params.get('list_thumbnails'):
1036             self.list_thumbnails(info_dict)
1037             return
1038
1039         req_format = self.params.get('format')
1040         if req_format is None:
1041             req_format = 'best'
1042         formats_to_download = []
1043         # The -1 is for supporting YoutubeIE
1044         if req_format in ('-1', 'all'):
1045             formats_to_download = formats
1046         else:
1047             for rfstr in req_format.split(','):
1048                 # We can accept formats requested in the format: 34/5/best, we pick
1049                 # the first that is available, starting from left
1050                 req_formats = rfstr.split('/')
1051                 for rf in req_formats:
1052                     if re.match(r'.+?\+.+?', rf) is not None:
1053                         # Two formats have been requested like '137+139'
1054                         format_1, format_2 = rf.split('+')
1055                         formats_info = (self.select_format(format_1, formats),
1056                                         self.select_format(format_2, formats))
1057                         if all(formats_info):
1058                             # The first format must contain the video and the
1059                             # second the audio
1060                             if formats_info[0].get('vcodec') == 'none':
1061                                 self.report_error('The first format must '
1062                                                   'contain the video, try using '
1063                                                   '"-f %s+%s"' % (format_2, format_1))
1064                                 return
1065                             output_ext = (
1066                                 formats_info[0]['ext']
1067                                 if self.params.get('merge_output_format') is None
1068                                 else self.params['merge_output_format'])
1069                             selected_format = {
1070                                 'requested_formats': formats_info,
1071                                 'format': rf,
1072                                 'ext': formats_info[0]['ext'],
1073                                 'width': formats_info[0].get('width'),
1074                                 'height': formats_info[0].get('height'),
1075                                 'resolution': formats_info[0].get('resolution'),
1076                                 'fps': formats_info[0].get('fps'),
1077                                 'vcodec': formats_info[0].get('vcodec'),
1078                                 'vbr': formats_info[0].get('vbr'),
1079                                 'stretched_ratio': formats_info[0].get('stretched_ratio'),
1080                                 'acodec': formats_info[1].get('acodec'),
1081                                 'abr': formats_info[1].get('abr'),
1082                                 'ext': output_ext,
1083                             }
1084                         else:
1085                             selected_format = None
1086                     else:
1087                         selected_format = self.select_format(rf, formats)
1088                     if selected_format is not None:
1089                         formats_to_download.append(selected_format)
1090                         break
1091         if not formats_to_download:
1092             raise ExtractorError('requested format not available',
1093                                  expected=True)
1094
1095         if download:
1096             if len(formats_to_download) > 1:
1097                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1098             for format in formats_to_download:
1099                 new_info = dict(info_dict)
1100                 new_info.update(format)
1101                 self.process_info(new_info)
1102         # We update the info dict with the best quality format (backwards compatibility)
1103         info_dict.update(formats_to_download[-1])
1104         return info_dict
1105
1106     def process_info(self, info_dict):
1107         """Process a single resolved IE result."""
1108
1109         assert info_dict.get('_type', 'video') == 'video'
1110
1111         max_downloads = self.params.get('max_downloads')
1112         if max_downloads is not None:
1113             if self._num_downloads >= int(max_downloads):
1114                 raise MaxDownloadsReached()
1115
1116         info_dict['fulltitle'] = info_dict['title']
1117         if len(info_dict['title']) > 200:
1118             info_dict['title'] = info_dict['title'][:197] + '...'
1119
1120         # Keep for backwards compatibility
1121         info_dict['stitle'] = info_dict['title']
1122
1123         if 'format' not in info_dict:
1124             info_dict['format'] = info_dict['ext']
1125
1126         reason = self._match_entry(info_dict)
1127         if reason is not None:
1128             self.to_screen('[download] ' + reason)
1129             return
1130
1131         self._num_downloads += 1
1132
1133         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1134
1135         # Forced printings
1136         if self.params.get('forcetitle', False):
1137             self.to_stdout(info_dict['fulltitle'])
1138         if self.params.get('forceid', False):
1139             self.to_stdout(info_dict['id'])
1140         if self.params.get('forceurl', False):
1141             if info_dict.get('requested_formats') is not None:
1142                 for f in info_dict['requested_formats']:
1143                     self.to_stdout(f['url'] + f.get('play_path', ''))
1144             else:
1145                 # For RTMP URLs, also include the playpath
1146                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1147         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1148             self.to_stdout(info_dict['thumbnail'])
1149         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1150             self.to_stdout(info_dict['description'])
1151         if self.params.get('forcefilename', False) and filename is not None:
1152             self.to_stdout(filename)
1153         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1154             self.to_stdout(formatSeconds(info_dict['duration']))
1155         if self.params.get('forceformat', False):
1156             self.to_stdout(info_dict['format'])
1157         if self.params.get('forcejson', False):
1158             self.to_stdout(json.dumps(info_dict))
1159
1160         # Do nothing else if in simulate mode
1161         if self.params.get('simulate', False):
1162             return
1163
1164         if filename is None:
1165             return
1166
1167         try:
1168             dn = os.path.dirname(encodeFilename(filename))
1169             if dn and not os.path.exists(dn):
1170                 os.makedirs(dn)
1171         except (OSError, IOError) as err:
1172             self.report_error('unable to create directory ' + compat_str(err))
1173             return
1174
1175         if self.params.get('writedescription', False):
1176             descfn = filename + '.description'
1177             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1178                 self.to_screen('[info] Video description is already present')
1179             elif info_dict.get('description') is None:
1180                 self.report_warning('There\'s no description to write.')
1181             else:
1182                 try:
1183                     self.to_screen('[info] Writing video description to: ' + descfn)
1184                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1185                         descfile.write(info_dict['description'])
1186                 except (OSError, IOError):
1187                     self.report_error('Cannot write description file ' + descfn)
1188                     return
1189
1190         if self.params.get('writeannotations', False):
1191             annofn = filename + '.annotations.xml'
1192             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1193                 self.to_screen('[info] Video annotations are already present')
1194             else:
1195                 try:
1196                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1197                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1198                         annofile.write(info_dict['annotations'])
1199                 except (KeyError, TypeError):
1200                     self.report_warning('There are no annotations to write.')
1201                 except (OSError, IOError):
1202                     self.report_error('Cannot write annotations file: ' + annofn)
1203                     return
1204
1205         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1206                                        self.params.get('writeautomaticsub')])
1207
1208         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
1209             # subtitles download errors are already managed as troubles in relevant IE
1210             # that way it will silently go on when used with unsupporting IE
1211             subtitles = info_dict['subtitles']
1212             sub_format = self.params.get('subtitlesformat', 'srt')
1213             for sub_lang in subtitles.keys():
1214                 sub = subtitles[sub_lang]
1215                 if sub is None:
1216                     continue
1217                 try:
1218                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1219                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1220                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1221                     else:
1222                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1223                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1224                             subfile.write(sub)
1225                 except (OSError, IOError):
1226                     self.report_error('Cannot write subtitles file ' + sub_filename)
1227                     return
1228
1229         if self.params.get('writeinfojson', False):
1230             infofn = os.path.splitext(filename)[0] + '.info.json'
1231             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1232                 self.to_screen('[info] Video description metadata is already present')
1233             else:
1234                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1235                 try:
1236                     write_json_file(info_dict, infofn)
1237                 except (OSError, IOError):
1238                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1239                     return
1240
1241         self._write_thumbnails(info_dict, filename)
1242
1243         if not self.params.get('skip_download', False):
1244             try:
1245                 def dl(name, info):
1246                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1247                     for ph in self._progress_hooks:
1248                         fd.add_progress_hook(ph)
1249                     if self.params.get('verbose'):
1250                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1251                     return fd.download(name, info)
1252
1253                 if info_dict.get('requested_formats') is not None:
1254                     downloaded = []
1255                     success = True
1256                     merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1257                     if not merger._executable:
1258                         postprocessors = []
1259                         self.report_warning('You have requested multiple '
1260                                             'formats but ffmpeg or avconv are not installed.'
1261                                             ' The formats won\'t be merged')
1262                     else:
1263                         postprocessors = [merger]
1264                     for f in info_dict['requested_formats']:
1265                         new_info = dict(info_dict)
1266                         new_info.update(f)
1267                         fname = self.prepare_filename(new_info)
1268                         fname = prepend_extension(fname, 'f%s' % f['format_id'])
1269                         downloaded.append(fname)
1270                         partial_success = dl(fname, new_info)
1271                         success = success and partial_success
1272                     info_dict['__postprocessors'] = postprocessors
1273                     info_dict['__files_to_merge'] = downloaded
1274                 else:
1275                     # Just a single file
1276                     success = dl(filename, info_dict)
1277             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1278                 self.report_error('unable to download video data: %s' % str(err))
1279                 return
1280             except (OSError, IOError) as err:
1281                 raise UnavailableVideoError(err)
1282             except (ContentTooShortError, ) as err:
1283                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1284                 return
1285
1286             if success:
1287                 # Fixup content
1288                 fixup_policy = self.params.get('fixup')
1289                 if fixup_policy is None:
1290                     fixup_policy = 'detect_or_warn'
1291
1292                 stretched_ratio = info_dict.get('stretched_ratio')
1293                 if stretched_ratio is not None and stretched_ratio != 1:
1294                     if fixup_policy == 'warn':
1295                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1296                             info_dict['id'], stretched_ratio))
1297                     elif fixup_policy == 'detect_or_warn':
1298                         stretched_pp = FFmpegFixupStretchedPP(self)
1299                         if stretched_pp.available:
1300                             info_dict.setdefault('__postprocessors', [])
1301                             info_dict['__postprocessors'].append(stretched_pp)
1302                         else:
1303                             self.report_warning(
1304                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1305                                     info_dict['id'], stretched_ratio))
1306                     else:
1307                         assert fixup_policy in ('ignore', 'never')
1308
1309                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1310                     if fixup_policy == 'warn':
1311                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1312                             info_dict['id']))
1313                     elif fixup_policy == 'detect_or_warn':
1314                         fixup_pp = FFmpegFixupM4aPP(self)
1315                         if fixup_pp.available:
1316                             info_dict.setdefault('__postprocessors', [])
1317                             info_dict['__postprocessors'].append(fixup_pp)
1318                         else:
1319                             self.report_warning(
1320                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1321                                     info_dict['id']))
1322                     else:
1323                         assert fixup_policy in ('ignore', 'never')
1324
1325                 try:
1326                     self.post_process(filename, info_dict)
1327                 except (PostProcessingError) as err:
1328                     self.report_error('postprocessing: %s' % str(err))
1329                     return
1330                 self.record_download_archive(info_dict)
1331
1332     def download(self, url_list):
1333         """Download a given list of URLs."""
1334         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1335         if (len(url_list) > 1 and
1336                 '%' not in outtmpl
1337                 and self.params.get('max_downloads') != 1):
1338             raise SameFileError(outtmpl)
1339
1340         for url in url_list:
1341             try:
1342                 # It also downloads the videos
1343                 res = self.extract_info(url)
1344             except UnavailableVideoError:
1345                 self.report_error('unable to download video')
1346             except MaxDownloadsReached:
1347                 self.to_screen('[info] Maximum number of downloaded files reached.')
1348                 raise
1349             else:
1350                 if self.params.get('dump_single_json', False):
1351                     self.to_stdout(json.dumps(res))
1352
1353         return self._download_retcode
1354
1355     def download_with_info_file(self, info_filename):
1356         with io.open(info_filename, 'r', encoding='utf-8') as f:
1357             info = json.load(f)
1358         try:
1359             self.process_ie_result(info, download=True)
1360         except DownloadError:
1361             webpage_url = info.get('webpage_url')
1362             if webpage_url is not None:
1363                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1364                 return self.download([webpage_url])
1365             else:
1366                 raise
1367         return self._download_retcode
1368
1369     def post_process(self, filename, ie_info):
1370         """Run all the postprocessors on the given file."""
1371         info = dict(ie_info)
1372         info['filepath'] = filename
1373         pps_chain = []
1374         if ie_info.get('__postprocessors') is not None:
1375             pps_chain.extend(ie_info['__postprocessors'])
1376         pps_chain.extend(self._pps)
1377         for pp in pps_chain:
1378             keep_video = None
1379             old_filename = info['filepath']
1380             try:
1381                 keep_video_wish, info = pp.run(info)
1382                 if keep_video_wish is not None:
1383                     if keep_video_wish:
1384                         keep_video = keep_video_wish
1385                     elif keep_video is None:
1386                         # No clear decision yet, let IE decide
1387                         keep_video = keep_video_wish
1388             except PostProcessingError as e:
1389                 self.report_error(e.msg)
1390             if keep_video is False and not self.params.get('keepvideo', False):
1391                 try:
1392                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1393                     os.remove(encodeFilename(old_filename))
1394                 except (IOError, OSError):
1395                     self.report_warning('Unable to remove downloaded video file')
1396
1397     def _make_archive_id(self, info_dict):
1398         # Future-proof against any change in case
1399         # and backwards compatibility with prior versions
1400         extractor = info_dict.get('extractor_key')
1401         if extractor is None:
1402             if 'id' in info_dict:
1403                 extractor = info_dict.get('ie_key')  # key in a playlist
1404         if extractor is None:
1405             return None  # Incomplete video information
1406         return extractor.lower() + ' ' + info_dict['id']
1407
1408     def in_download_archive(self, info_dict):
1409         fn = self.params.get('download_archive')
1410         if fn is None:
1411             return False
1412
1413         vid_id = self._make_archive_id(info_dict)
1414         if vid_id is None:
1415             return False  # Incomplete video information
1416
1417         try:
1418             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1419                 for line in archive_file:
1420                     if line.strip() == vid_id:
1421                         return True
1422         except IOError as ioe:
1423             if ioe.errno != errno.ENOENT:
1424                 raise
1425         return False
1426
1427     def record_download_archive(self, info_dict):
1428         fn = self.params.get('download_archive')
1429         if fn is None:
1430             return
1431         vid_id = self._make_archive_id(info_dict)
1432         assert vid_id
1433         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1434             archive_file.write(vid_id + '\n')
1435
1436     @staticmethod
1437     def format_resolution(format, default='unknown'):
1438         if format.get('vcodec') == 'none':
1439             return 'audio only'
1440         if format.get('resolution') is not None:
1441             return format['resolution']
1442         if format.get('height') is not None:
1443             if format.get('width') is not None:
1444                 res = '%sx%s' % (format['width'], format['height'])
1445             else:
1446                 res = '%sp' % format['height']
1447         elif format.get('width') is not None:
1448             res = '?x%d' % format['width']
1449         else:
1450             res = default
1451         return res
1452
1453     def _format_note(self, fdict):
1454         res = ''
1455         if fdict.get('ext') in ['f4f', 'f4m']:
1456             res += '(unsupported) '
1457         if fdict.get('format_note') is not None:
1458             res += fdict['format_note'] + ' '
1459         if fdict.get('tbr') is not None:
1460             res += '%4dk ' % fdict['tbr']
1461         if fdict.get('container') is not None:
1462             if res:
1463                 res += ', '
1464             res += '%s container' % fdict['container']
1465         if (fdict.get('vcodec') is not None and
1466                 fdict.get('vcodec') != 'none'):
1467             if res:
1468                 res += ', '
1469             res += fdict['vcodec']
1470             if fdict.get('vbr') is not None:
1471                 res += '@'
1472         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1473             res += 'video@'
1474         if fdict.get('vbr') is not None:
1475             res += '%4dk' % fdict['vbr']
1476         if fdict.get('fps') is not None:
1477             res += ', %sfps' % fdict['fps']
1478         if fdict.get('acodec') is not None:
1479             if res:
1480                 res += ', '
1481             if fdict['acodec'] == 'none':
1482                 res += 'video only'
1483             else:
1484                 res += '%-5s' % fdict['acodec']
1485         elif fdict.get('abr') is not None:
1486             if res:
1487                 res += ', '
1488             res += 'audio'
1489         if fdict.get('abr') is not None:
1490             res += '@%3dk' % fdict['abr']
1491         if fdict.get('asr') is not None:
1492             res += ' (%5dHz)' % fdict['asr']
1493         if fdict.get('filesize') is not None:
1494             if res:
1495                 res += ', '
1496             res += format_bytes(fdict['filesize'])
1497         elif fdict.get('filesize_approx') is not None:
1498             if res:
1499                 res += ', '
1500             res += '~' + format_bytes(fdict['filesize_approx'])
1501         return res
1502
1503     def list_formats(self, info_dict):
1504         def line(format, idlen=20):
1505             return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1506                 format['format_id'],
1507                 format['ext'],
1508                 self.format_resolution(format),
1509                 self._format_note(format),
1510             ))
1511
1512         formats = info_dict.get('formats', [info_dict])
1513         idlen = max(len('format code'),
1514                     max(len(f['format_id']) for f in formats))
1515         formats_s = [
1516             line(f, idlen) for f in formats
1517             if f.get('preference') is None or f['preference'] >= -1000]
1518         if len(formats) > 1:
1519             formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1520             formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1521
1522         header_line = line({
1523             'format_id': 'format code', 'ext': 'extension',
1524             'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1525         self.to_screen(
1526             '[info] Available formats for %s:\n%s\n%s' %
1527             (info_dict['id'], header_line, '\n'.join(formats_s)))
1528
1529     def list_thumbnails(self, info_dict):
1530         thumbnails = info_dict.get('thumbnails')
1531         if not thumbnails:
1532             tn_url = info_dict.get('thumbnail')
1533             if tn_url:
1534                 thumbnails = [{'id': '0', 'url': tn_url}]
1535             else:
1536                 self.to_screen(
1537                     '[info] No thumbnails present for %s' % info_dict['id'])
1538                 return
1539
1540         self.to_screen(
1541             '[info] Thumbnails for %s:' % info_dict['id'])
1542         self.to_screen(render_table(
1543             ['ID', 'width', 'height', 'URL'],
1544             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1545
1546     def urlopen(self, req):
1547         """ Start an HTTP download """
1548
1549         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1550         # always respected by websites, some tend to give out URLs with non percent-encoded
1551         # non-ASCII characters (see telemb.py, ard.py [#3412])
1552         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1553         # To work around aforementioned issue we will replace request's original URL with
1554         # percent-encoded one
1555         req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
1556         url = req if req_is_string else req.get_full_url()
1557         url_escaped = escape_url(url)
1558
1559         # Substitute URL if any change after escaping
1560         if url != url_escaped:
1561             if req_is_string:
1562                 req = url_escaped
1563             else:
1564                 req = compat_urllib_request.Request(
1565                     url_escaped, data=req.data, headers=req.headers,
1566                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1567
1568         return self._opener.open(req, timeout=self._socket_timeout)
1569
1570     def print_debug_header(self):
1571         if not self.params.get('verbose'):
1572             return
1573
1574         if type('') is not compat_str:
1575             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1576             self.report_warning(
1577                 'Your Python is broken! Update to a newer and supported version')
1578
1579         stdout_encoding = getattr(
1580             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1581         encoding_str = (
1582             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1583                 locale.getpreferredencoding(),
1584                 sys.getfilesystemencoding(),
1585                 stdout_encoding,
1586                 self.get_encoding()))
1587         write_string(encoding_str, encoding=None)
1588
1589         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1590         try:
1591             sp = subprocess.Popen(
1592                 ['git', 'rev-parse', '--short', 'HEAD'],
1593                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1594                 cwd=os.path.dirname(os.path.abspath(__file__)))
1595             out, err = sp.communicate()
1596             out = out.decode().strip()
1597             if re.match('[0-9a-f]+', out):
1598                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1599         except:
1600             try:
1601                 sys.exc_clear()
1602             except:
1603                 pass
1604         self._write_string('[debug] Python version %s - %s\n' % (
1605             platform.python_version(), platform_name()))
1606
1607         exe_versions = FFmpegPostProcessor.get_versions()
1608         exe_versions['rtmpdump'] = rtmpdump_version()
1609         exe_str = ', '.join(
1610             '%s %s' % (exe, v)
1611             for exe, v in sorted(exe_versions.items())
1612             if v
1613         )
1614         if not exe_str:
1615             exe_str = 'none'
1616         self._write_string('[debug] exe versions: %s\n' % exe_str)
1617
1618         proxy_map = {}
1619         for handler in self._opener.handlers:
1620             if hasattr(handler, 'proxies'):
1621                 proxy_map.update(handler.proxies)
1622         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1623
1624         if self.params.get('call_home', False):
1625             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1626             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1627             latest_version = self.urlopen(
1628                 'https://yt-dl.org/latest/version').read().decode('utf-8')
1629             if version_tuple(latest_version) > version_tuple(__version__):
1630                 self.report_warning(
1631                     'You are using an outdated version (newest version: %s)! '
1632                     'See https://yt-dl.org/update if you need help updating.' %
1633                     latest_version)
1634
1635     def _setup_opener(self):
1636         timeout_val = self.params.get('socket_timeout')
1637         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1638
1639         opts_cookiefile = self.params.get('cookiefile')
1640         opts_proxy = self.params.get('proxy')
1641
1642         if opts_cookiefile is None:
1643             self.cookiejar = compat_cookiejar.CookieJar()
1644         else:
1645             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1646                 opts_cookiefile)
1647             if os.access(opts_cookiefile, os.R_OK):
1648                 self.cookiejar.load()
1649
1650         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1651             self.cookiejar)
1652         if opts_proxy is not None:
1653             if opts_proxy == '':
1654                 proxies = {}
1655             else:
1656                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1657         else:
1658             proxies = compat_urllib_request.getproxies()
1659             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1660             if 'http' in proxies and 'https' not in proxies:
1661                 proxies['https'] = proxies['http']
1662         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1663
1664         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1665         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1666         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1667         opener = compat_urllib_request.build_opener(
1668             https_handler, proxy_handler, cookie_processor, ydlh)
1669         # Delete the default user-agent header, which would otherwise apply in
1670         # cases where our custom HTTP handler doesn't come into play
1671         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1672         opener.addheaders = []
1673         self._opener = opener
1674
1675     def encode(self, s):
1676         if isinstance(s, bytes):
1677             return s  # Already encoded
1678
1679         try:
1680             return s.encode(self.get_encoding())
1681         except UnicodeEncodeError as err:
1682             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1683             raise
1684
1685     def get_encoding(self):
1686         encoding = self.params.get('encoding')
1687         if encoding is None:
1688             encoding = preferredencoding()
1689         return encoding
1690
1691     def _write_thumbnails(self, info_dict, filename):
1692         if self.params.get('writethumbnail', False):
1693             thumbnails = info_dict.get('thumbnails')
1694             if thumbnails:
1695                 thumbnails = [thumbnails[-1]]
1696         elif self.params.get('write_all_thumbnails', False):
1697             thumbnails = info_dict.get('thumbnails')
1698         else:
1699             return
1700
1701         if not thumbnails:
1702             # No thumbnails present, so return immediately
1703             return
1704
1705         for t in thumbnails:
1706             thumb_ext = determine_ext(t['url'], 'jpg')
1707             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
1708             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
1709             thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
1710
1711             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1712                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
1713                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1714             else:
1715                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
1716                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1717                 try:
1718                     uf = self.urlopen(t['url'])
1719                     with open(thumb_filename, 'wb') as thumbf:
1720                         shutil.copyfileobj(uf, thumbf)
1721                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
1722                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
1723                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1724                     self.report_warning('Unable to download thumbnail "%s": %s' %
1725                                         (t['url'], compat_str(err)))