_ Git - youtube-dl/blob - youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import datetime
   8 import errno
   9 import io
  10 import itertools
  11 import json
  12 import locale
  13 import operator
  14 import os
  15 import platform
  16 import re
  17 import shutil
  18 import subprocess
  19 import socket
  20 import sys
  21 import time
  22 import traceback
  23
  24 if os.name == 'nt':
  25     import ctypes
  26
  27 from .compat import (
  28     compat_cookiejar,
  29     compat_expanduser,
  30     compat_http_client,
  31     compat_kwargs,
  32     compat_str,
  33     compat_urllib_error,
  34     compat_urllib_request,
  35 )
  36 from .utils import (
  37     escape_url,
  38     ContentTooShortError,
  39     date_from_str,
  40     DateRange,
  41     DEFAULT_OUTTMPL,
  42     determine_ext,
  43     DownloadError,
  44     encodeFilename,
  45     ExtractorError,
  46     format_bytes,
  47     formatSeconds,
  48     get_term_width,
  49     locked_file,
  50     make_HTTPS_handler,
  51     MaxDownloadsReached,
  52     PagedList,
  53     parse_filesize,
  54     PostProcessingError,
  55     platform_name,
  56     preferredencoding,
  57     SameFileError,
  58     sanitize_filename,
  59     std_headers,
  60     subtitles_filename,
  61     takewhile_inclusive,
  62     UnavailableVideoError,
  63     url_basename,
  64     version_tuple,
  65     write_json_file,
  66     write_string,
  67     YoutubeDLHandler,
  68     prepend_extension,
  69     args_to_str,
  70     age_restricted,
  71 )
  72 from .cache import Cache
  73 from .extractor import get_info_extractor, gen_extractors
  74 from .downloader import get_suitable_downloader
  75 from .downloader.rtmp import rtmpdump_version
  76 from .postprocessor import (
  77     FFmpegFixupM4aPP,
  78     FFmpegFixupStretchedPP,
  79     FFmpegMergerPP,
  80     FFmpegPostProcessor,
  81     get_postprocessor,
  82 )
  83 from .version import __version__
  84
  85
  86 class YoutubeDL(object):
  87     """YoutubeDL class.
  88
  89     YoutubeDL objects are the ones responsible of downloading the
  90     actual video file and writing it to disk if the user has requested
  91     it, among some other tasks. In most cases there should be one per
  92     program. As, given a video URL, the downloader doesn't know how to
  93     extract all the needed information, task that InfoExtractors do, it
  94     has to pass the URL to one of them.
  95
  96     For this, YoutubeDL objects have a method that allows
  97     InfoExtractors to be registered in a given order. When it is passed
  98     a URL, the YoutubeDL object hands it to the first InfoExtractor it
  99     finds that reports being able to handle it. The InfoExtractor extracts
 100     all the information about the video or videos the URL refers to, and
 101     YoutubeDL processes the extracted information, possibly using a File
 102     Downloader to download the video.
 103
 104     YoutubeDL objects accept a lot of parameters. In order not to saturate
 105     the object constructor with arguments, it receives a dictionary of
 106     options instead. These options are available through the params
 107     attribute for the InfoExtractors to use. The YoutubeDL also
 108     registers itself as the downloader in charge for the InfoExtractors
 109     that are added to it, so this is a "mutual registration".
 110
 111     Available options:
 112
 113     username:          Username for authentication purposes.
 114     password:          Password for authentication purposes.
 115     videopassword:     Password for accessing a video.
 116     usenetrc:          Use netrc for authentication instead.
 117     verbose:           Print additional info to stdout.
 118     quiet:             Do not print messages to stdout.
 119     no_warnings:       Do not print out anything for warnings.
 120     forceurl:          Force printing final URL.
 121     forcetitle:        Force printing title.
 122     forceid:           Force printing ID.
 123     forcethumbnail:    Force printing thumbnail URL.
 124     forcedescription:  Force printing description.
 125     forcefilename:     Force printing final filename.
 126     forceduration:     Force printing duration.
 127     forcejson:         Force printing info_dict as JSON.
 128     dump_single_json:  Force printing the info_dict of the whole playlist
 129                        (or video) as a single JSON line.
 130     simulate:          Do not download the video files.
 131     format:            Video format code. See options.py for more information.
 132     format_limit:      Highest quality format to try.
 133     outtmpl:           Template for output names.
 134     restrictfilenames: Do not allow "&" and spaces in file names
 135     ignoreerrors:      Do not stop on download errors.
 136     nooverwrites:      Prevent overwriting files.
 137     playliststart:     Playlist item to start at.
 138     playlistend:       Playlist item to end at.
 139     playlistreverse:   Download playlist items in reverse order.
 140     matchtitle:        Download only matching titles.
 141     rejecttitle:       Reject downloads for matching titles.
 142     logger:            Log messages to a logging.Logger instance.
 143     logtostderr:       Log messages to stderr instead of stdout.
 144     writedescription:  Write the video description to a .description file
 145     writeinfojson:     Write the video description to a .info.json file
 146     writeannotations:  Write the video annotations to a .annotations.xml file
 147     writethumbnail:    Write the thumbnail image to a file
 148     writesubtitles:    Write the video subtitles to a file
 149     writeautomaticsub: Write the automatic subtitles to a file
 150     allsubtitles:      Downloads all the subtitles of the video
 151                        (requires writesubtitles or writeautomaticsub)
 152     listsubtitles:     Lists all available subtitles for the video
 153     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
 154     subtitleslangs:    List of languages of the subtitles to download
 155     keepvideo:         Keep the video file after post-processing
 156     daterange:         A DateRange object, download only if the upload_date is in the range.
 157     skip_download:     Skip the actual download of the video file
 158     cachedir:          Location of the cache files in the filesystem.
 159                        False to disable filesystem cache.
 160     noplaylist:        Download single video instead of a playlist if in doubt.
 161     age_limit:         An integer representing the user's age in years.
 162                        Unsuitable videos for the given age are skipped.
 163     min_views:         An integer representing the minimum view count the video
 164                        must have in order to not be skipped.
 165                        Videos without view count information are always
 166                        downloaded. None for no limit.
 167     max_views:         An integer representing the maximum view count.
 168                        Videos that are more popular than that are not
 169                        downloaded.
 170                        Videos without view count information are always
 171                        downloaded. None for no limit.
 172     download_archive:  File name of a file where all downloads are recorded.
 173                        Videos already present in the file are not downloaded
 174                        again.
 175     cookiefile:        File name where cookies should be read from and dumped to.
 176     nocheckcertificate:Do not verify SSL certificates
 177     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 178                        At the moment, this is only supported by YouTube.
 179     proxy:             URL of the proxy server to use
 180     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 181     bidi_workaround:   Work around buggy terminals without bidirectional text
 182                        support, using fribidi
 183     debug_printtraffic:Print out sent and received HTTP traffic
 184     include_ads:       Download ads as well
 185     default_search:    Prepend this string if an input url is not valid.
 186                        'auto' for elaborate guessing
 187     encoding:          Use this encoding instead of the system-specified.
 188     extract_flat:      Do not resolve URLs, return the immediate result.
 189                        Pass in 'in_playlist' to only show this behavior for
 190                        playlist items.
 191     postprocessors:    A list of dictionaries, each with an entry
 192                        * key:  The name of the postprocessor. See
 193                                youtube_dl/postprocessor/__init__.py for a list.
 194                        as well as any further keyword arguments for the
 195                        postprocessor.
 196     progress_hooks:    A list of functions that get called on download
 197                        progress, with a dictionary with the entries
 198                        * filename: The final filename
 199                        * status: One of "downloading" and "finished"
 200
 201                        The dict may also have some of the following entries:
 202
 203                        * downloaded_bytes: Bytes on disk
 204                        * total_bytes: Size of the whole file, None if unknown
 205                        * tmpfilename: The filename we're currently writing to
 206                        * eta: The estimated time in seconds, None if unknown
 207                        * speed: The download speed in bytes/second, None if
 208                                 unknown
 209
 210                        Progress hooks are guaranteed to be called at least once
 211                        (with status "finished") if the download is successful.
 212     merge_output_format: Extension to use when merging formats.
 213     fixup:             Automatically correct known faults of the file.
 214                        One of:
 215                        - "never": do nothing
 216                        - "warn": only emit a warning
 217                        - "detect_or_warn": check whether we can do anything
 218                                            about it, warn otherwise (default)
 219     source_address:    (Experimental) Client-side IP address to bind to.
 220     call_home:         Boolean, true iff we are allowed to contact the
 221                        youtube-dl servers for debugging.
 222     sleep_interval:    Number of seconds to sleep before each download.
 223     external_downloader:  Executable of the external downloader to call.
 224
 225
 226     The following parameters are not used by YoutubeDL itself, they are used by
 227     the FileDownloader:
 228     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 229     noresizebuffer, retries, continuedl, noprogress, consoletitle
 230
 231     The following options are used by the post processors:
 232     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
 233                        otherwise prefer avconv.
 234     exec_cmd:          Arbitrary command to run after downloading
 235     """
 236
 237     params = None
 238     _ies = []
 239     _pps = []
 240     _download_retcode = None
 241     _num_downloads = None
 242     _screen_file = None
 243
 244     def __init__(self, params=None, auto_init=True):
 245         """Create a FileDownloader object with the given options."""
 246         if params is None:
 247             params = {}
 248         self._ies = []
 249         self._ies_instances = {}
 250         self._pps = []
 251         self._progress_hooks = []
 252         self._download_retcode = 0
 253         self._num_downloads = 0
 254         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 255         self._err_file = sys.stderr
 256         self.params = params
 257         self.cache = Cache(self)
 258
 259         if params.get('bidi_workaround', False):
 260             try:
 261                 import pty
 262                 master, slave = pty.openpty()
 263                 width = get_term_width()
 264                 if width is None:
 265                     width_args = []
 266                 else:
 267                     width_args = ['-w', str(width)]
 268                 sp_kwargs = dict(
 269                     stdin=subprocess.PIPE,
 270                     stdout=slave,
 271                     stderr=self._err_file)
 272                 try:
 273                     self._output_process = subprocess.Popen(
 274                         ['bidiv'] + width_args, **sp_kwargs
 275                     )
 276                 except OSError:
 277                     self._output_process = subprocess.Popen(
 278                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 279                 self._output_channel = os.fdopen(master, 'rb')
 280             except OSError as ose:
 281                 if ose.errno == 2:
 282                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 283                 else:
 284                     raise
 285
 286         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 287                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 288                 and not params.get('restrictfilenames', False)):
 289             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 290             self.report_warning(
 291                 'Assuming --restrict-filenames since file system encoding '
 292                 'cannot encode all characters. '
 293                 'Set the LC_ALL environment variable to fix this.')
 294             self.params['restrictfilenames'] = True
 295
 296         if '%(stitle)s' in self.params.get('outtmpl', ''):
 297             self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 298
 299         self._setup_opener()
 300
 301         if auto_init:
 302             self.print_debug_header()
 303             self.add_default_info_extractors()
 304
 305         for pp_def_raw in self.params.get('postprocessors', []):
 306             pp_class = get_postprocessor(pp_def_raw['key'])
 307             pp_def = dict(pp_def_raw)
 308             del pp_def['key']
 309             pp = pp_class(self, **compat_kwargs(pp_def))
 310             self.add_post_processor(pp)
 311
 312         for ph in self.params.get('progress_hooks', []):
 313             self.add_progress_hook(ph)
 314
 315     def warn_if_short_id(self, argv):
 316         # short YouTube ID starting with dash?
 317         idxs = [
 318             i for i, a in enumerate(argv)
 319             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 320         if idxs:
 321             correct_argv = (
 322                 ['youtube-dl'] +
 323                 [a for i, a in enumerate(argv) if i not in idxs] +
 324                 ['--'] + [argv[i] for i in idxs]
 325             )
 326             self.report_warning(
 327                 'Long argument string detected. '
 328                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 329                 args_to_str(correct_argv))
 330
 331     def add_info_extractor(self, ie):
 332         """Add an InfoExtractor object to the end of the list."""
 333         self._ies.append(ie)
 334         self._ies_instances[ie.ie_key()] = ie
 335         ie.set_downloader(self)
 336
 337     def get_info_extractor(self, ie_key):
 338         """
 339         Get an instance of an IE with name ie_key, it will try to get one from
 340         the _ies list, if there's no instance it will create a new one and add
 341         it to the extractor list.
 342         """
 343         ie = self._ies_instances.get(ie_key)
 344         if ie is None:
 345             ie = get_info_extractor(ie_key)()
 346             self.add_info_extractor(ie)
 347         return ie
 348
 349     def add_default_info_extractors(self):
 350         """
 351         Add the InfoExtractors returned by gen_extractors to the end of the list
 352         """
 353         for ie in gen_extractors():
 354             self.add_info_extractor(ie)
 355
 356     def add_post_processor(self, pp):
 357         """Add a PostProcessor object to the end of the chain."""
 358         self._pps.append(pp)
 359         pp.set_downloader(self)
 360
 361     def add_progress_hook(self, ph):
 362         """Add the progress hook (currently only for the file downloader)"""
 363         self._progress_hooks.append(ph)
 364
 365     def _bidi_workaround(self, message):
 366         if not hasattr(self, '_output_channel'):
 367             return message
 368
 369         assert hasattr(self, '_output_process')
 370         assert isinstance(message, compat_str)
 371         line_count = message.count('\n') + 1
 372         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 373         self._output_process.stdin.flush()
 374         res = ''.join(self._output_channel.readline().decode('utf-8')
 375                       for _ in range(line_count))
 376         return res[:-len('\n')]
 377
 378     def to_screen(self, message, skip_eol=False):
 379         """Print message to stdout if not in quiet mode."""
 380         return self.to_stdout(message, skip_eol, check_quiet=True)
 381
 382     def _write_string(self, s, out=None):
 383         write_string(s, out=out, encoding=self.params.get('encoding'))
 384
 385     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 386         """Print message to stdout if not in quiet mode."""
 387         if self.params.get('logger'):
 388             self.params['logger'].debug(message)
 389         elif not check_quiet or not self.params.get('quiet', False):
 390             message = self._bidi_workaround(message)
 391             terminator = ['\n', ''][skip_eol]
 392             output = message + terminator
 393
 394             self._write_string(output, self._screen_file)
 395
 396     def to_stderr(self, message):
 397         """Print message to stderr."""
 398         assert isinstance(message, compat_str)
 399         if self.params.get('logger'):
 400             self.params['logger'].error(message)
 401         else:
 402             message = self._bidi_workaround(message)
 403             output = message + '\n'
 404             self._write_string(output, self._err_file)
 405
 406     def to_console_title(self, message):
 407         if not self.params.get('consoletitle', False):
 408             return
 409         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 410             # c_wchar_p() might not be necessary if `message` is
 411             # already of type unicode()
 412             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 413         elif 'TERM' in os.environ:
 414             self._write_string('\033]0;%s\007' % message, self._screen_file)
 415
 416     def save_console_title(self):
 417         if not self.params.get('consoletitle', False):
 418             return
 419         if 'TERM' in os.environ:
 420             # Save the title on stack
 421             self._write_string('\033[22;0t', self._screen_file)
 422
 423     def restore_console_title(self):
 424         if not self.params.get('consoletitle', False):
 425             return
 426         if 'TERM' in os.environ:
 427             # Restore the title from stack
 428             self._write_string('\033[23;0t', self._screen_file)
 429
 430     def __enter__(self):
 431         self.save_console_title()
 432         return self
 433
 434     def __exit__(self, *args):
 435         self.restore_console_title()
 436
 437         if self.params.get('cookiefile') is not None:
 438             self.cookiejar.save()
 439
 440     def trouble(self, message=None, tb=None):
 441         """Determine action to take when a download problem appears.
 442
 443         Depending on if the downloader has been configured to ignore
 444         download errors or not, this method may throw an exception or
 445         not when errors are found, after printing the message.
 446
 447         tb, if given, is additional traceback information.
 448         """
 449         if message is not None:
 450             self.to_stderr(message)
 451         if self.params.get('verbose'):
 452             if tb is None:
 453                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 454                     tb = ''
 455                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 456                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 457                     tb += compat_str(traceback.format_exc())
 458                 else:
 459                     tb_data = traceback.format_list(traceback.extract_stack())
 460                     tb = ''.join(tb_data)
 461             self.to_stderr(tb)
 462         if not self.params.get('ignoreerrors', False):
 463             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 464                 exc_info = sys.exc_info()[1].exc_info
 465             else:
 466                 exc_info = sys.exc_info()
 467             raise DownloadError(message, exc_info)
 468         self._download_retcode = 1
 469
 470     def report_warning(self, message):
 471         '''
 472         Print the message to stderr, it will be prefixed with 'WARNING:'
 473         If stderr is a tty file the 'WARNING:' will be colored
 474         '''
 475         if self.params.get('logger') is not None:
 476             self.params['logger'].warning(message)
 477         else:
 478             if self.params.get('no_warnings'):
 479                 return
 480             if self._err_file.isatty() and os.name != 'nt':
 481                 _msg_header = '\033[0;33mWARNING:\033[0m'
 482             else:
 483                 _msg_header = 'WARNING:'
 484             warning_message = '%s %s' % (_msg_header, message)
 485             self.to_stderr(warning_message)
 486
 487     def report_error(self, message, tb=None):
 488         '''
 489         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 490         in red if stderr is a tty file.
 491         '''
 492         if self._err_file.isatty() and os.name != 'nt':
 493             _msg_header = '\033[0;31mERROR:\033[0m'
 494         else:
 495             _msg_header = 'ERROR:'
 496         error_message = '%s %s' % (_msg_header, message)
 497         self.trouble(error_message, tb)
 498
 499     def report_file_already_downloaded(self, file_name):
 500         """Report file has already been fully downloaded."""
 501         try:
 502             self.to_screen('[download] %s has already been downloaded' % file_name)
 503         except UnicodeEncodeError:
 504             self.to_screen('[download] The file has already been downloaded')
 505
 506     def prepare_filename(self, info_dict):
 507         """Generate the output filename."""
 508         try:
 509             template_dict = dict(info_dict)
 510
 511             template_dict['epoch'] = int(time.time())
 512             autonumber_size = self.params.get('autonumber_size')
 513             if autonumber_size is None:
 514                 autonumber_size = 5
 515             autonumber_templ = '%0' + str(autonumber_size) + 'd'
 516             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 517             if template_dict.get('playlist_index') is not None:
 518                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
 519             if template_dict.get('resolution') is None:
 520                 if template_dict.get('width') and template_dict.get('height'):
 521                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 522                 elif template_dict.get('height'):
 523                     template_dict['resolution'] = '%sp' % template_dict['height']
 524                 elif template_dict.get('width'):
 525                     template_dict['resolution'] = '?x%d' % template_dict['width']
 526
 527             sanitize = lambda k, v: sanitize_filename(
 528                 compat_str(v),
 529                 restricted=self.params.get('restrictfilenames'),
 530                 is_id=(k == 'id'))
 531             template_dict = dict((k, sanitize(k, v))
 532                                  for k, v in template_dict.items()
 533                                  if v is not None)
 534             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 535
 536             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 537             tmpl = compat_expanduser(outtmpl)
 538             filename = tmpl % template_dict
 539             return filename
 540         except ValueError as err:
 541             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 542             return None
 543
 544     def _match_entry(self, info_dict):
 545         """ Returns None iff the file should be downloaded """
 546
 547         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 548         if 'title' in info_dict:
 549             # This can happen when we're just evaluating the playlist
 550             title = info_dict['title']
 551             matchtitle = self.params.get('matchtitle', False)
 552             if matchtitle:
 553                 if not re.search(matchtitle, title, re.IGNORECASE):
 554                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 555             rejecttitle = self.params.get('rejecttitle', False)
 556             if rejecttitle:
 557                 if re.search(rejecttitle, title, re.IGNORECASE):
 558                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 559         date = info_dict.get('upload_date', None)
 560         if date is not None:
 561             dateRange = self.params.get('daterange', DateRange())
 562             if date not in dateRange:
 563                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 564         view_count = info_dict.get('view_count', None)
 565         if view_count is not None:
 566             min_views = self.params.get('min_views')
 567             if min_views is not None and view_count < min_views:
 568                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 569             max_views = self.params.get('max_views')
 570             if max_views is not None and view_count > max_views:
 571                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 572         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
 573             return 'Skipping "%s" because it is age restricted' % title
 574         if self.in_download_archive(info_dict):
 575             return '%s has already been recorded in archive' % video_title
 576         return None
 577
 578     @staticmethod
 579     def add_extra_info(info_dict, extra_info):
 580         '''Set the keys from extra_info in info dict if they are missing'''
 581         for key, value in extra_info.items():
 582             info_dict.setdefault(key, value)
 583
 584     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 585                      process=True):
 586         '''
 587         Returns a list with a dictionary for each video we find.
 588         If 'download', also downloads the videos.
 589         extra_info is a dict containing the extra values to add to each result
 590          '''
 591
 592         if ie_key:
 593             ies = [self.get_info_extractor(ie_key)]
 594         else:
 595             ies = self._ies
 596
 597         for ie in ies:
 598             if not ie.suitable(url):
 599                 continue
 600
 601             if not ie.working():
 602                 self.report_warning('The program functionality for this site has been marked as broken, '
 603                                     'and will probably not work.')
 604
 605             try:
 606                 ie_result = ie.extract(url)
 607                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
 608                     break
 609                 if isinstance(ie_result, list):
 610                     # Backwards compatibility: old IE result format
 611                     ie_result = {
 612                         '_type': 'compat_list',
 613                         'entries': ie_result,
 614                     }
 615                 self.add_default_extra_info(ie_result, ie, url)
 616                 if process:
 617                     return self.process_ie_result(ie_result, download, extra_info)
 618                 else:
 619                     return ie_result
 620             except ExtractorError as de:  # An error we somewhat expected
 621                 self.report_error(compat_str(de), de.format_traceback())
 622                 break
 623             except MaxDownloadsReached:
 624                 raise
 625             except Exception as e:
 626                 if self.params.get('ignoreerrors', False):
 627                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 628                     break
 629                 else:
 630                     raise
 631         else:
 632             self.report_error('no suitable InfoExtractor for URL %s' % url)
 633
 634     def add_default_extra_info(self, ie_result, ie, url):
 635         self.add_extra_info(ie_result, {
 636             'extractor': ie.IE_NAME,
 637             'webpage_url': url,
 638             'webpage_url_basename': url_basename(url),
 639             'extractor_key': ie.ie_key(),
 640         })
 641
 642     def process_ie_result(self, ie_result, download=True, extra_info={}):
 643         """
 644         Take the result of the ie(may be modified) and resolve all unresolved
 645         references (URLs, playlist items).
 646
 647         It will also download the videos if 'download'.
 648         Returns the resolved ie_result.
 649         """
 650
 651         result_type = ie_result.get('_type', 'video')
 652
 653         if result_type in ('url', 'url_transparent'):
 654             extract_flat = self.params.get('extract_flat', False)
 655             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
 656                     extract_flat is True):
 657                 if self.params.get('forcejson', False):
 658                     self.to_stdout(json.dumps(ie_result))
 659                 return ie_result
 660
 661         if result_type == 'video':
 662             self.add_extra_info(ie_result, extra_info)
 663             return self.process_video_result(ie_result, download=download)
 664         elif result_type == 'url':
 665             # We have to add extra_info to the results because it may be
 666             # contained in a playlist
 667             return self.extract_info(ie_result['url'],
 668                                      download,
 669                                      ie_key=ie_result.get('ie_key'),
 670                                      extra_info=extra_info)
 671         elif result_type == 'url_transparent':
 672             # Use the information from the embedding page
 673             info = self.extract_info(
 674                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 675                 extra_info=extra_info, download=False, process=False)
 676
 677             force_properties = dict(
 678                 (k, v) for k, v in ie_result.items() if v is not None)
 679             for f in ('_type', 'url'):
 680                 if f in force_properties:
 681                     del force_properties[f]
 682             new_result = info.copy()
 683             new_result.update(force_properties)
 684
 685             assert new_result.get('_type') != 'url_transparent'
 686
 687             return self.process_ie_result(
 688                 new_result, download=download, extra_info=extra_info)
 689         elif result_type == 'playlist' or result_type == 'multi_video':
 690             # We process each entry in the playlist
 691             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 692             self.to_screen('[download] Downloading playlist: %s' % playlist)
 693
 694             playlist_results = []
 695
 696             playliststart = self.params.get('playliststart', 1) - 1
 697             playlistend = self.params.get('playlistend', None)
 698             # For backwards compatibility, interpret -1 as whole list
 699             if playlistend == -1:
 700                 playlistend = None
 701
 702             ie_entries = ie_result['entries']
 703             if isinstance(ie_entries, list):
 704                 n_all_entries = len(ie_entries)
 705                 entries = ie_entries[playliststart:playlistend]
 706                 n_entries = len(entries)
 707                 self.to_screen(
 708                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
 709                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
 710             elif isinstance(ie_entries, PagedList):
 711                 entries = ie_entries.getslice(
 712                     playliststart, playlistend)
 713                 n_entries = len(entries)
 714                 self.to_screen(
 715                     "[%s] playlist %s: Downloading %d videos" %
 716                     (ie_result['extractor'], playlist, n_entries))
 717             else:  # iterable
 718                 entries = list(itertools.islice(
 719                     ie_entries, playliststart, playlistend))
 720                 n_entries = len(entries)
 721                 self.to_screen(
 722                     "[%s] playlist %s: Downloading %d videos" %
 723                     (ie_result['extractor'], playlist, n_entries))
 724
 725             if self.params.get('playlistreverse', False):
 726                 entries = entries[::-1]
 727
 728             for i, entry in enumerate(entries, 1):
 729                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
 730                 extra = {
 731                     'n_entries': n_entries,
 732                     'playlist': playlist,
 733                     'playlist_id': ie_result.get('id'),
 734                     'playlist_title': ie_result.get('title'),
 735                     'playlist_index': i + playliststart,
 736                     'extractor': ie_result['extractor'],
 737                     'webpage_url': ie_result['webpage_url'],
 738                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
 739                     'extractor_key': ie_result['extractor_key'],
 740                 }
 741
 742                 reason = self._match_entry(entry)
 743                 if reason is not None:
 744                     self.to_screen('[download] ' + reason)
 745                     continue
 746
 747                 entry_result = self.process_ie_result(entry,
 748                                                       download=download,
 749                                                       extra_info=extra)
 750                 playlist_results.append(entry_result)
 751             ie_result['entries'] = playlist_results
 752             return ie_result
 753         elif result_type == 'compat_list':
 754             self.report_warning(
 755                 'Extractor %s returned a compat_list result. '
 756                 'It needs to be updated.' % ie_result.get('extractor'))
 757
 758             def _fixup(r):
 759                 self.add_extra_info(
 760                     r,
 761                     {
 762                         'extractor': ie_result['extractor'],
 763                         'webpage_url': ie_result['webpage_url'],
 764                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
 765                         'extractor_key': ie_result['extractor_key'],
 766                     }
 767                 )
 768                 return r
 769             ie_result['entries'] = [
 770                 self.process_ie_result(_fixup(r), download, extra_info)
 771                 for r in ie_result['entries']
 772             ]
 773             return ie_result
 774         else:
 775             raise Exception('Invalid result type: %s' % result_type)
 776
 777     def _apply_format_filter(self, format_spec, available_formats):
 778         " Returns a tuple of the remaining format_spec and filtered formats "
 779
 780         OPERATORS = {
 781             '<': operator.lt,
 782             '<=': operator.le,
 783             '>': operator.gt,
 784             '>=': operator.ge,
 785             '=': operator.eq,
 786             '!=': operator.ne,
 787         }
 788         operator_rex = re.compile(r'''(?x)\s*\[
 789             (?P<key>width|height|tbr|abr|vbr|filesize)
 790             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
 791             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
 792             \]$
 793             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
 794         m = operator_rex.search(format_spec)
 795         if not m:
 796             raise ValueError('Invalid format specification %r' % format_spec)
 797
 798         try:
 799             comparison_value = int(m.group('value'))
 800         except ValueError:
 801             comparison_value = parse_filesize(m.group('value'))
 802             if comparison_value is None:
 803                 comparison_value = parse_filesize(m.group('value') + 'B')
 804             if comparison_value is None:
 805                 raise ValueError(
 806                     'Invalid value %r in format specification %r' % (
 807                         m.group('value'), format_spec))
 808         op = OPERATORS[m.group('op')]
 809
 810         def _filter(f):
 811             actual_value = f.get(m.group('key'))
 812             if actual_value is None:
 813                 return m.group('none_inclusive')
 814             return op(actual_value, comparison_value)
 815         new_formats = [f for f in available_formats if _filter(f)]
 816
 817         new_format_spec = format_spec[:-len(m.group(0))]
 818         if not new_format_spec:
 819             new_format_spec = 'best'
 820
 821         return (new_format_spec, new_formats)
 822
 823     def select_format(self, format_spec, available_formats):
 824         while format_spec.endswith(']'):
 825             format_spec, available_formats = self._apply_format_filter(
 826                 format_spec, available_formats)
 827         if not available_formats:
 828             return None
 829
 830         if format_spec == 'best' or format_spec is None:
 831             return available_formats[-1]
 832         elif format_spec == 'worst':
 833             return available_formats[0]
 834         elif format_spec == 'bestaudio':
 835             audio_formats = [
 836                 f for f in available_formats
 837                 if f.get('vcodec') == 'none']
 838             if audio_formats:
 839                 return audio_formats[-1]
 840         elif format_spec == 'worstaudio':
 841             audio_formats = [
 842                 f for f in available_formats
 843                 if f.get('vcodec') == 'none']
 844             if audio_formats:
 845                 return audio_formats[0]
 846         elif format_spec == 'bestvideo':
 847             video_formats = [
 848                 f for f in available_formats
 849                 if f.get('acodec') == 'none']
 850             if video_formats:
 851                 return video_formats[-1]
 852         elif format_spec == 'worstvideo':
 853             video_formats = [
 854                 f for f in available_formats
 855                 if f.get('acodec') == 'none']
 856             if video_formats:
 857                 return video_formats[0]
 858         else:
 859             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
 860             if format_spec in extensions:
 861                 filter_f = lambda f: f['ext'] == format_spec
 862             else:
 863                 filter_f = lambda f: f['format_id'] == format_spec
 864             matches = list(filter(filter_f, available_formats))
 865             if matches:
 866                 return matches[-1]
 867         return None
 868
 869     def _calc_headers(self, info_dict):
 870         res = std_headers.copy()
 871
 872         add_headers = info_dict.get('http_headers')
 873         if add_headers:
 874             res.update(add_headers)
 875
 876         cookies = self._calc_cookies(info_dict)
 877         if cookies:
 878             res['Cookie'] = cookies
 879
 880         return res
 881
 882     def _calc_cookies(self, info_dict):
 883         class _PseudoRequest(object):
 884             def __init__(self, url):
 885                 self.url = url
 886                 self.headers = {}
 887                 self.unverifiable = False
 888
 889             def add_unredirected_header(self, k, v):
 890                 self.headers[k] = v
 891
 892             def get_full_url(self):
 893                 return self.url
 894
 895             def is_unverifiable(self):
 896                 return self.unverifiable
 897
 898             def has_header(self, h):
 899                 return h in self.headers
 900
 901         pr = _PseudoRequest(info_dict['url'])
 902         self.cookiejar.add_cookie_header(pr)
 903         return pr.headers.get('Cookie')
 904
 905     def process_video_result(self, info_dict, download=True):
 906         assert info_dict.get('_type', 'video') == 'video'
 907
 908         if 'id' not in info_dict:
 909             raise ExtractorError('Missing "id" field in extractor result')
 910         if 'title' not in info_dict:
 911             raise ExtractorError('Missing "title" field in extractor result')
 912
 913         if 'playlist' not in info_dict:
 914             # It isn't part of a playlist
 915             info_dict['playlist'] = None
 916             info_dict['playlist_index'] = None
 917
 918         thumbnails = info_dict.get('thumbnails')
 919         if thumbnails:
 920             thumbnails.sort(key=lambda t: (
 921                 t.get('width'), t.get('height'), t.get('url')))
 922             for t in thumbnails:
 923                 if 'width' in t and 'height' in t:
 924                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
 925
 926         if thumbnails and 'thumbnail' not in info_dict:
 927             info_dict['thumbnail'] = thumbnails[-1]['url']
 928
 929         if 'display_id' not in info_dict and 'id' in info_dict:
 930             info_dict['display_id'] = info_dict['id']
 931
 932         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
 933             # Working around negative timestamps in Windows
 934             # (see http://bugs.python.org/issue1646728)
 935             if info_dict['timestamp'] < 0 and os.name == 'nt':
 936                 info_dict['timestamp'] = 0
 937             upload_date = datetime.datetime.utcfromtimestamp(
 938                 info_dict['timestamp'])
 939             info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
 940
 941         # This extractors handle format selection themselves
 942         if info_dict['extractor'] in ['Youku']:
 943             if download:
 944                 self.process_info(info_dict)
 945             return info_dict
 946
 947         # We now pick which formats have to be downloaded
 948         if info_dict.get('formats') is None:
 949             # There's only one format available
 950             formats = [info_dict]
 951         else:
 952             formats = info_dict['formats']
 953
 954         if not formats:
 955             raise ExtractorError('No video formats found!')
 956
 957         # We check that all the formats have the format and format_id fields
 958         for i, format in enumerate(formats):
 959             if 'url' not in format:
 960                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
 961
 962             if format.get('format_id') is None:
 963                 format['format_id'] = compat_str(i)
 964             if format.get('format') is None:
 965                 format['format'] = '{id} - {res}{note}'.format(
 966                     id=format['format_id'],
 967                     res=self.format_resolution(format),
 968                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
 969                 )
 970             # Automatically determine file extension if missing
 971             if 'ext' not in format:
 972                 format['ext'] = determine_ext(format['url']).lower()
 973             # Add HTTP headers, so that external programs can use them from the
 974             # json output
 975             full_format_info = info_dict.copy()
 976             full_format_info.update(format)
 977             format['http_headers'] = self._calc_headers(full_format_info)
 978
 979         format_limit = self.params.get('format_limit', None)
 980         if format_limit:
 981             formats = list(takewhile_inclusive(
 982                 lambda f: f['format_id'] != format_limit, formats
 983             ))
 984
 985         # TODO Central sorting goes here
 986
 987         if formats[0] is not info_dict:
 988             # only set the 'formats' fields if the original info_dict list them
 989             # otherwise we end up with a circular reference, the first (and unique)
 990             # element in the 'formats' field in info_dict is info_dict itself,
 991             # wich can't be exported to json
 992             info_dict['formats'] = formats
 993         if self.params.get('listformats', None):
 994             self.list_formats(info_dict)
 995             return
 996
 997         req_format = self.params.get('format')
 998         if req_format is None:
 999             req_format = 'best'
1000         formats_to_download = []
1001         # The -1 is for supporting YoutubeIE
1002         if req_format in ('-1', 'all'):
1003             formats_to_download = formats
1004         else:
1005             for rfstr in req_format.split(','):
1006                 # We can accept formats requested in the format: 34/5/best, we pick
1007                 # the first that is available, starting from left
1008                 req_formats = rfstr.split('/')
1009                 for rf in req_formats:
1010                     if re.match(r'.+?\+.+?', rf) is not None:
1011                         # Two formats have been requested like '137+139'
1012                         format_1, format_2 = rf.split('+')
1013                         formats_info = (self.select_format(format_1, formats),
1014                                         self.select_format(format_2, formats))
1015                         if all(formats_info):
1016                             # The first format must contain the video and the
1017                             # second the audio
1018                             if formats_info[0].get('vcodec') == 'none':
1019                                 self.report_error('The first format must '
1020                                                   'contain the video, try using '
1021                                                   '"-f %s+%s"' % (format_2, format_1))
1022                                 return
1023                             output_ext = (
1024                                 formats_info[0]['ext']
1025                                 if self.params.get('merge_output_format') is None
1026                                 else self.params['merge_output_format'])
1027                             selected_format = {
1028                                 'requested_formats': formats_info,
1029                                 'format': rf,
1030                                 'ext': formats_info[0]['ext'],
1031                                 'width': formats_info[0].get('width'),
1032                                 'height': formats_info[0].get('height'),
1033                                 'resolution': formats_info[0].get('resolution'),
1034                                 'fps': formats_info[0].get('fps'),
1035                                 'vcodec': formats_info[0].get('vcodec'),
1036                                 'vbr': formats_info[0].get('vbr'),
1037                                 'stretched_ratio': formats_info[0].get('stretched_ratio'),
1038                                 'acodec': formats_info[1].get('acodec'),
1039                                 'abr': formats_info[1].get('abr'),
1040                                 'ext': output_ext,
1041                             }
1042                         else:
1043                             selected_format = None
1044                     else:
1045                         selected_format = self.select_format(rf, formats)
1046                     if selected_format is not None:
1047                         formats_to_download.append(selected_format)
1048                         break
1049         if not formats_to_download:
1050             raise ExtractorError('requested format not available',
1051                                  expected=True)
1052
1053         if download:
1054             if len(formats_to_download) > 1:
1055                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1056             for format in formats_to_download:
1057                 new_info = dict(info_dict)
1058                 new_info.update(format)
1059                 self.process_info(new_info)
1060         # We update the info dict with the best quality format (backwards compatibility)
1061         info_dict.update(formats_to_download[-1])
1062         return info_dict
1063
1064     def process_info(self, info_dict):
1065         """Process a single resolved IE result."""
1066
1067         assert info_dict.get('_type', 'video') == 'video'
1068
1069         max_downloads = self.params.get('max_downloads')
1070         if max_downloads is not None:
1071             if self._num_downloads >= int(max_downloads):
1072                 raise MaxDownloadsReached()
1073
1074         info_dict['fulltitle'] = info_dict['title']
1075         if len(info_dict['title']) > 200:
1076             info_dict['title'] = info_dict['title'][:197] + '...'
1077
1078         # Keep for backwards compatibility
1079         info_dict['stitle'] = info_dict['title']
1080
1081         if 'format' not in info_dict:
1082             info_dict['format'] = info_dict['ext']
1083
1084         reason = self._match_entry(info_dict)
1085         if reason is not None:
1086             self.to_screen('[download] ' + reason)
1087             return
1088
1089         self._num_downloads += 1
1090
1091         filename = self.prepare_filename(info_dict)
1092
1093         # Forced printings
1094         if self.params.get('forcetitle', False):
1095             self.to_stdout(info_dict['fulltitle'])
1096         if self.params.get('forceid', False):
1097             self.to_stdout(info_dict['id'])
1098         if self.params.get('forceurl', False):
1099             if info_dict.get('requested_formats') is not None:
1100                 for f in info_dict['requested_formats']:
1101                     self.to_stdout(f['url'] + f.get('play_path', ''))
1102             else:
1103                 # For RTMP URLs, also include the playpath
1104                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1105         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1106             self.to_stdout(info_dict['thumbnail'])
1107         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1108             self.to_stdout(info_dict['description'])
1109         if self.params.get('forcefilename', False) and filename is not None:
1110             self.to_stdout(filename)
1111         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1112             self.to_stdout(formatSeconds(info_dict['duration']))
1113         if self.params.get('forceformat', False):
1114             self.to_stdout(info_dict['format'])
1115         if self.params.get('forcejson', False):
1116             info_dict['_filename'] = filename
1117             self.to_stdout(json.dumps(info_dict))
1118         if self.params.get('dump_single_json', False):
1119             info_dict['_filename'] = filename
1120
1121         # Do nothing else if in simulate mode
1122         if self.params.get('simulate', False):
1123             return
1124
1125         if filename is None:
1126             return
1127
1128         try:
1129             dn = os.path.dirname(encodeFilename(filename))
1130             if dn and not os.path.exists(dn):
1131                 os.makedirs(dn)
1132         except (OSError, IOError) as err:
1133             self.report_error('unable to create directory ' + compat_str(err))
1134             return
1135
1136         if self.params.get('writedescription', False):
1137             descfn = filename + '.description'
1138             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1139                 self.to_screen('[info] Video description is already present')
1140             elif info_dict.get('description') is None:
1141                 self.report_warning('There\'s no description to write.')
1142             else:
1143                 try:
1144                     self.to_screen('[info] Writing video description to: ' + descfn)
1145                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1146                         descfile.write(info_dict['description'])
1147                 except (OSError, IOError):
1148                     self.report_error('Cannot write description file ' + descfn)
1149                     return
1150
1151         if self.params.get('writeannotations', False):
1152             annofn = filename + '.annotations.xml'
1153             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1154                 self.to_screen('[info] Video annotations are already present')
1155             else:
1156                 try:
1157                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1158                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1159                         annofile.write(info_dict['annotations'])
1160                 except (KeyError, TypeError):
1161                     self.report_warning('There are no annotations to write.')
1162                 except (OSError, IOError):
1163                     self.report_error('Cannot write annotations file: ' + annofn)
1164                     return
1165
1166         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1167                                        self.params.get('writeautomaticsub')])
1168
1169         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
1170             # subtitles download errors are already managed as troubles in relevant IE
1171             # that way it will silently go on when used with unsupporting IE
1172             subtitles = info_dict['subtitles']
1173             sub_format = self.params.get('subtitlesformat', 'srt')
1174             for sub_lang in subtitles.keys():
1175                 sub = subtitles[sub_lang]
1176                 if sub is None:
1177                     continue
1178                 try:
1179                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1180                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1181                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1182                     else:
1183                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1184                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1185                             subfile.write(sub)
1186                 except (OSError, IOError):
1187                     self.report_error('Cannot write subtitles file ' + sub_filename)
1188                     return
1189
1190         if self.params.get('writeinfojson', False):
1191             infofn = os.path.splitext(filename)[0] + '.info.json'
1192             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1193                 self.to_screen('[info] Video description metadata is already present')
1194             else:
1195                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1196                 try:
1197                     write_json_file(info_dict, infofn)
1198                 except (OSError, IOError):
1199                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1200                     return
1201
1202         if self.params.get('writethumbnail', False):
1203             if info_dict.get('thumbnail') is not None:
1204                 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
1205                 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
1206                 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1207                     self.to_screen('[%s] %s: Thumbnail is already present' %
1208                                    (info_dict['extractor'], info_dict['id']))
1209                 else:
1210                     self.to_screen('[%s] %s: Downloading thumbnail ...' %
1211                                    (info_dict['extractor'], info_dict['id']))
1212                     try:
1213                         uf = self.urlopen(info_dict['thumbnail'])
1214                         with open(thumb_filename, 'wb') as thumbf:
1215                             shutil.copyfileobj(uf, thumbf)
1216                         self.to_screen('[%s] %s: Writing thumbnail to: %s' %
1217                                        (info_dict['extractor'], info_dict['id'], thumb_filename))
1218                     except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1219                         self.report_warning('Unable to download thumbnail "%s": %s' %
1220                                             (info_dict['thumbnail'], compat_str(err)))
1221
1222         if not self.params.get('skip_download', False):
1223             try:
1224                 def dl(name, info):
1225                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1226                     for ph in self._progress_hooks:
1227                         fd.add_progress_hook(ph)
1228                     if self.params.get('verbose'):
1229                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1230                     return fd.download(name, info)
1231                 if info_dict.get('requested_formats') is not None:
1232                     downloaded = []
1233                     success = True
1234                     merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1235                     if not merger._executable:
1236                         postprocessors = []
1237                         self.report_warning('You have requested multiple '
1238                                             'formats but ffmpeg or avconv are not installed.'
1239                                             ' The formats won\'t be merged')
1240                     else:
1241                         postprocessors = [merger]
1242                     for f in info_dict['requested_formats']:
1243                         new_info = dict(info_dict)
1244                         new_info.update(f)
1245                         fname = self.prepare_filename(new_info)
1246                         fname = prepend_extension(fname, 'f%s' % f['format_id'])
1247                         downloaded.append(fname)
1248                         partial_success = dl(fname, new_info)
1249                         success = success and partial_success
1250                     info_dict['__postprocessors'] = postprocessors
1251                     info_dict['__files_to_merge'] = downloaded
1252                 else:
1253                     # Just a single file
1254                     success = dl(filename, info_dict)
1255             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1256                 self.report_error('unable to download video data: %s' % str(err))
1257                 return
1258             except (OSError, IOError) as err:
1259                 raise UnavailableVideoError(err)
1260             except (ContentTooShortError, ) as err:
1261                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1262                 return
1263
1264             if success:
1265                 # Fixup content
1266                 fixup_policy = self.params.get('fixup')
1267                 if fixup_policy is None:
1268                     fixup_policy = 'detect_or_warn'
1269
1270                 stretched_ratio = info_dict.get('stretched_ratio')
1271                 if stretched_ratio is not None and stretched_ratio != 1:
1272                     if fixup_policy == 'warn':
1273                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1274                             info_dict['id'], stretched_ratio))
1275                     elif fixup_policy == 'detect_or_warn':
1276                         stretched_pp = FFmpegFixupStretchedPP(self)
1277                         if stretched_pp.available:
1278                             info_dict.setdefault('__postprocessors', [])
1279                             info_dict['__postprocessors'].append(stretched_pp)
1280                         else:
1281                             self.report_warning(
1282                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1283                                     info_dict['id'], stretched_ratio))
1284                     else:
1285                         assert fixup_policy in ('ignore', 'never')
1286
1287                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1288                     if fixup_policy == 'warn':
1289                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1290                             info_dict['id']))
1291                     elif fixup_policy == 'detect_or_warn':
1292                         fixup_pp = FFmpegFixupM4aPP(self)
1293                         if fixup_pp.available:
1294                             info_dict.setdefault('__postprocessors', [])
1295                             info_dict['__postprocessors'].append(fixup_pp)
1296                         else:
1297                             self.report_warning(
1298                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1299                                     info_dict['id']))
1300                     else:
1301                         assert fixup_policy in ('ignore', 'never')
1302
1303                 try:
1304                     self.post_process(filename, info_dict)
1305                 except (PostProcessingError) as err:
1306                     self.report_error('postprocessing: %s' % str(err))
1307                     return
1308                 self.record_download_archive(info_dict)
1309
1310     def download(self, url_list):
1311         """Download a given list of URLs."""
1312         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1313         if (len(url_list) > 1 and
1314                 '%' not in outtmpl
1315                 and self.params.get('max_downloads') != 1):
1316             raise SameFileError(outtmpl)
1317
1318         for url in url_list:
1319             try:
1320                 # It also downloads the videos
1321                 res = self.extract_info(url)
1322             except UnavailableVideoError:
1323                 self.report_error('unable to download video')
1324             except MaxDownloadsReached:
1325                 self.to_screen('[info] Maximum number of downloaded files reached.')
1326                 raise
1327             else:
1328                 if self.params.get('dump_single_json', False):
1329                     self.to_stdout(json.dumps(res))
1330
1331         return self._download_retcode
1332
1333     def download_with_info_file(self, info_filename):
1334         with io.open(info_filename, 'r', encoding='utf-8') as f:
1335             info = json.load(f)
1336         try:
1337             self.process_ie_result(info, download=True)
1338         except DownloadError:
1339             webpage_url = info.get('webpage_url')
1340             if webpage_url is not None:
1341                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1342                 return self.download([webpage_url])
1343             else:
1344                 raise
1345         return self._download_retcode
1346
1347     def post_process(self, filename, ie_info):
1348         """Run all the postprocessors on the given file."""
1349         info = dict(ie_info)
1350         info['filepath'] = filename
1351         pps_chain = []
1352         if ie_info.get('__postprocessors') is not None:
1353             pps_chain.extend(ie_info['__postprocessors'])
1354         pps_chain.extend(self._pps)
1355         for pp in pps_chain:
1356             keep_video = None
1357             old_filename = info['filepath']
1358             try:
1359                 keep_video_wish, info = pp.run(info)
1360                 if keep_video_wish is not None:
1361                     if keep_video_wish:
1362                         keep_video = keep_video_wish
1363                     elif keep_video is None:
1364                         # No clear decision yet, let IE decide
1365                         keep_video = keep_video_wish
1366             except PostProcessingError as e:
1367                 self.report_error(e.msg)
1368             if keep_video is False and not self.params.get('keepvideo', False):
1369                 try:
1370                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1371                     os.remove(encodeFilename(old_filename))
1372                 except (IOError, OSError):
1373                     self.report_warning('Unable to remove downloaded video file')
1374
1375     def _make_archive_id(self, info_dict):
1376         # Future-proof against any change in case
1377         # and backwards compatibility with prior versions
1378         extractor = info_dict.get('extractor_key')
1379         if extractor is None:
1380             if 'id' in info_dict:
1381                 extractor = info_dict.get('ie_key')  # key in a playlist
1382         if extractor is None:
1383             return None  # Incomplete video information
1384         return extractor.lower() + ' ' + info_dict['id']
1385
1386     def in_download_archive(self, info_dict):
1387         fn = self.params.get('download_archive')
1388         if fn is None:
1389             return False
1390
1391         vid_id = self._make_archive_id(info_dict)
1392         if vid_id is None:
1393             return False  # Incomplete video information
1394
1395         try:
1396             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1397                 for line in archive_file:
1398                     if line.strip() == vid_id:
1399                         return True
1400         except IOError as ioe:
1401             if ioe.errno != errno.ENOENT:
1402                 raise
1403         return False
1404
1405     def record_download_archive(self, info_dict):
1406         fn = self.params.get('download_archive')
1407         if fn is None:
1408             return
1409         vid_id = self._make_archive_id(info_dict)
1410         assert vid_id
1411         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1412             archive_file.write(vid_id + '\n')
1413
1414     @staticmethod
1415     def format_resolution(format, default='unknown'):
1416         if format.get('vcodec') == 'none':
1417             return 'audio only'
1418         if format.get('resolution') is not None:
1419             return format['resolution']
1420         if format.get('height') is not None:
1421             if format.get('width') is not None:
1422                 res = '%sx%s' % (format['width'], format['height'])
1423             else:
1424                 res = '%sp' % format['height']
1425         elif format.get('width') is not None:
1426             res = '?x%d' % format['width']
1427         else:
1428             res = default
1429         return res
1430
1431     def _format_note(self, fdict):
1432         res = ''
1433         if fdict.get('ext') in ['f4f', 'f4m']:
1434             res += '(unsupported) '
1435         if fdict.get('format_note') is not None:
1436             res += fdict['format_note'] + ' '
1437         if fdict.get('tbr') is not None:
1438             res += '%4dk ' % fdict['tbr']
1439         if fdict.get('container') is not None:
1440             if res:
1441                 res += ', '
1442             res += '%s container' % fdict['container']
1443         if (fdict.get('vcodec') is not None and
1444                 fdict.get('vcodec') != 'none'):
1445             if res:
1446                 res += ', '
1447             res += fdict['vcodec']
1448             if fdict.get('vbr') is not None:
1449                 res += '@'
1450         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1451             res += 'video@'
1452         if fdict.get('vbr') is not None:
1453             res += '%4dk' % fdict['vbr']
1454         if fdict.get('fps') is not None:
1455             res += ', %sfps' % fdict['fps']
1456         if fdict.get('acodec') is not None:
1457             if res:
1458                 res += ', '
1459             if fdict['acodec'] == 'none':
1460                 res += 'video only'
1461             else:
1462                 res += '%-5s' % fdict['acodec']
1463         elif fdict.get('abr') is not None:
1464             if res:
1465                 res += ', '
1466             res += 'audio'
1467         if fdict.get('abr') is not None:
1468             res += '@%3dk' % fdict['abr']
1469         if fdict.get('asr') is not None:
1470             res += ' (%5dHz)' % fdict['asr']
1471         if fdict.get('filesize') is not None:
1472             if res:
1473                 res += ', '
1474             res += format_bytes(fdict['filesize'])
1475         elif fdict.get('filesize_approx') is not None:
1476             if res:
1477                 res += ', '
1478             res += '~' + format_bytes(fdict['filesize_approx'])
1479         return res
1480
1481     def list_formats(self, info_dict):
1482         def line(format, idlen=20):
1483             return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1484                 format['format_id'],
1485                 format['ext'],
1486                 self.format_resolution(format),
1487                 self._format_note(format),
1488             ))
1489
1490         formats = info_dict.get('formats', [info_dict])
1491         idlen = max(len('format code'),
1492                     max(len(f['format_id']) for f in formats))
1493         formats_s = [
1494             line(f, idlen) for f in formats
1495             if f.get('preference') is None or f['preference'] >= -1000]
1496         if len(formats) > 1:
1497             formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1498             formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1499
1500         header_line = line({
1501             'format_id': 'format code', 'ext': 'extension',
1502             'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1503         self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1504                        (info_dict['id'], header_line, '\n'.join(formats_s)))
1505
1506     def urlopen(self, req):
1507         """ Start an HTTP download """
1508
1509         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1510         # always respected by websites, some tend to give out URLs with non percent-encoded
1511         # non-ASCII characters (see telemb.py, ard.py [#3412])
1512         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1513         # To work around aforementioned issue we will replace request's original URL with
1514         # percent-encoded one
1515         req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
1516         url = req if req_is_string else req.get_full_url()
1517         url_escaped = escape_url(url)
1518
1519         # Substitute URL if any change after escaping
1520         if url != url_escaped:
1521             if req_is_string:
1522                 req = url_escaped
1523             else:
1524                 req = compat_urllib_request.Request(
1525                     url_escaped, data=req.data, headers=req.headers,
1526                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1527
1528         return self._opener.open(req, timeout=self._socket_timeout)
1529
1530     def print_debug_header(self):
1531         if not self.params.get('verbose'):
1532             return
1533
1534         if type('') is not compat_str:
1535             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1536             self.report_warning(
1537                 'Your Python is broken! Update to a newer and supported version')
1538
1539         stdout_encoding = getattr(
1540             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1541         encoding_str = (
1542             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1543                 locale.getpreferredencoding(),
1544                 sys.getfilesystemencoding(),
1545                 stdout_encoding,
1546                 self.get_encoding()))
1547         write_string(encoding_str, encoding=None)
1548
1549         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1550         try:
1551             sp = subprocess.Popen(
1552                 ['git', 'rev-parse', '--short', 'HEAD'],
1553                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1554                 cwd=os.path.dirname(os.path.abspath(__file__)))
1555             out, err = sp.communicate()
1556             out = out.decode().strip()
1557             if re.match('[0-9a-f]+', out):
1558                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1559         except:
1560             try:
1561                 sys.exc_clear()
1562             except:
1563                 pass
1564         self._write_string('[debug] Python version %s - %s\n' % (
1565             platform.python_version(), platform_name()))
1566
1567         exe_versions = FFmpegPostProcessor.get_versions()
1568         exe_versions['rtmpdump'] = rtmpdump_version()
1569         exe_str = ', '.join(
1570             '%s %s' % (exe, v)
1571             for exe, v in sorted(exe_versions.items())
1572             if v
1573         )
1574         if not exe_str:
1575             exe_str = 'none'
1576         self._write_string('[debug] exe versions: %s\n' % exe_str)
1577
1578         proxy_map = {}
1579         for handler in self._opener.handlers:
1580             if hasattr(handler, 'proxies'):
1581                 proxy_map.update(handler.proxies)
1582         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1583
1584         if self.params.get('call_home', False):
1585             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1586             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1587             latest_version = self.urlopen(
1588                 'https://yt-dl.org/latest/version').read().decode('utf-8')
1589             if version_tuple(latest_version) > version_tuple(__version__):
1590                 self.report_warning(
1591                     'You are using an outdated version (newest version: %s)! '
1592                     'See https://yt-dl.org/update if you need help updating.' %
1593                     latest_version)
1594
1595     def _setup_opener(self):
1596         timeout_val = self.params.get('socket_timeout')
1597         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1598
1599         opts_cookiefile = self.params.get('cookiefile')
1600         opts_proxy = self.params.get('proxy')
1601
1602         if opts_cookiefile is None:
1603             self.cookiejar = compat_cookiejar.CookieJar()
1604         else:
1605             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1606                 opts_cookiefile)
1607             if os.access(opts_cookiefile, os.R_OK):
1608                 self.cookiejar.load()
1609
1610         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1611             self.cookiejar)
1612         if opts_proxy is not None:
1613             if opts_proxy == '':
1614                 proxies = {}
1615             else:
1616                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1617         else:
1618             proxies = compat_urllib_request.getproxies()
1619             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1620             if 'http' in proxies and 'https' not in proxies:
1621                 proxies['https'] = proxies['http']
1622         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1623
1624         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1625         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1626         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1627         opener = compat_urllib_request.build_opener(
1628             https_handler, proxy_handler, cookie_processor, ydlh)
1629         # Delete the default user-agent header, which would otherwise apply in
1630         # cases where our custom HTTP handler doesn't come into play
1631         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1632         opener.addheaders = []
1633         self._opener = opener
1634
1635     def encode(self, s):
1636         if isinstance(s, bytes):
1637             return s  # Already encoded
1638
1639         try:
1640             return s.encode(self.get_encoding())
1641         except UnicodeEncodeError as err:
1642             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1643             raise
1644
1645     def get_encoding(self):
1646         encoding = self.params.get('encoding')
1647         if encoding is None:
1648             encoding = preferredencoding()
1649         return encoding