_ Git - youtube-dl/blob - youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import
   5
   6 import errno
   7 import io
   8 import os
   9 import re
  10 import shutil
  11 import socket
  12 import sys
  13 import time
  14 import traceback
  15
  16 if os.name == 'nt':
  17     import ctypes
  18
  19 from .utils import (
  20     compat_http_client,
  21     compat_print,
  22     compat_str,
  23     compat_urllib_error,
  24     compat_urllib_request,
  25     ContentTooShortError,
  26     date_from_str,
  27     DateRange,
  28     determine_ext,
  29     DownloadError,
  30     encodeFilename,
  31     ExtractorError,
  32     locked_file,
  33     MaxDownloadsReached,
  34     PostProcessingError,
  35     preferredencoding,
  36     SameFileError,
  37     sanitize_filename,
  38     subtitles_filename,
  39     takewhile_inclusive,
  40     UnavailableVideoError,
  41     write_json_file,
  42     write_string,
  43 )
  44 from .extractor import get_info_extractor, gen_extractors
  45 from .FileDownloader import FileDownloader
  46
  47
  48 class YoutubeDL(object):
  49     """YoutubeDL class.
  50
  51     YoutubeDL objects are the ones responsible of downloading the
  52     actual video file and writing it to disk if the user has requested
  53     it, among some other tasks. In most cases there should be one per
  54     program. As, given a video URL, the downloader doesn't know how to
  55     extract all the needed information, task that InfoExtractors do, it
  56     has to pass the URL to one of them.
  57
  58     For this, YoutubeDL objects have a method that allows
  59     InfoExtractors to be registered in a given order. When it is passed
  60     a URL, the YoutubeDL object handles it to the first InfoExtractor it
  61     finds that reports being able to handle it. The InfoExtractor extracts
  62     all the information about the video or videos the URL refers to, and
  63     YoutubeDL process the extracted information, possibly using a File
  64     Downloader to download the video.
  65
  66     YoutubeDL objects accept a lot of parameters. In order not to saturate
  67     the object constructor with arguments, it receives a dictionary of
  68     options instead. These options are available through the params
  69     attribute for the InfoExtractors to use. The YoutubeDL also
  70     registers itself as the downloader in charge for the InfoExtractors
  71     that are added to it, so this is a "mutual registration".
  72
  73     Available options:
  74
  75     username:          Username for authentication purposes.
  76     password:          Password for authentication purposes.
  77     videopassword:     Password for accessing a video.
  78     usenetrc:          Use netrc for authentication instead.
  79     verbose:           Print additional info to stdout.
  80     quiet:             Do not print messages to stdout.
  81     forceurl:          Force printing final URL.
  82     forcetitle:        Force printing title.
  83     forceid:           Force printing ID.
  84     forcethumbnail:    Force printing thumbnail URL.
  85     forcedescription:  Force printing description.
  86     forcefilename:     Force printing final filename.
  87     forcejson:         Force printing json information.
  88     simulate:          Do not download the video files.
  89     format:            Video format code.
  90     format_limit:      Highest quality format to try.
  91     outtmpl:           Template for output names.
  92     restrictfilenames: Do not allow "&" and spaces in file names
  93     ignoreerrors:      Do not stop on download errors.
  94     nooverwrites:      Prevent overwriting files.
  95     playliststart:     Playlist item to start at.
  96     playlistend:       Playlist item to end at.
  97     matchtitle:        Download only matching titles.
  98     rejecttitle:       Reject downloads for matching titles.
  99     logtostderr:       Log messages to stderr instead of stdout.
 100     writedescription:  Write the video description to a .description file
 101     writeinfojson:     Write the video metadata to a .info.json file
 102     writeannotations:  Write the video annotations to a .annotations.xml file
 103     writethumbnail:    Write the thumbnail image to a file
 104     writesubtitles:    Write the video subtitles to a file
 105     writeautomaticsub: Write the automatic subtitles to a file
 106     allsubtitles:      Downloads all the subtitles of the video
 107                        (requires writesubtitles or writeautomaticsub)
 108     listsubtitles:     Lists all available subtitles for the video
 109     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
 110     subtitleslangs:    List of languages of the subtitles to download
 111     keepvideo:         Keep the video file after post-processing
 112     daterange:         A DateRange object, download only if the upload_date is in the range.
 113     skip_download:     Skip the actual download of the video file
 114     cachedir:          Location of the cache files in the filesystem.
 115                        None to disable filesystem cache.
 116     noplaylist:        Download single video instead of a playlist if in doubt.
 117     age_limit:         An integer representing the user's age in years.
 118                        Unsuitable videos for the given age are skipped.
 119     downloadarchive:   File name of a file where all downloads are recorded.
 120                        Videos already present in the file are not downloaded
 121                        again.
 122
 123     The following parameters are not used by YoutubeDL itself, they are used by
 124     the FileDownloader:
 125     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 126     noresizebuffer, retries, continuedl, noprogress, consoletitle
 127     """
 128
 129     params = None
 130     _ies = []
 131     _pps = []
 132     _download_retcode = None
 133     _num_downloads = None
 134     _screen_file = None
 135
 136     def __init__(self, params):
 137         """Create a FileDownloader object with the given options."""
 138         self._ies = []
 139         self._ies_instances = {}
 140         self._pps = []
 141         self._progress_hooks = []
 142         self._download_retcode = 0
 143         self._num_downloads = 0
 144         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 145
 146         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 147                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 148                 and not params['restrictfilenames']):
 149             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 150             self.report_warning(
 151                 u'Assuming --restrict-filenames since file system encoding '
 152                 u'cannot encode all charactes. '
 153                 u'Set the LC_ALL environment variable to fix this.')
 154             params['restrictfilenames'] = True
 155
 156         self.params = params
 157         self.fd = FileDownloader(self, self.params)
 158
 159         if '%(stitle)s' in self.params['outtmpl']:
 160             self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 161
 162     def add_info_extractor(self, ie):
 163         """Add an InfoExtractor object to the end of the list."""
 164         self._ies.append(ie)
 165         self._ies_instances[ie.ie_key()] = ie
 166         ie.set_downloader(self)
 167
 168     def get_info_extractor(self, ie_key):
 169         """
 170         Get an instance of an IE with name ie_key, it will try to get one from
 171         the _ies list, if there's no instance it will create a new one and add
 172         it to the extractor list.
 173         """
 174         ie = self._ies_instances.get(ie_key)
 175         if ie is None:
 176             ie = get_info_extractor(ie_key)()
 177             self.add_info_extractor(ie)
 178         return ie
 179
 180     def add_default_info_extractors(self):
 181         """
 182         Add the InfoExtractors returned by gen_extractors to the end of the list
 183         """
 184         for ie in gen_extractors():
 185             self.add_info_extractor(ie)
 186
 187     def add_post_processor(self, pp):
 188         """Add a PostProcessor object to the end of the chain."""
 189         self._pps.append(pp)
 190         pp.set_downloader(self)
 191
 192     def to_screen(self, message, skip_eol=False):
 193         """Print message to stdout if not in quiet mode."""
 194         if not self.params.get('quiet', False):
 195             terminator = [u'\n', u''][skip_eol]
 196             output = message + terminator
 197             write_string(output, self._screen_file)
 198
 199     def to_stderr(self, message):
 200         """Print message to stderr."""
 201         assert type(message) == type(u'')
 202         output = message + u'\n'
 203         if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
 204             output = output.encode(preferredencoding())
 205         sys.stderr.write(output)
 206
 207     def to_console_title(self, message):
 208         if not self.params.get('consoletitle', False):
 209             return
 210         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 211             # c_wchar_p() might not be necessary if `message` is
 212             # already of type unicode()
 213             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 214         elif 'TERM' in os.environ:
 215             write_string(u'\033]0;%s\007' % message, self._screen_file)
 216
 217     def save_console_title(self):
 218         if not self.params.get('consoletitle', False):
 219             return
 220         if 'TERM' in os.environ:
 221             # Save the title on stack
 222             write_string(u'\033[22;0t', self._screen_file)
 223
 224     def restore_console_title(self):
 225         if not self.params.get('consoletitle', False):
 226             return
 227         if 'TERM' in os.environ:
 228             # Restore the title from stack
 229             write_string(u'\033[23;0t', self._screen_file)
 230
 231     def __enter__(self):
 232         self.save_console_title()
 233         return self
 234
 235     def __exit__(self, *args):
 236         self.restore_console_title()
 237
 238     def fixed_template(self):
 239         """Checks if the output template is fixed."""
 240         return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
 241
 242     def trouble(self, message=None, tb=None):
 243         """Determine action to take when a download problem appears.
 244
 245         Depending on if the downloader has been configured to ignore
 246         download errors or not, this method may throw an exception or
 247         not when errors are found, after printing the message.
 248
 249         tb, if given, is additional traceback information.
 250         """
 251         if message is not None:
 252             self.to_stderr(message)
 253         if self.params.get('verbose'):
 254             if tb is None:
 255                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 256                     tb = u''
 257                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 258                         tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 259                     tb += compat_str(traceback.format_exc())
 260                 else:
 261                     tb_data = traceback.format_list(traceback.extract_stack())
 262                     tb = u''.join(tb_data)
 263             self.to_stderr(tb)
 264         if not self.params.get('ignoreerrors', False):
 265             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 266                 exc_info = sys.exc_info()[1].exc_info
 267             else:
 268                 exc_info = sys.exc_info()
 269             raise DownloadError(message, exc_info)
 270         self._download_retcode = 1
 271
 272     def report_warning(self, message):
 273         '''
 274         Print the message to stderr, it will be prefixed with 'WARNING:'
 275         If stderr is a tty file the 'WARNING:' will be colored
 276         '''
 277         if sys.stderr.isatty() and os.name != 'nt':
 278             _msg_header = u'\033[0;33mWARNING:\033[0m'
 279         else:
 280             _msg_header = u'WARNING:'
 281         warning_message = u'%s %s' % (_msg_header, message)
 282         self.to_stderr(warning_message)
 283
 284     def report_error(self, message, tb=None):
 285         '''
 286         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 287         in red if stderr is a tty file.
 288         '''
 289         if sys.stderr.isatty() and os.name != 'nt':
 290             _msg_header = u'\033[0;31mERROR:\033[0m'
 291         else:
 292             _msg_header = u'ERROR:'
 293         error_message = u'%s %s' % (_msg_header, message)
 294         self.trouble(error_message, tb)
 295
 296     def report_writedescription(self, descfn):
 297         """ Report that the description file is being written """
 298         self.to_screen(u'[info] Writing video description to: ' + descfn)
 299
 300     def report_writesubtitles(self, sub_filename):
 301         """ Report that the subtitles file is being written """
 302         self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
 303
 304     def report_writeinfojson(self, infofn):
 305         """ Report that the metadata file has been written """
 306         self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
 307
 308     def report_writeannotations(self, annofn):
 309         """ Report that the annotations file has been written. """
 310         self.to_screen(u'[info] Writing video annotations to: ' + annofn)
 311
 312     def report_file_already_downloaded(self, file_name):
 313         """Report file has already been fully downloaded."""
 314         try:
 315             self.to_screen(u'[download] %s has already been downloaded' % file_name)
 316         except UnicodeEncodeError:
 317             self.to_screen(u'[download] The file has already been downloaded')
 318
 319     def increment_downloads(self):
 320         """Increment the ordinal that assigns a number to each file."""
 321         self._num_downloads += 1
 322
 323     def prepare_filename(self, info_dict):
 324         """Generate the output filename."""
 325         try:
 326             template_dict = dict(info_dict)
 327
 328             template_dict['epoch'] = int(time.time())
 329             autonumber_size = self.params.get('autonumber_size')
 330             if autonumber_size is None:
 331                 autonumber_size = 5
 332             autonumber_templ = u'%0' + str(autonumber_size) + u'd'
 333             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 334             if template_dict.get('playlist_index') is not None:
 335                 template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
 336
 337             sanitize = lambda k, v: sanitize_filename(
 338                 u'NA' if v is None else compat_str(v),
 339                 restricted=self.params.get('restrictfilenames'),
 340                 is_id=(k == u'id'))
 341             template_dict = dict((k, sanitize(k, v))
 342                                  for k, v in template_dict.items())
 343
 344             tmpl = os.path.expanduser(self.params['outtmpl'])
 345             filename = tmpl % template_dict
 346             return filename
 347         except KeyError as err:
 348             self.report_error(u'Erroneous output template')
 349             return None
 350         except ValueError as err:
 351             self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
 352             return None
 353
 354     def _match_entry(self, info_dict):
 355         """ Returns None iff the file should be downloaded """
 356
 357         title = info_dict['title']
 358         matchtitle = self.params.get('matchtitle', False)
 359         if matchtitle:
 360             if not re.search(matchtitle, title, re.IGNORECASE):
 361                 return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
 362         rejecttitle = self.params.get('rejecttitle', False)
 363         if rejecttitle:
 364             if re.search(rejecttitle, title, re.IGNORECASE):
 365                 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 366         date = info_dict.get('upload_date', None)
 367         if date is not None:
 368             dateRange = self.params.get('daterange', DateRange())
 369             if date not in dateRange:
 370                 return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 371         age_limit = self.params.get('age_limit')
 372         if age_limit is not None:
 373             if age_limit < info_dict.get('age_limit', 0):
 374                 return u'Skipping "' + title + '" because it is age restricted'
 375         if self.in_download_archive(info_dict):
 376             return (u'%(title)s has already been recorded in archive'
 377                     % info_dict)
 378         return None
 379
 380     @staticmethod
 381     def add_extra_info(info_dict, extra_info):
 382         '''Set the keys from extra_info in info dict if they are missing'''
 383         for key, value in extra_info.items():
 384             info_dict.setdefault(key, value)
 385
 386     def extract_info(self, url, download=True, ie_key=None, extra_info={}):
 387         '''
 388         Returns a list with a dictionary for each video we find.
 389         If 'download', also downloads the videos.
 390         extra_info is a dict containing the extra values to add to each result
 391          '''
 392
 393         if ie_key:
 394             ies = [self.get_info_extractor(ie_key)]
 395         else:
 396             ies = self._ies
 397
 398         for ie in ies:
 399             if not ie.suitable(url):
 400                 continue
 401
 402             if not ie.working():
 403                 self.report_warning(u'The program functionality for this site has been marked as broken, '
 404                                     u'and will probably not work.')
 405
 406             try:
 407                 ie_result = ie.extract(url)
 408                 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
 409                     break
 410                 if isinstance(ie_result, list):
 411                     # Backwards compatibility: old IE result format
 412                     ie_result = {
 413                         '_type': 'compat_list',
 414                         'entries': ie_result,
 415                     }
 416                 self.add_extra_info(ie_result,
 417                     {
 418                         'extractor': ie.IE_NAME,
 419                         'webpage_url': url,
 420                         'extractor_key': ie.ie_key(),
 421                     })
 422                 return self.process_ie_result(ie_result, download, extra_info)
 423             except ExtractorError as de: # An error we somewhat expected
 424                 self.report_error(compat_str(de), de.format_traceback())
 425                 break
 426             except Exception as e:
 427                 if self.params.get('ignoreerrors', False):
 428                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 429                     break
 430                 else:
 431                     raise
 432         else:
 433             self.report_error(u'no suitable InfoExtractor: %s' % url)
 434
 435     def process_ie_result(self, ie_result, download=True, extra_info={}):
 436         """
 437         Take the result of the ie(may be modified) and resolve all unresolved
 438         references (URLs, playlist items).
 439
 440         It will also download the videos if 'download'.
 441         Returns the resolved ie_result.
 442         """
 443
 444         result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
 445         if result_type == 'video':
 446             self.add_extra_info(ie_result, extra_info)
 447             return self.process_video_result(ie_result, download=download)
 448         elif result_type == 'url':
 449             # We have to add extra_info to the results because it may be
 450             # contained in a playlist
 451             return self.extract_info(ie_result['url'],
 452                                      download,
 453                                      ie_key=ie_result.get('ie_key'),
 454                                      extra_info=extra_info)
 455         elif result_type == 'playlist':
 456             self.add_extra_info(ie_result, extra_info)
 457             # We process each entry in the playlist
 458             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 459             self.to_screen(u'[download] Downloading playlist: %s' % playlist)
 460
 461             playlist_results = []
 462
 463             n_all_entries = len(ie_result['entries'])
 464             playliststart = self.params.get('playliststart', 1) - 1
 465             playlistend = self.params.get('playlistend', -1)
 466
 467             if playlistend == -1:
 468                 entries = ie_result['entries'][playliststart:]
 469             else:
 470                 entries = ie_result['entries'][playliststart:playlistend]
 471
 472             n_entries = len(entries)
 473
 474             self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
 475                 (ie_result['extractor'], playlist, n_all_entries, n_entries))
 476
 477             for i, entry in enumerate(entries, 1):
 478                 self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
 479                 extra = {
 480                     'playlist': playlist,
 481                     'playlist_index': i + playliststart,
 482                     'extractor': ie_result['extractor'],
 483                     'webpage_url': ie_result['webpage_url'],
 484                     'extractor_key': ie_result['extractor_key'],
 485                 }
 486                 entry_result = self.process_ie_result(entry,
 487                                                       download=download,
 488                                                       extra_info=extra)
 489                 playlist_results.append(entry_result)
 490             ie_result['entries'] = playlist_results
 491             return ie_result
 492         elif result_type == 'compat_list':
 493             def _fixup(r):
 494                 self.add_extra_info(r,
 495                     {
 496                         'extractor': ie_result['extractor'],
 497                         'webpage_url': ie_result['webpage_url'],
 498                         'extractor_key': ie_result['extractor_key'],
 499                     })
 500                 return r
 501             ie_result['entries'] = [
 502                 self.process_ie_result(_fixup(r), download, extra_info)
 503                 for r in ie_result['entries']
 504             ]
 505             return ie_result
 506         else:
 507             raise Exception('Invalid result type: %s' % result_type)
 508
 509     def select_format(self, format_spec, available_formats):
 510         if format_spec == 'best' or format_spec is None:
 511             return available_formats[-1]
 512         elif format_spec == 'worst':
 513             return available_formats[0]
 514         else:
 515             extensions = [u'mp4', u'flv', u'webm', u'3gp']
 516             if format_spec in extensions:
 517                 filter_f = lambda f: f['ext'] == format_spec
 518             else:
 519                 filter_f = lambda f: f['format_id'] == format_spec
 520             matches = list(filter(filter_f, available_formats))
 521             if matches:
 522                 return matches[-1]
 523         return None
 524
 525     def process_video_result(self, info_dict, download=True):
 526         assert info_dict.get('_type', 'video') == 'video'
 527
 528         if 'playlist' not in info_dict:
 529             # It isn't part of a playlist
 530             info_dict['playlist'] = None
 531             info_dict['playlist_index'] = None
 532
 533         # This extractors handle format selection themselves
 534         if info_dict['extractor'] in [u'youtube', u'Youku']:
 535             if download:
 536                 self.process_info(info_dict)
 537             return info_dict
 538
 539         # We now pick which formats have to be downloaded
 540         if info_dict.get('formats') is None:
 541             # There's only one format available
 542             formats = [info_dict]
 543         else:
 544             formats = info_dict['formats']
 545
 546         # We check that all the formats have the format and format_id fields
 547         for (i, format) in enumerate(formats):
 548             if format.get('format_id') is None:
 549                 format['format_id'] = compat_str(i)
 550             if format.get('format') is None:
 551                 format['format'] = u'{id} - {res}{note}'.format(
 552                     id=format['format_id'],
 553                     res=self.format_resolution(format),
 554                     note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
 555                 )
 556             # Automatically determine file extension if missing
 557             if 'ext' not in format:
 558                 format['ext'] = determine_ext(format['url'])
 559
 560         if self.params.get('listformats', None):
 561             self.list_formats(info_dict)
 562             return
 563
 564         format_limit = self.params.get('format_limit', None)
 565         if format_limit:
 566             formats = list(takewhile_inclusive(
 567                 lambda f: f['format_id'] != format_limit, formats
 568             ))
 569         if self.params.get('prefer_free_formats'):
 570             def _free_formats_key(f):
 571                 try:
 572                     ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
 573                 except ValueError:
 574                     ext_ord = -1
 575                 # We only compare the extension if they have the same height and width
 576                 return (f.get('height'), f.get('width'), ext_ord)
 577             formats = sorted(formats, key=_free_formats_key)
 578
 579         req_format = self.params.get('format', 'best')
 580         if req_format is None:
 581             req_format = 'best'
 582         formats_to_download = []
 583         # The -1 is for supporting YoutubeIE
 584         if req_format in ('-1', 'all'):
 585             formats_to_download = formats
 586         else:
 587             # We can accept formats requestd in the format: 34/5/best, we pick
 588             # the first that is available, starting from left
 589             req_formats = req_format.split('/')
 590             for rf in req_formats:
 591                 selected_format = self.select_format(rf, formats)
 592                 if selected_format is not None:
 593                     formats_to_download = [selected_format]
 594                     break
 595         if not formats_to_download:
 596             raise ExtractorError(u'requested format not available',
 597                                  expected=True)
 598
 599         if download:
 600             if len(formats_to_download) > 1:
 601                 self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
 602             for format in formats_to_download:
 603                 new_info = dict(info_dict)
 604                 new_info.update(format)
 605                 self.process_info(new_info)
 606         # We update the info dict with the best quality format (backwards compatibility)
 607         info_dict.update(formats_to_download[-1])
 608         return info_dict
 609
 610     def process_info(self, info_dict):
 611         """Process a single resolved IE result."""
 612
 613         assert info_dict.get('_type', 'video') == 'video'
 614         #We increment the download the download count here to match the previous behaviour.
 615         self.increment_downloads()
 616
 617         info_dict['fulltitle'] = info_dict['title']
 618         if len(info_dict['title']) > 200:
 619             info_dict['title'] = info_dict['title'][:197] + u'...'
 620
 621         # Keep for backwards compatibility
 622         info_dict['stitle'] = info_dict['title']
 623
 624         if not 'format' in info_dict:
 625             info_dict['format'] = info_dict['ext']
 626
 627         reason = self._match_entry(info_dict)
 628         if reason is not None:
 629             self.to_screen(u'[download] ' + reason)
 630             return
 631
 632         max_downloads = self.params.get('max_downloads')
 633         if max_downloads is not None:
 634             if self._num_downloads > int(max_downloads):
 635                 raise MaxDownloadsReached()
 636
 637         filename = self.prepare_filename(info_dict)
 638
 639         # Forced printings
 640         if self.params.get('forcetitle', False):
 641             compat_print(info_dict['title'])
 642         if self.params.get('forceid', False):
 643             compat_print(info_dict['id'])
 644         if self.params.get('forceurl', False):
 645             # For RTMP URLs, also include the playpath
 646             compat_print(info_dict['url'] + info_dict.get('play_path', u''))
 647         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
 648             compat_print(info_dict['thumbnail'])
 649         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
 650             compat_print(info_dict['description'])
 651         if self.params.get('forcefilename', False) and filename is not None:
 652             compat_print(filename)
 653         if self.params.get('forceformat', False):
 654             compat_print(info_dict['format'])
 655         if self.params.get('forcejson', False):
 656             compat_print(json.dumps(info_dict))
 657
 658         # Do nothing else if in simulate mode
 659         if self.params.get('simulate', False):
 660             return
 661
 662         if filename is None:
 663             return
 664
 665         try:
 666             dn = os.path.dirname(encodeFilename(filename))
 667             if dn != '' and not os.path.exists(dn):
 668                 os.makedirs(dn)
 669         except (OSError, IOError) as err:
 670             self.report_error(u'unable to create directory ' + compat_str(err))
 671             return
 672
 673         if self.params.get('writedescription', False):
 674             try:
 675                 descfn = filename + u'.description'
 676                 self.report_writedescription(descfn)
 677                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
 678                     descfile.write(info_dict['description'])
 679             except (KeyError, TypeError):
 680                 self.report_warning(u'There\'s no description to write.')
 681             except (OSError, IOError):
 682                 self.report_error(u'Cannot write description file ' + descfn)
 683                 return
 684
 685         if self.params.get('writeannotations', False):
 686             try:
 687                 annofn = filename + u'.annotations.xml'
 688                 self.report_writeannotations(annofn)
 689                 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
 690                     annofile.write(info_dict['annotations'])
 691             except (KeyError, TypeError):
 692                 self.report_warning(u'There are no annotations to write.')
 693             except (OSError, IOError):
 694                 self.report_error(u'Cannot write annotations file: ' + annofn)
 695                 return
 696
 697         subtitles_are_requested = any([self.params.get('writesubtitles', False),
 698                                        self.params.get('writeautomaticsub')])
 699
 700         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
 701             # subtitles download errors are already managed as troubles in relevant IE
 702             # that way it will silently go on when used with unsupporting IE
 703             subtitles = info_dict['subtitles']
 704             sub_format = self.params.get('subtitlesformat', 'srt')
 705             for sub_lang in subtitles.keys():
 706                 sub = subtitles[sub_lang]
 707                 if sub is None:
 708                     continue
 709                 try:
 710                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
 711                     self.report_writesubtitles(sub_filename)
 712                     with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
 713                             subfile.write(sub)
 714                 except (OSError, IOError):
 715                     self.report_error(u'Cannot write subtitles file ' + descfn)
 716                     return
 717
 718         if self.params.get('writeinfojson', False):
 719             infofn = filename + u'.info.json'
 720             self.report_writeinfojson(infofn)
 721             try:
 722                 json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
 723                 write_json_file(json_info_dict, encodeFilename(infofn))
 724             except (OSError, IOError):
 725                 self.report_error(u'Cannot write metadata to JSON file ' + infofn)
 726                 return
 727
 728         if self.params.get('writethumbnail', False):
 729             if info_dict.get('thumbnail') is not None:
 730                 thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
 731                 thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
 732                 self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
 733                                (info_dict['extractor'], info_dict['id']))
 734                 try:
 735                     uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
 736                     with open(thumb_filename, 'wb') as thumbf:
 737                         shutil.copyfileobj(uf, thumbf)
 738                     self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
 739                         (info_dict['extractor'], info_dict['id'], thumb_filename))
 740                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 741                     self.report_warning(u'Unable to download thumbnail "%s": %s' %
 742                         (info_dict['thumbnail'], compat_str(err)))
 743
 744         if not self.params.get('skip_download', False):
 745             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
 746                 success = True
 747             else:
 748                 try:
 749                     success = self.fd._do_download(filename, info_dict)
 750                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 751                     self.report_error(u'unable to download video data: %s' % str(err))
 752                     return
 753                 except (OSError, IOError) as err:
 754                     raise UnavailableVideoError(err)
 755                 except (ContentTooShortError, ) as err:
 756                     self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
 757                     return
 758
 759             if success:
 760                 try:
 761                     self.post_process(filename, info_dict)
 762                 except (PostProcessingError) as err:
 763                     self.report_error(u'postprocessing: %s' % str(err))
 764                     return
 765
 766         self.record_download_archive(info_dict)
 767
 768     def download(self, url_list):
 769         """Download a given list of URLs."""
 770         if len(url_list) > 1 and self.fixed_template():
 771             raise SameFileError(self.params['outtmpl'])
 772
 773         for url in url_list:
 774             try:
 775                 #It also downloads the videos
 776                 videos = self.extract_info(url)
 777             except UnavailableVideoError:
 778                 self.report_error(u'unable to download video')
 779             except MaxDownloadsReached:
 780                 self.to_screen(u'[info] Maximum number of downloaded files reached.')
 781                 raise
 782
 783         return self._download_retcode
 784
 785     def post_process(self, filename, ie_info):
 786         """Run all the postprocessors on the given file."""
 787         info = dict(ie_info)
 788         info['filepath'] = filename
 789         keep_video = None
 790         for pp in self._pps:
 791             try:
 792                 keep_video_wish, new_info = pp.run(info)
 793                 if keep_video_wish is not None:
 794                     if keep_video_wish:
 795                         keep_video = keep_video_wish
 796                     elif keep_video is None:
 797                         # No clear decision yet, let IE decide
 798                         keep_video = keep_video_wish
 799             except PostProcessingError as e:
 800                 self.report_error(e.msg)
 801         if keep_video is False and not self.params.get('keepvideo', False):
 802             try:
 803                 self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
 804                 os.remove(encodeFilename(filename))
 805             except (IOError, OSError):
 806                 self.report_warning(u'Unable to remove downloaded video file')
 807
 808     def in_download_archive(self, info_dict):
 809         fn = self.params.get('download_archive')
 810         if fn is None:
 811             return False
 812         vid_id = info_dict['extractor'] + u' ' + info_dict['id']
 813         try:
 814             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 815                 for line in archive_file:
 816                     if line.strip() == vid_id:
 817                         return True
 818         except IOError as ioe:
 819             if ioe.errno != errno.ENOENT:
 820                 raise
 821         return False
 822
 823     def record_download_archive(self, info_dict):
 824         fn = self.params.get('download_archive')
 825         if fn is None:
 826             return
 827         vid_id = info_dict['extractor'] + u' ' + info_dict['id']
 828         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
 829             archive_file.write(vid_id + u'\n')
 830
 831     @staticmethod
 832     def format_resolution(format, default='unknown'):
 833         if format.get('_resolution') is not None:
 834             return format['_resolution']
 835         if format.get('height') is not None:
 836             if format.get('width') is not None:
 837                 res = u'%sx%s' % (format['width'], format['height'])
 838             else:
 839                 res = u'%sp' % format['height']
 840         else:
 841             res = default
 842         return res
 843
 844     def list_formats(self, info_dict):
 845         def format_note(fdict):
 846             if fdict.get('format_note') is not None:
 847                 return fdict['format_note']
 848             res = u''
 849             if fdict.get('vcodec') is not None:
 850                 res += u'%-5s' % fdict['vcodec']
 851             elif fdict.get('vbr') is not None:
 852                 res += u'video'
 853             if fdict.get('vbr') is not None:
 854                 res += u'@%4dk' % fdict['vbr']
 855             if fdict.get('acodec') is not None:
 856                 if res:
 857                     res += u', '
 858                 res += u'%-5s' % fdict['acodec']
 859             elif fdict.get('abr') is not None:
 860                 if res:
 861                     res += u', '
 862                 res += 'audio'
 863             if fdict.get('abr') is not None:
 864                 res += u'@%3dk' % fdict['abr']
 865             return res
 866
 867         def line(format):
 868             return (u'%-20s%-10s%-12s%s' % (
 869                 format['format_id'],
 870                 format['ext'],
 871                 self.format_resolution(format),
 872                 format_note(format),
 873                 )
 874             )
 875
 876         formats = info_dict.get('formats', [info_dict])
 877         formats_s = list(map(line, formats))
 878         if len(formats) > 1:
 879             formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
 880             formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
 881
 882         header_line = line({
 883             'format_id': u'format code', 'ext': u'extension',
 884             '_resolution': u'resolution', 'format_note': u'note'})
 885         self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
 886                        (info_dict['id'], header_line, u"\n".join(formats_s)))