_ Git - youtube-dl/blob - youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import
   5
   6 import errno
   7 import io
   8 import json
   9 import os
  10 import re
  11 import shutil
  12 import socket
  13 import sys
  14 import time
  15 import traceback
  16
  17 if os.name == 'nt':
  18     import ctypes
  19
  20 from .utils import (
  21     compat_http_client,
  22     compat_print,
  23     compat_str,
  24     compat_urllib_error,
  25     compat_urllib_request,
  26     ContentTooShortError,
  27     date_from_str,
  28     DateRange,
  29     determine_ext,
  30     DownloadError,
  31     encodeFilename,
  32     ExtractorError,
  33     locked_file,
  34     MaxDownloadsReached,
  35     PostProcessingError,
  36     preferredencoding,
  37     SameFileError,
  38     sanitize_filename,
  39     subtitles_filename,
  40     takewhile_inclusive,
  41     UnavailableVideoError,
  42     write_json_file,
  43     write_string,
  44 )
  45 from .extractor import get_info_extractor, gen_extractors
  46 from .FileDownloader import FileDownloader
  47
  48
  49 class YoutubeDL(object):
  50     """YoutubeDL class.
  51
  52     YoutubeDL objects are the ones responsible for downloading the
  53     actual video file and writing it to disk if the user has requested
  54     it, among some other tasks. In most cases there should be one per
  55     program. As, given a video URL, the downloader doesn't know how to
  56     extract all the needed information, task that InfoExtractors do, it
  57     has to pass the URL to one of them.
  58
  59     For this, YoutubeDL objects have a method that allows
  60     InfoExtractors to be registered in a given order. When it is passed
  61     a URL, the YoutubeDL object hands it to the first InfoExtractor it
  62     finds that reports being able to handle it. The InfoExtractor extracts
  63     all the information about the video or videos the URL refers to, and
  64     YoutubeDL processes the extracted information, possibly using a File
  65     Downloader to download the video.
  66
  67     YoutubeDL objects accept a lot of parameters. In order not to saturate
  68     the object constructor with arguments, it receives a dictionary of
  69     options instead. These options are available through the params
  70     attribute for the InfoExtractors to use. The YoutubeDL also
  71     registers itself as the downloader in charge for the InfoExtractors
  72     that are added to it, so this is a "mutual registration".
  73
  74     Available options:
  75
  76     username:          Username for authentication purposes.
  77     password:          Password for authentication purposes.
  78     videopassword:     Password for accessing a video.
  79     usenetrc:          Use netrc for authentication instead.
  80     verbose:           Print additional info to stdout.
  81     quiet:             Do not print messages to stdout.
  82     forceurl:          Force printing final URL.
  83     forcetitle:        Force printing title.
  84     forceid:           Force printing ID.
  85     forcethumbnail:    Force printing thumbnail URL.
  86     forcedescription:  Force printing description.
  87     forcefilename:     Force printing final filename.
  88     forcejson:         Force printing info_dict as JSON.
  89     simulate:          Do not download the video files.
  90     format:            Video format code.
  91     format_limit:      Highest quality format to try.
  92     outtmpl:           Template for output names.
  93     restrictfilenames: Do not allow "&" and spaces in file names
  94     ignoreerrors:      Do not stop on download errors.
  95     nooverwrites:      Prevent overwriting files.
  96     playliststart:     Playlist item to start at.
  97     playlistend:       Playlist item to end at.
  98     matchtitle:        Download only matching titles.
  99     rejecttitle:       Reject downloads for matching titles.
 100     logtostderr:       Log messages to stderr instead of stdout.
 101     writedescription:  Write the video description to a .description file
 102     writeinfojson:     Write the video description to a .info.json file
 103     writeannotations:  Write the video annotations to a .annotations.xml file
 104     writethumbnail:    Write the thumbnail image to a file
 105     writesubtitles:    Write the video subtitles to a file
 106     writeautomaticsub: Write the automatic subtitles to a file
 107     allsubtitles:      Downloads all the subtitles of the video
 108                        (requires writesubtitles or writeautomaticsub)
 109     listsubtitles:     Lists all available subtitles for the video
 110     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
 111     subtitleslangs:    List of languages of the subtitles to download
 112     keepvideo:         Keep the video file after post-processing
 113     daterange:         A DateRange object, download only if the upload_date is in the range.
 114     skip_download:     Skip the actual download of the video file
 115     cachedir:          Location of the cache files in the filesystem.
 116                        None to disable filesystem cache.
 117     noplaylist:        Download single video instead of a playlist if in doubt.
 118     age_limit:         An integer representing the user's age in years.
 119                        Unsuitable videos for the given age are skipped.
 120     downloadarchive:   File name of a file where all downloads are recorded.
 121                        Videos already present in the file are not downloaded
 122                        again.
 123
 124     The following parameters are not used by YoutubeDL itself, they are used by
 125     the FileDownloader:
 126     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 127     noresizebuffer, retries, continuedl, noprogress, consoletitle
 128     """
 129
 130     params = None
 131     _ies = []
 132     _pps = []
 133     _download_retcode = None
 134     _num_downloads = None
 135     _screen_file = None
 136
 137     def __init__(self, params):
 138         """Create a FileDownloader object with the given options."""
 139         self._ies = []
 140         self._ies_instances = {}
 141         self._pps = []
 142         self._progress_hooks = []
 143         self._download_retcode = 0
 144         self._num_downloads = 0
 145         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 146
 147         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 148                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 149                 and not params['restrictfilenames']):
 150             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 151             self.report_warning(
 152                 u'Assuming --restrict-filenames since file system encoding '
 153                 u'cannot encode all charactes. '
 154                 u'Set the LC_ALL environment variable to fix this.')
 155             params['restrictfilenames'] = True
 156
 157         self.params = params
 158         self.fd = FileDownloader(self, self.params)
 159
 160         if '%(stitle)s' in self.params['outtmpl']:
 161             self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 162
 163     def add_info_extractor(self, ie):
 164         """Add an InfoExtractor object to the end of the list."""
 165         self._ies.append(ie)
 166         self._ies_instances[ie.ie_key()] = ie
 167         ie.set_downloader(self)
 168
 169     def get_info_extractor(self, ie_key):
 170         """
 171         Get an instance of an IE with name ie_key, it will try to get one from
 172         the _ies list, if there's no instance it will create a new one and add
 173         it to the extractor list.
 174         """
 175         ie = self._ies_instances.get(ie_key)
 176         if ie is None:
 177             ie = get_info_extractor(ie_key)()
 178             self.add_info_extractor(ie)
 179         return ie
 180
 181     def add_default_info_extractors(self):
 182         """
 183         Add the InfoExtractors returned by gen_extractors to the end of the list
 184         """
 185         for ie in gen_extractors():
 186             self.add_info_extractor(ie)
 187
 188     def add_post_processor(self, pp):
 189         """Add a PostProcessor object to the end of the chain."""
 190         self._pps.append(pp)
 191         pp.set_downloader(self)
 192
 193     def to_screen(self, message, skip_eol=False):
 194         """Print message to stdout if not in quiet mode."""
 195         if not self.params.get('quiet', False):
 196             terminator = [u'\n', u''][skip_eol]
 197             output = message + terminator
 198             write_string(output, self._screen_file)
 199
 200     def to_stderr(self, message):
 201         """Print message to stderr."""
 202         assert type(message) == type(u'')
 203         output = message + u'\n'
 204         if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
 205             output = output.encode(preferredencoding())
 206         sys.stderr.write(output)
 207
 208     def to_console_title(self, message):
 209         if not self.params.get('consoletitle', False):
 210             return
 211         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 212             # c_wchar_p() might not be necessary if `message` is
 213             # already of type unicode()
 214             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 215         elif 'TERM' in os.environ:
 216             write_string(u'\033]0;%s\007' % message, self._screen_file)
 217
 218     def save_console_title(self):
 219         if not self.params.get('consoletitle', False):
 220             return
 221         if 'TERM' in os.environ:
 222             # Save the title on stack
 223             write_string(u'\033[22;0t', self._screen_file)
 224
 225     def restore_console_title(self):
 226         if not self.params.get('consoletitle', False):
 227             return
 228         if 'TERM' in os.environ:
 229             # Restore the title from stack
 230             write_string(u'\033[23;0t', self._screen_file)
 231
 232     def __enter__(self):
 233         self.save_console_title()
 234         return self
 235
 236     def __exit__(self, *args):
 237         self.restore_console_title()
 238
 239     def fixed_template(self):
 240         """Checks if the output template is fixed."""
 241         return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
 242
 243     def trouble(self, message=None, tb=None):
 244         """Determine action to take when a download problem appears.
 245
 246         Depending on if the downloader has been configured to ignore
 247         download errors or not, this method may throw an exception or
 248         not when errors are found, after printing the message.
 249
 250         tb, if given, is additional traceback information.
 251         """
 252         if message is not None:
 253             self.to_stderr(message)
 254         if self.params.get('verbose'):
 255             if tb is None:
 256                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 257                     tb = u''
 258                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 259                         tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 260                     tb += compat_str(traceback.format_exc())
 261                 else:
 262                     tb_data = traceback.format_list(traceback.extract_stack())
 263                     tb = u''.join(tb_data)
 264             self.to_stderr(tb)
 265         if not self.params.get('ignoreerrors', False):
 266             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 267                 exc_info = sys.exc_info()[1].exc_info
 268             else:
 269                 exc_info = sys.exc_info()
 270             raise DownloadError(message, exc_info)
 271         self._download_retcode = 1
 272
 273     def report_warning(self, message):
 274         '''
 275         Print the message to stderr, it will be prefixed with 'WARNING:'
 276         If stderr is a tty file the 'WARNING:' will be colored
 277         '''
 278         if sys.stderr.isatty() and os.name != 'nt':
 279             _msg_header = u'\033[0;33mWARNING:\033[0m'
 280         else:
 281             _msg_header = u'WARNING:'
 282         warning_message = u'%s %s' % (_msg_header, message)
 283         self.to_stderr(warning_message)
 284
 285     def report_error(self, message, tb=None):
 286         '''
 287         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 288         in red if stderr is a tty file.
 289         '''
 290         if sys.stderr.isatty() and os.name != 'nt':
 291             _msg_header = u'\033[0;31mERROR:\033[0m'
 292         else:
 293             _msg_header = u'ERROR:'
 294         error_message = u'%s %s' % (_msg_header, message)
 295         self.trouble(error_message, tb)
 296
 297     def report_writedescription(self, descfn):
 298         """ Report that the description file is being written """
 299         self.to_screen(u'[info] Writing video description to: ' + descfn)
 300
 301     def report_writesubtitles(self, sub_filename):
 302         """ Report that the subtitles file is being written """
 303         self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
 304
 305     def report_writeinfojson(self, infofn):
 306         """ Report that the metadata file has been written """
 307         self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
 308
 309     def report_writeannotations(self, annofn):
 310         """ Report that the annotations file has been written. """
 311         self.to_screen(u'[info] Writing video annotations to: ' + annofn)
 312
 313     def report_file_already_downloaded(self, file_name):
 314         """Report file has already been fully downloaded."""
 315         try:
 316             self.to_screen(u'[download] %s has already been downloaded' % file_name)
 317         except UnicodeEncodeError:
 318             self.to_screen(u'[download] The file has already been downloaded')
 319
 320     def increment_downloads(self):
 321         """Increment the ordinal that assigns a number to each file."""
 322         self._num_downloads += 1
 323
 324     def prepare_filename(self, info_dict):
 325         """Generate the output filename."""
 326         try:
 327             template_dict = dict(info_dict)
 328
 329             template_dict['epoch'] = int(time.time())
 330             autonumber_size = self.params.get('autonumber_size')
 331             if autonumber_size is None:
 332                 autonumber_size = 5
 333             autonumber_templ = u'%0' + str(autonumber_size) + u'd'
 334             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 335             if template_dict.get('playlist_index') is not None:
 336                 template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
 337
 338             sanitize = lambda k, v: sanitize_filename(
 339                 u'NA' if v is None else compat_str(v),
 340                 restricted=self.params.get('restrictfilenames'),
 341                 is_id=(k == u'id'))
 342             template_dict = dict((k, sanitize(k, v))
 343                                  for k, v in template_dict.items())
 344
 345             tmpl = os.path.expanduser(self.params['outtmpl'])
 346             filename = tmpl % template_dict
 347             return filename
 348         except KeyError as err:
 349             self.report_error(u'Erroneous output template')
 350             return None
 351         except ValueError as err:
 352             self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
 353             return None
 354
 355     def _match_entry(self, info_dict):
 356         """ Returns None iff the file should be downloaded """
 357
 358         title = info_dict['title']
 359         matchtitle = self.params.get('matchtitle', False)
 360         if matchtitle:
 361             if not re.search(matchtitle, title, re.IGNORECASE):
 362                 return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
 363         rejecttitle = self.params.get('rejecttitle', False)
 364         if rejecttitle:
 365             if re.search(rejecttitle, title, re.IGNORECASE):
 366                 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 367         date = info_dict.get('upload_date', None)
 368         if date is not None:
 369             dateRange = self.params.get('daterange', DateRange())
 370             if date not in dateRange:
 371                 return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 372         age_limit = self.params.get('age_limit')
 373         if age_limit is not None:
 374             if age_limit < info_dict.get('age_limit', 0):
 375                 return u'Skipping "' + title + '" because it is age restricted'
 376         if self.in_download_archive(info_dict):
 377             return (u'%(title)s has already been recorded in archive'
 378                     % info_dict)
 379         return None
 380
 381     @staticmethod
 382     def add_extra_info(info_dict, extra_info):
 383         '''Set the keys from extra_info in info dict if they are missing'''
 384         for key, value in extra_info.items():
 385             info_dict.setdefault(key, value)
 386
 387     def extract_info(self, url, download=True, ie_key=None, extra_info={}):
 388         '''
 389         Returns a list with a dictionary for each video we find.
 390         If 'download', also downloads the videos.
 391         extra_info is a dict containing the extra values to add to each result
 392          '''
 393
 394         if ie_key:
 395             ies = [self.get_info_extractor(ie_key)]
 396         else:
 397             ies = self._ies
 398
 399         for ie in ies:
 400             if not ie.suitable(url):
 401                 continue
 402
 403             if not ie.working():
 404                 self.report_warning(u'The program functionality for this site has been marked as broken, '
 405                                     u'and will probably not work.')
 406
 407             try:
 408                 ie_result = ie.extract(url)
 409                 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
 410                     break
 411                 if isinstance(ie_result, list):
 412                     # Backwards compatibility: old IE result format
 413                     ie_result = {
 414                         '_type': 'compat_list',
 415                         'entries': ie_result,
 416                     }
 417                 self.add_extra_info(ie_result,
 418                     {
 419                         'extractor': ie.IE_NAME,
 420                         'webpage_url': url,
 421                         'extractor_key': ie.ie_key(),
 422                     })
 423                 return self.process_ie_result(ie_result, download, extra_info)
 424             except ExtractorError as de: # An error we somewhat expected
 425                 self.report_error(compat_str(de), de.format_traceback())
 426                 break
 427             except Exception as e:
 428                 if self.params.get('ignoreerrors', False):
 429                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 430                     break
 431                 else:
 432                     raise
 433         else:
 434             self.report_error(u'no suitable InfoExtractor: %s' % url)
 435
 436     def process_ie_result(self, ie_result, download=True, extra_info={}):
 437         """
 438         Take the result of the ie(may be modified) and resolve all unresolved
 439         references (URLs, playlist items).
 440
 441         It will also download the videos if 'download'.
 442         Returns the resolved ie_result.
 443         """
 444
 445         result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
 446         if result_type == 'video':
 447             self.add_extra_info(ie_result, extra_info)
 448             return self.process_video_result(ie_result, download=download)
 449         elif result_type == 'url':
 450             # We have to add extra_info to the results because it may be
 451             # contained in a playlist
 452             return self.extract_info(ie_result['url'],
 453                                      download,
 454                                      ie_key=ie_result.get('ie_key'),
 455                                      extra_info=extra_info)
 456         elif result_type == 'playlist':
 457             self.add_extra_info(ie_result, extra_info)
 458             # We process each entry in the playlist
 459             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 460             self.to_screen(u'[download] Downloading playlist: %s' % playlist)
 461
 462             playlist_results = []
 463
 464             n_all_entries = len(ie_result['entries'])
 465             playliststart = self.params.get('playliststart', 1) - 1
 466             playlistend = self.params.get('playlistend', -1)
 467
 468             if playlistend == -1:
 469                 entries = ie_result['entries'][playliststart:]
 470             else:
 471                 entries = ie_result['entries'][playliststart:playlistend]
 472
 473             n_entries = len(entries)
 474
 475             self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
 476                 (ie_result['extractor'], playlist, n_all_entries, n_entries))
 477
 478             for i, entry in enumerate(entries, 1):
 479                 self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
 480                 extra = {
 481                     'playlist': playlist,
 482                     'playlist_index': i + playliststart,
 483                     'extractor': ie_result['extractor'],
 484                     'webpage_url': ie_result['webpage_url'],
 485                     'extractor_key': ie_result['extractor_key'],
 486                 }
 487                 entry_result = self.process_ie_result(entry,
 488                                                       download=download,
 489                                                       extra_info=extra)
 490                 playlist_results.append(entry_result)
 491             ie_result['entries'] = playlist_results
 492             return ie_result
 493         elif result_type == 'compat_list':
 494             def _fixup(r):
 495                 self.add_extra_info(r,
 496                     {
 497                         'extractor': ie_result['extractor'],
 498                         'webpage_url': ie_result['webpage_url'],
 499                         'extractor_key': ie_result['extractor_key'],
 500                     })
 501                 return r
 502             ie_result['entries'] = [
 503                 self.process_ie_result(_fixup(r), download, extra_info)
 504                 for r in ie_result['entries']
 505             ]
 506             return ie_result
 507         else:
 508             raise Exception('Invalid result type: %s' % result_type)
 509
 510     def select_format(self, format_spec, available_formats):
 511         if format_spec == 'best' or format_spec is None:
 512             return available_formats[-1]
 513         elif format_spec == 'worst':
 514             return available_formats[0]
 515         else:
 516             extensions = [u'mp4', u'flv', u'webm', u'3gp']
 517             if format_spec in extensions:
 518                 filter_f = lambda f: f['ext'] == format_spec
 519             else:
 520                 filter_f = lambda f: f['format_id'] == format_spec
 521             matches = list(filter(filter_f, available_formats))
 522             if matches:
 523                 return matches[-1]
 524         return None
 525
 526     def process_video_result(self, info_dict, download=True):
 527         assert info_dict.get('_type', 'video') == 'video'
 528
 529         if 'playlist' not in info_dict:
 530             # It isn't part of a playlist
 531             info_dict['playlist'] = None
 532             info_dict['playlist_index'] = None
 533
 534         # This extractors handle format selection themselves
 535         if info_dict['extractor'] in [u'youtube', u'Youku']:
 536             if download:
 537                 self.process_info(info_dict)
 538             return info_dict
 539
 540         # We now pick which formats have to be downloaded
 541         if info_dict.get('formats') is None:
 542             # There's only one format available
 543             formats = [info_dict]
 544         else:
 545             formats = info_dict['formats']
 546
 547         # We check that all the formats have the format and format_id fields
 548         for (i, format) in enumerate(formats):
 549             if format.get('format_id') is None:
 550                 format['format_id'] = compat_str(i)
 551             if format.get('format') is None:
 552                 format['format'] = u'{id} - {res}{note}'.format(
 553                     id=format['format_id'],
 554                     res=self.format_resolution(format),
 555                     note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
 556                 )
 557             # Automatically determine file extension if missing
 558             if 'ext' not in format:
 559                 format['ext'] = determine_ext(format['url'])
 560
 561         if self.params.get('listformats', None):
 562             self.list_formats(info_dict)
 563             return
 564
 565         format_limit = self.params.get('format_limit', None)
 566         if format_limit:
 567             formats = list(takewhile_inclusive(
 568                 lambda f: f['format_id'] != format_limit, formats
 569             ))
 570         if self.params.get('prefer_free_formats'):
 571             def _free_formats_key(f):
 572                 try:
 573                     ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
 574                 except ValueError:
 575                     ext_ord = -1
 576                 # We only compare the extension if they have the same height and width
 577                 return (f.get('height'), f.get('width'), ext_ord)
 578             formats = sorted(formats, key=_free_formats_key)
 579
 580         req_format = self.params.get('format', 'best')
 581         if req_format is None:
 582             req_format = 'best'
 583         formats_to_download = []
 584         # The -1 is for supporting YoutubeIE
 585         if req_format in ('-1', 'all'):
 586             formats_to_download = formats
 587         else:
 588             # We can accept formats requestd in the format: 34/5/best, we pick
 589             # the first that is available, starting from left
 590             req_formats = req_format.split('/')
 591             for rf in req_formats:
 592                 selected_format = self.select_format(rf, formats)
 593                 if selected_format is not None:
 594                     formats_to_download = [selected_format]
 595                     break
 596         if not formats_to_download:
 597             raise ExtractorError(u'requested format not available',
 598                                  expected=True)
 599
 600         if download:
 601             if len(formats_to_download) > 1:
 602                 self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
 603             for format in formats_to_download:
 604                 new_info = dict(info_dict)
 605                 new_info.update(format)
 606                 self.process_info(new_info)
 607         # We update the info dict with the best quality format (backwards compatibility)
 608         info_dict.update(formats_to_download[-1])
 609         return info_dict
 610
 611     def process_info(self, info_dict):
 612         """Process a single resolved IE result."""
 613
 614         assert info_dict.get('_type', 'video') == 'video'
 615         #We increment the download the download count here to match the previous behaviour.
 616         self.increment_downloads()
 617
 618         info_dict['fulltitle'] = info_dict['title']
 619         if len(info_dict['title']) > 200:
 620             info_dict['title'] = info_dict['title'][:197] + u'...'
 621
 622         # Keep for backwards compatibility
 623         info_dict['stitle'] = info_dict['title']
 624
 625         if not 'format' in info_dict:
 626             info_dict['format'] = info_dict['ext']
 627
 628         reason = self._match_entry(info_dict)
 629         if reason is not None:
 630             self.to_screen(u'[download] ' + reason)
 631             return
 632
 633         max_downloads = self.params.get('max_downloads')
 634         if max_downloads is not None:
 635             if self._num_downloads > int(max_downloads):
 636                 raise MaxDownloadsReached()
 637
 638         filename = self.prepare_filename(info_dict)
 639
 640         # Forced printings
 641         if self.params.get('forcetitle', False):
 642             compat_print(info_dict['fulltitle'])
 643         if self.params.get('forceid', False):
 644             compat_print(info_dict['id'])
 645         if self.params.get('forceurl', False):
 646             # For RTMP URLs, also include the playpath
 647             compat_print(info_dict['url'] + info_dict.get('play_path', u''))
 648         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
 649             compat_print(info_dict['thumbnail'])
 650         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
 651             compat_print(info_dict['description'])
 652         if self.params.get('forcefilename', False) and filename is not None:
 653             compat_print(filename)
 654         if self.params.get('forceformat', False):
 655             compat_print(info_dict['format'])
 656         if self.params.get('forcejson', False):
 657             compat_print(json.dumps(info_dict))
 658
 659         # Do nothing else if in simulate mode
 660         if self.params.get('simulate', False):
 661             return
 662
 663         if filename is None:
 664             return
 665
 666         try:
 667             dn = os.path.dirname(encodeFilename(filename))
 668             if dn != '' and not os.path.exists(dn):
 669                 os.makedirs(dn)
 670         except (OSError, IOError) as err:
 671             self.report_error(u'unable to create directory ' + compat_str(err))
 672             return
 673
 674         if self.params.get('writedescription', False):
 675             try:
 676                 descfn = filename + u'.description'
 677                 self.report_writedescription(descfn)
 678                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
 679                     descfile.write(info_dict['description'])
 680             except (KeyError, TypeError):
 681                 self.report_warning(u'There\'s no description to write.')
 682             except (OSError, IOError):
 683                 self.report_error(u'Cannot write description file ' + descfn)
 684                 return
 685
 686         if self.params.get('writeannotations', False):
 687             try:
 688                 annofn = filename + u'.annotations.xml'
 689                 self.report_writeannotations(annofn)
 690                 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
 691                     annofile.write(info_dict['annotations'])
 692             except (KeyError, TypeError):
 693                 self.report_warning(u'There are no annotations to write.')
 694             except (OSError, IOError):
 695                 self.report_error(u'Cannot write annotations file: ' + annofn)
 696                 return
 697
 698         subtitles_are_requested = any([self.params.get('writesubtitles', False),
 699                                        self.params.get('writeautomaticsub')])
 700
 701         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
 702             # subtitles download errors are already managed as troubles in relevant IE
 703             # that way it will silently go on when used with unsupporting IE
 704             subtitles = info_dict['subtitles']
 705             sub_format = self.params.get('subtitlesformat', 'srt')
 706             for sub_lang in subtitles.keys():
 707                 sub = subtitles[sub_lang]
 708                 if sub is None:
 709                     continue
 710                 try:
 711                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
 712                     self.report_writesubtitles(sub_filename)
 713                     with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
 714                             subfile.write(sub)
 715                 except (OSError, IOError):
 716                     self.report_error(u'Cannot write subtitles file ' + descfn)
 717                     return
 718
 719         if self.params.get('writeinfojson', False):
 720             infofn = os.path.splitext(filename)[0] + u'.info.json'
 721             self.report_writeinfojson(infofn)
 722             try:
 723                 json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
 724                 write_json_file(json_info_dict, encodeFilename(infofn))
 725             except (OSError, IOError):
 726                 self.report_error(u'Cannot write metadata to JSON file ' + infofn)
 727                 return
 728
 729         if self.params.get('writethumbnail', False):
 730             if info_dict.get('thumbnail') is not None:
 731                 thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
 732                 thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
 733                 self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
 734                                (info_dict['extractor'], info_dict['id']))
 735                 try:
 736                     uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
 737                     with open(thumb_filename, 'wb') as thumbf:
 738                         shutil.copyfileobj(uf, thumbf)
 739                     self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
 740                         (info_dict['extractor'], info_dict['id'], thumb_filename))
 741                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 742                     self.report_warning(u'Unable to download thumbnail "%s": %s' %
 743                         (info_dict['thumbnail'], compat_str(err)))
 744
 745         if not self.params.get('skip_download', False):
 746             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
 747                 success = True
 748             else:
 749                 try:
 750                     success = self.fd._do_download(filename, info_dict)
 751                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 752                     self.report_error(u'unable to download video data: %s' % str(err))
 753                     return
 754                 except (OSError, IOError) as err:
 755                     raise UnavailableVideoError(err)
 756                 except (ContentTooShortError, ) as err:
 757                     self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
 758                     return
 759
 760             if success:
 761                 try:
 762                     self.post_process(filename, info_dict)
 763                 except (PostProcessingError) as err:
 764                     self.report_error(u'postprocessing: %s' % str(err))
 765                     return
 766
 767         self.record_download_archive(info_dict)
 768
 769     def download(self, url_list):
 770         """Download a given list of URLs."""
 771         if len(url_list) > 1 and self.fixed_template():
 772             raise SameFileError(self.params['outtmpl'])
 773
 774         for url in url_list:
 775             try:
 776                 #It also downloads the videos
 777                 videos = self.extract_info(url)
 778             except UnavailableVideoError:
 779                 self.report_error(u'unable to download video')
 780             except MaxDownloadsReached:
 781                 self.to_screen(u'[info] Maximum number of downloaded files reached.')
 782                 raise
 783
 784         return self._download_retcode
 785
 786     def post_process(self, filename, ie_info):
 787         """Run all the postprocessors on the given file."""
 788         info = dict(ie_info)
 789         info['filepath'] = filename
 790         keep_video = None
 791         for pp in self._pps:
 792             try:
 793                 keep_video_wish, new_info = pp.run(info)
 794                 if keep_video_wish is not None:
 795                     if keep_video_wish:
 796                         keep_video = keep_video_wish
 797                     elif keep_video is None:
 798                         # No clear decision yet, let IE decide
 799                         keep_video = keep_video_wish
 800             except PostProcessingError as e:
 801                 self.report_error(e.msg)
 802         if keep_video is False and not self.params.get('keepvideo', False):
 803             try:
 804                 self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
 805                 os.remove(encodeFilename(filename))
 806             except (IOError, OSError):
 807                 self.report_warning(u'Unable to remove downloaded video file')
 808
 809     def in_download_archive(self, info_dict):
 810         fn = self.params.get('download_archive')
 811         if fn is None:
 812             return False
 813         vid_id = info_dict['extractor'] + u' ' + info_dict['id']
 814         try:
 815             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 816                 for line in archive_file:
 817                     if line.strip() == vid_id:
 818                         return True
 819         except IOError as ioe:
 820             if ioe.errno != errno.ENOENT:
 821                 raise
 822         return False
 823
 824     def record_download_archive(self, info_dict):
 825         fn = self.params.get('download_archive')
 826         if fn is None:
 827             return
 828         vid_id = info_dict['extractor'] + u' ' + info_dict['id']
 829         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
 830             archive_file.write(vid_id + u'\n')
 831
 832     @staticmethod
 833     def format_resolution(format, default='unknown'):
 834         if format.get('_resolution') is not None:
 835             return format['_resolution']
 836         if format.get('height') is not None:
 837             if format.get('width') is not None:
 838                 res = u'%sx%s' % (format['width'], format['height'])
 839             else:
 840                 res = u'%sp' % format['height']
 841         else:
 842             res = default
 843         return res
 844
 845     def list_formats(self, info_dict):
 846         def format_note(fdict):
 847             if fdict.get('format_note') is not None:
 848                 return fdict['format_note']
 849             res = u''
 850             if fdict.get('vcodec') is not None:
 851                 res += u'%-5s' % fdict['vcodec']
 852             elif fdict.get('vbr') is not None:
 853                 res += u'video'
 854             if fdict.get('vbr') is not None:
 855                 res += u'@%4dk' % fdict['vbr']
 856             if fdict.get('acodec') is not None:
 857                 if res:
 858                     res += u', '
 859                 res += u'%-5s' % fdict['acodec']
 860             elif fdict.get('abr') is not None:
 861                 if res:
 862                     res += u', '
 863                 res += 'audio'
 864             if fdict.get('abr') is not None:
 865                 res += u'@%3dk' % fdict['abr']
 866             return res
 867
 868         def line(format):
 869             return (u'%-20s%-10s%-12s%s' % (
 870                 format['format_id'],
 871                 format['ext'],
 872                 self.format_resolution(format),
 873                 format_note(format),
 874                 )
 875             )
 876
 877         formats = info_dict.get('formats', [info_dict])
 878         formats_s = list(map(line, formats))
 879         if len(formats) > 1:
 880             formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
 881             formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
 882
 883         header_line = line({
 884             'format_id': u'format code', 'ext': u'extension',
 885             '_resolution': u'resolution', 'format_note': u'note'})
 886         self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
 887                        (info_dict['id'], header_line, u"\n".join(formats_s)))