_ Git - youtube-dl/blob - youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import
   5
   6 import errno
   7 import io
   8 import json
   9 import os
  10 import re
  11 import shutil
  12 import socket
  13 import sys
  14 import time
  15 import traceback
  16
  17 if os.name == 'nt':
  18     import ctypes
  19
  20 from .utils import (
  21     compat_http_client,
  22     compat_print,
  23     compat_str,
  24     compat_urllib_error,
  25     compat_urllib_request,
  26     ContentTooShortError,
  27     date_from_str,
  28     DateRange,
  29     determine_ext,
  30     DownloadError,
  31     encodeFilename,
  32     ExtractorError,
  33     locked_file,
  34     MaxDownloadsReached,
  35     PostProcessingError,
  36     preferredencoding,
  37     SameFileError,
  38     sanitize_filename,
  39     subtitles_filename,
  40     takewhile_inclusive,
  41     UnavailableVideoError,
  42     write_json_file,
  43     write_string,
  44 )
  45 from .extractor import get_info_extractor, gen_extractors
  46 from .FileDownloader import FileDownloader
  47
  48
  49 class YoutubeDL(object):
  50     """YoutubeDL class.
  51
  52     YoutubeDL objects are the ones responsible for downloading the
  53     actual video file and writing it to disk if the user has requested
  54     it, among some other tasks. In most cases there should be one per
  55     program. As, given a video URL, the downloader doesn't know how to
  56     extract all the needed information, task that InfoExtractors do, it
  57     has to pass the URL to one of them.
  58
  59     For this, YoutubeDL objects have a method that allows
  60     InfoExtractors to be registered in a given order. When it is passed
  61     a URL, the YoutubeDL object hands it to the first InfoExtractor it
  62     finds that reports being able to handle it. The InfoExtractor extracts
  63     all the information about the video or videos the URL refers to, and
  64     YoutubeDL processes the extracted information, possibly using a File
  65     Downloader to download the video.
  66
  67     YoutubeDL objects accept a lot of parameters. In order not to saturate
  68     the object constructor with arguments, it receives a dictionary of
  69     options instead. These options are available through the params
  70     attribute for the InfoExtractors to use. The YoutubeDL also
  71     registers itself as the downloader in charge for the InfoExtractors
  72     that are added to it, so this is a "mutual registration".
  73
  74     Available options:
  75
  76     username:          Username for authentication purposes.
  77     password:          Password for authentication purposes.
  78     videopassword:     Password for accessing a video.
  79     usenetrc:          Use netrc for authentication instead.
  80     verbose:           Print additional info to stdout.
  81     quiet:             Do not print messages to stdout.
  82     forceurl:          Force printing final URL.
  83     forcetitle:        Force printing title.
  84     forceid:           Force printing ID.
  85     forcethumbnail:    Force printing thumbnail URL.
  86     forcedescription:  Force printing description.
  87     forcefilename:     Force printing final filename.
  88     forcejson:         Force printing info_dict as JSON.
  89     simulate:          Do not download the video files.
  90     format:            Video format code.
  91     format_limit:      Highest quality format to try.
  92     outtmpl:           Template for output names.
  93     restrictfilenames: Do not allow "&" and spaces in file names
  94     ignoreerrors:      Do not stop on download errors.
  95     nooverwrites:      Prevent overwriting files.
  96     playliststart:     Playlist item to start at.
  97     playlistend:       Playlist item to end at.
  98     matchtitle:        Download only matching titles.
  99     rejecttitle:       Reject downloads for matching titles.
 100     logtostderr:       Log messages to stderr instead of stdout.
 101     writedescription:  Write the video description to a .description file
 102     writeinfojson:     Write the video description to a .info.json file
 103     writeannotations:  Write the video annotations to a .annotations.xml file
 104     writethumbnail:    Write the thumbnail image to a file
 105     writesubtitles:    Write the video subtitles to a file
 106     writeautomaticsub: Write the automatic subtitles to a file
 107     allsubtitles:      Downloads all the subtitles of the video
 108                        (requires writesubtitles or writeautomaticsub)
 109     listsubtitles:     Lists all available subtitles for the video
 110     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
 111     subtitleslangs:    List of languages of the subtitles to download
 112     keepvideo:         Keep the video file after post-processing
 113     daterange:         A DateRange object, download only if the upload_date is in the range.
 114     skip_download:     Skip the actual download of the video file
 115     cachedir:          Location of the cache files in the filesystem.
 116                        None to disable filesystem cache.
 117     noplaylist:        Download single video instead of a playlist if in doubt.
 118     age_limit:         An integer representing the user's age in years.
 119                        Unsuitable videos for the given age are skipped.
 120     downloadarchive:   File name of a file where all downloads are recorded.
 121                        Videos already present in the file are not downloaded
 122                        again.
 123
 124     The following parameters are not used by YoutubeDL itself, they are used by
 125     the FileDownloader:
 126     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 127     noresizebuffer, retries, continuedl, noprogress, consoletitle
 128     """
 129
 130     params = None
 131     _ies = []
 132     _pps = []
 133     _download_retcode = None
 134     _num_downloads = None
 135     _screen_file = None
 136
 137     def __init__(self, params):
 138         """Create a FileDownloader object with the given options."""
 139         self._ies = []
 140         self._ies_instances = {}
 141         self._pps = []
 142         self._progress_hooks = []
 143         self._download_retcode = 0
 144         self._num_downloads = 0
 145         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 146
 147         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 148                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 149                 and not params['restrictfilenames']):
 150             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 151             self.report_warning(
 152                 u'Assuming --restrict-filenames since file system encoding '
 153                 u'cannot encode all charactes. '
 154                 u'Set the LC_ALL environment variable to fix this.')
 155             params['restrictfilenames'] = True
 156
 157         self.params = params
 158         self.fd = FileDownloader(self, self.params)
 159
 160         if '%(stitle)s' in self.params['outtmpl']:
 161             self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 162
 163     def add_info_extractor(self, ie):
 164         """Add an InfoExtractor object to the end of the list."""
 165         self._ies.append(ie)
 166         self._ies_instances[ie.ie_key()] = ie
 167         ie.set_downloader(self)
 168
 169     def get_info_extractor(self, ie_key):
 170         """
 171         Get an instance of an IE with name ie_key, it will try to get one from
 172         the _ies list, if there's no instance it will create a new one and add
 173         it to the extractor list.
 174         """
 175         ie = self._ies_instances.get(ie_key)
 176         if ie is None:
 177             ie = get_info_extractor(ie_key)()
 178             self.add_info_extractor(ie)
 179         return ie
 180
 181     def add_default_info_extractors(self):
 182         """
 183         Add the InfoExtractors returned by gen_extractors to the end of the list
 184         """
 185         for ie in gen_extractors():
 186             self.add_info_extractor(ie)
 187
 188     def add_post_processor(self, pp):
 189         """Add a PostProcessor object to the end of the chain."""
 190         self._pps.append(pp)
 191         pp.set_downloader(self)
 192
 193     def to_screen(self, message, skip_eol=False):
 194         """Print message to stdout if not in quiet mode."""
 195         if not self.params.get('quiet', False):
 196             terminator = [u'\n', u''][skip_eol]
 197             output = message + terminator
 198             write_string(output, self._screen_file)
 199
 200     def to_stderr(self, message):
 201         """Print message to stderr."""
 202         assert type(message) == type(u'')
 203         output = message + u'\n'
 204         if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
 205             output = output.encode(preferredencoding())
 206         sys.stderr.write(output)
 207
 208     def to_console_title(self, message):
 209         if not self.params.get('consoletitle', False):
 210             return
 211         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 212             # c_wchar_p() might not be necessary if `message` is
 213             # already of type unicode()
 214             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 215         elif 'TERM' in os.environ:
 216             write_string(u'\033]0;%s\007' % message, self._screen_file)
 217
 218     def save_console_title(self):
 219         if not self.params.get('consoletitle', False):
 220             return
 221         if 'TERM' in os.environ:
 222             # Save the title on stack
 223             write_string(u'\033[22;0t', self._screen_file)
 224
 225     def restore_console_title(self):
 226         if not self.params.get('consoletitle', False):
 227             return
 228         if 'TERM' in os.environ:
 229             # Restore the title from stack
 230             write_string(u'\033[23;0t', self._screen_file)
 231
 232     def __enter__(self):
 233         self.save_console_title()
 234         return self
 235
 236     def __exit__(self, *args):
 237         self.restore_console_title()
 238
 239     def fixed_template(self):
 240         """Checks if the output template is fixed."""
 241         return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
 242
 243     def trouble(self, message=None, tb=None):
 244         """Determine action to take when a download problem appears.
 245
 246         Depending on if the downloader has been configured to ignore
 247         download errors or not, this method may throw an exception or
 248         not when errors are found, after printing the message.
 249
 250         tb, if given, is additional traceback information.
 251         """
 252         if message is not None:
 253             self.to_stderr(message)
 254         if self.params.get('verbose'):
 255             if tb is None:
 256                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 257                     tb = u''
 258                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 259                         tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 260                     tb += compat_str(traceback.format_exc())
 261                 else:
 262                     tb_data = traceback.format_list(traceback.extract_stack())
 263                     tb = u''.join(tb_data)
 264             self.to_stderr(tb)
 265         if not self.params.get('ignoreerrors', False):
 266             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 267                 exc_info = sys.exc_info()[1].exc_info
 268             else:
 269                 exc_info = sys.exc_info()
 270             raise DownloadError(message, exc_info)
 271         self._download_retcode = 1
 272
 273     def report_warning(self, message):
 274         '''
 275         Print the message to stderr, it will be prefixed with 'WARNING:'
 276         If stderr is a tty file the 'WARNING:' will be colored
 277         '''
 278         if sys.stderr.isatty() and os.name != 'nt':
 279             _msg_header = u'\033[0;33mWARNING:\033[0m'
 280         else:
 281             _msg_header = u'WARNING:'
 282         warning_message = u'%s %s' % (_msg_header, message)
 283         self.to_stderr(warning_message)
 284
 285     def report_error(self, message, tb=None):
 286         '''
 287         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 288         in red if stderr is a tty file.
 289         '''
 290         if sys.stderr.isatty() and os.name != 'nt':
 291             _msg_header = u'\033[0;31mERROR:\033[0m'
 292         else:
 293             _msg_header = u'ERROR:'
 294         error_message = u'%s %s' % (_msg_header, message)
 295         self.trouble(error_message, tb)
 296
 297     def report_writedescription(self, descfn):
 298         """ Report that the description file is being written """
 299         self.to_screen(u'[info] Writing video description to: ' + descfn)
 300
 301     def report_writesubtitles(self, sub_filename):
 302         """ Report that the subtitles file is being written """
 303         self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
 304
 305     def report_writeinfojson(self, infofn):
 306         """ Report that the metadata file has been written """
 307         self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
 308
 309     def report_writeannotations(self, annofn):
 310         """ Report that the annotations file has been written. """
 311         self.to_screen(u'[info] Writing video annotations to: ' + annofn)
 312
 313     def report_file_already_downloaded(self, file_name):
 314         """Report file has already been fully downloaded."""
 315         try:
 316             self.to_screen(u'[download] %s has already been downloaded' % file_name)
 317         except UnicodeEncodeError:
 318             self.to_screen(u'[download] The file has already been downloaded')
 319
 320     def increment_downloads(self):
 321         """Increment the ordinal that assigns a number to each file."""
 322         self._num_downloads += 1
 323
 324     def prepare_filename(self, info_dict):
 325         """Generate the output filename."""
 326         try:
 327             template_dict = dict(info_dict)
 328
 329             template_dict['epoch'] = int(time.time())
 330             autonumber_size = self.params.get('autonumber_size')
 331             if autonumber_size is None:
 332                 autonumber_size = 5
 333             autonumber_templ = u'%0' + str(autonumber_size) + u'd'
 334             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 335             if template_dict.get('playlist_index') is not None:
 336                 template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
 337
 338             sanitize = lambda k, v: sanitize_filename(
 339                 u'NA' if v is None else compat_str(v),
 340                 restricted=self.params.get('restrictfilenames'),
 341                 is_id=(k == u'id'))
 342             template_dict = dict((k, sanitize(k, v))
 343                                  for k, v in template_dict.items())
 344
 345             tmpl = os.path.expanduser(self.params['outtmpl'])
 346             filename = tmpl % template_dict
 347             return filename
 348         except KeyError as err:
 349             self.report_error(u'Erroneous output template')
 350             return None
 351         except ValueError as err:
 352             self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
 353             return None
 354
 355     def _match_entry(self, info_dict):
 356         """ Returns None iff the file should be downloaded """
 357
 358         if 'title' in info_dict:
 359             # This can happen when we're just evaluating the playlist
 360             title = info_dict['title']
 361             matchtitle = self.params.get('matchtitle', False)
 362             if matchtitle:
 363                 if not re.search(matchtitle, title, re.IGNORECASE):
 364                     return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
 365             rejecttitle = self.params.get('rejecttitle', False)
 366             if rejecttitle:
 367                 if re.search(rejecttitle, title, re.IGNORECASE):
 368                     return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 369         date = info_dict.get('upload_date', None)
 370         if date is not None:
 371             dateRange = self.params.get('daterange', DateRange())
 372             if date not in dateRange:
 373                 return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 374         age_limit = self.params.get('age_limit')
 375         if age_limit is not None:
 376             if age_limit < info_dict.get('age_limit', 0):
 377                 return u'Skipping "' + title + '" because it is age restricted'
 378         if self.in_download_archive(info_dict):
 379             return (u'%s has already been recorded in archive'
 380                     % info_dict.get('title', info_dict.get('id', u'video')))
 381         return None
 382
 383     @staticmethod
 384     def add_extra_info(info_dict, extra_info):
 385         '''Set the keys from extra_info in info dict if they are missing'''
 386         for key, value in extra_info.items():
 387             info_dict.setdefault(key, value)
 388
 389     def extract_info(self, url, download=True, ie_key=None, extra_info={}):
 390         '''
 391         Returns a list with a dictionary for each video we find.
 392         If 'download', also downloads the videos.
 393         extra_info is a dict containing the extra values to add to each result
 394          '''
 395
 396         if ie_key:
 397             ies = [self.get_info_extractor(ie_key)]
 398         else:
 399             ies = self._ies
 400
 401         for ie in ies:
 402             if not ie.suitable(url):
 403                 continue
 404
 405             if not ie.working():
 406                 self.report_warning(u'The program functionality for this site has been marked as broken, '
 407                                     u'and will probably not work.')
 408
 409             try:
 410                 ie_result = ie.extract(url)
 411                 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
 412                     break
 413                 if isinstance(ie_result, list):
 414                     # Backwards compatibility: old IE result format
 415                     ie_result = {
 416                         '_type': 'compat_list',
 417                         'entries': ie_result,
 418                     }
 419                 self.add_extra_info(ie_result,
 420                     {
 421                         'extractor': ie.IE_NAME,
 422                         'webpage_url': url,
 423                         'extractor_key': ie.ie_key(),
 424                     })
 425                 return self.process_ie_result(ie_result, download, extra_info)
 426             except ExtractorError as de: # An error we somewhat expected
 427                 self.report_error(compat_str(de), de.format_traceback())
 428                 break
 429             except Exception as e:
 430                 if self.params.get('ignoreerrors', False):
 431                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 432                     break
 433                 else:
 434                     raise
 435         else:
 436             self.report_error(u'no suitable InfoExtractor: %s' % url)
 437
 438     def process_ie_result(self, ie_result, download=True, extra_info={}):
 439         """
 440         Take the result of the ie(may be modified) and resolve all unresolved
 441         references (URLs, playlist items).
 442
 443         It will also download the videos if 'download'.
 444         Returns the resolved ie_result.
 445         """
 446
 447         result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
 448         if result_type == 'video':
 449             self.add_extra_info(ie_result, extra_info)
 450             return self.process_video_result(ie_result, download=download)
 451         elif result_type == 'url':
 452             # We have to add extra_info to the results because it may be
 453             # contained in a playlist
 454             return self.extract_info(ie_result['url'],
 455                                      download,
 456                                      ie_key=ie_result.get('ie_key'),
 457                                      extra_info=extra_info)
 458         elif result_type == 'playlist':
 459
 460             # We process each entry in the playlist
 461             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 462             self.to_screen(u'[download] Downloading playlist: %s' % playlist)
 463
 464             playlist_results = []
 465
 466             n_all_entries = len(ie_result['entries'])
 467             playliststart = self.params.get('playliststart', 1) - 1
 468             playlistend = self.params.get('playlistend', -1)
 469
 470             if playlistend == -1:
 471                 entries = ie_result['entries'][playliststart:]
 472             else:
 473                 entries = ie_result['entries'][playliststart:playlistend]
 474
 475             n_entries = len(entries)
 476
 477             self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
 478                 (ie_result['extractor'], playlist, n_all_entries, n_entries))
 479
 480             for i, entry in enumerate(entries, 1):
 481                 self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
 482                 extra = {
 483                     'playlist': playlist,
 484                     'playlist_index': i + playliststart,
 485                     'extractor': ie_result['extractor'],
 486                     'webpage_url': ie_result['webpage_url'],
 487                     'extractor_key': ie_result['extractor_key'],
 488                 }
 489
 490                 reason = self._match_entry(entry)
 491                 if reason is not None:
 492                     self.to_screen(u'[download] ' + reason)
 493                     continue
 494
 495                 entry_result = self.process_ie_result(entry,
 496                                                       download=download,
 497                                                       extra_info=extra)
 498                 playlist_results.append(entry_result)
 499             ie_result['entries'] = playlist_results
 500             return ie_result
 501         elif result_type == 'compat_list':
 502             def _fixup(r):
 503                 self.add_extra_info(r,
 504                     {
 505                         'extractor': ie_result['extractor'],
 506                         'webpage_url': ie_result['webpage_url'],
 507                         'extractor_key': ie_result['extractor_key'],
 508                     })
 509                 return r
 510             ie_result['entries'] = [
 511                 self.process_ie_result(_fixup(r), download, extra_info)
 512                 for r in ie_result['entries']
 513             ]
 514             return ie_result
 515         else:
 516             raise Exception('Invalid result type: %s' % result_type)
 517
 518     def select_format(self, format_spec, available_formats):
 519         if format_spec == 'best' or format_spec is None:
 520             return available_formats[-1]
 521         elif format_spec == 'worst':
 522             return available_formats[0]
 523         else:
 524             extensions = [u'mp4', u'flv', u'webm', u'3gp']
 525             if format_spec in extensions:
 526                 filter_f = lambda f: f['ext'] == format_spec
 527             else:
 528                 filter_f = lambda f: f['format_id'] == format_spec
 529             matches = list(filter(filter_f, available_formats))
 530             if matches:
 531                 return matches[-1]
 532         return None
 533
 534     def process_video_result(self, info_dict, download=True):
 535         assert info_dict.get('_type', 'video') == 'video'
 536
 537         if 'playlist' not in info_dict:
 538             # It isn't part of a playlist
 539             info_dict['playlist'] = None
 540             info_dict['playlist_index'] = None
 541
 542         # This extractors handle format selection themselves
 543         if info_dict['extractor'] in [u'youtube', u'Youku']:
 544             if download:
 545                 self.process_info(info_dict)
 546             return info_dict
 547
 548         # We now pick which formats have to be downloaded
 549         if info_dict.get('formats') is None:
 550             # There's only one format available
 551             formats = [info_dict]
 552         else:
 553             formats = info_dict['formats']
 554
 555         # We check that all the formats have the format and format_id fields
 556         for (i, format) in enumerate(formats):
 557             if format.get('format_id') is None:
 558                 format['format_id'] = compat_str(i)
 559             if format.get('format') is None:
 560                 format['format'] = u'{id} - {res}{note}'.format(
 561                     id=format['format_id'],
 562                     res=self.format_resolution(format),
 563                     note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
 564                 )
 565             # Automatically determine file extension if missing
 566             if 'ext' not in format:
 567                 format['ext'] = determine_ext(format['url'])
 568
 569         if self.params.get('listformats', None):
 570             self.list_formats(info_dict)
 571             return
 572
 573         format_limit = self.params.get('format_limit', None)
 574         if format_limit:
 575             formats = list(takewhile_inclusive(
 576                 lambda f: f['format_id'] != format_limit, formats
 577             ))
 578         if self.params.get('prefer_free_formats'):
 579             def _free_formats_key(f):
 580                 try:
 581                     ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
 582                 except ValueError:
 583                     ext_ord = -1
 584                 # We only compare the extension if they have the same height and width
 585                 return (f.get('height'), f.get('width'), ext_ord)
 586             formats = sorted(formats, key=_free_formats_key)
 587
 588         req_format = self.params.get('format', 'best')
 589         if req_format is None:
 590             req_format = 'best'
 591         formats_to_download = []
 592         # The -1 is for supporting YoutubeIE
 593         if req_format in ('-1', 'all'):
 594             formats_to_download = formats
 595         else:
 596             # We can accept formats requestd in the format: 34/5/best, we pick
 597             # the first that is available, starting from left
 598             req_formats = req_format.split('/')
 599             for rf in req_formats:
 600                 selected_format = self.select_format(rf, formats)
 601                 if selected_format is not None:
 602                     formats_to_download = [selected_format]
 603                     break
 604         if not formats_to_download:
 605             raise ExtractorError(u'requested format not available',
 606                                  expected=True)
 607
 608         if download:
 609             if len(formats_to_download) > 1:
 610                 self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
 611             for format in formats_to_download:
 612                 new_info = dict(info_dict)
 613                 new_info.update(format)
 614                 self.process_info(new_info)
 615         # We update the info dict with the best quality format (backwards compatibility)
 616         info_dict.update(formats_to_download[-1])
 617         return info_dict
 618
 619     def process_info(self, info_dict):
 620         """Process a single resolved IE result."""
 621
 622         assert info_dict.get('_type', 'video') == 'video'
 623         #We increment the download the download count here to match the previous behaviour.
 624         self.increment_downloads()
 625
 626         info_dict['fulltitle'] = info_dict['title']
 627         if len(info_dict['title']) > 200:
 628             info_dict['title'] = info_dict['title'][:197] + u'...'
 629
 630         # Keep for backwards compatibility
 631         info_dict['stitle'] = info_dict['title']
 632
 633         if not 'format' in info_dict:
 634             info_dict['format'] = info_dict['ext']
 635
 636         reason = self._match_entry(info_dict)
 637         if reason is not None:
 638             self.to_screen(u'[download] ' + reason)
 639             return
 640
 641         max_downloads = self.params.get('max_downloads')
 642         if max_downloads is not None:
 643             if self._num_downloads > int(max_downloads):
 644                 raise MaxDownloadsReached()
 645
 646         filename = self.prepare_filename(info_dict)
 647
 648         # Forced printings
 649         if self.params.get('forcetitle', False):
 650             compat_print(info_dict['fulltitle'])
 651         if self.params.get('forceid', False):
 652             compat_print(info_dict['id'])
 653         if self.params.get('forceurl', False):
 654             # For RTMP URLs, also include the playpath
 655             compat_print(info_dict['url'] + info_dict.get('play_path', u''))
 656         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
 657             compat_print(info_dict['thumbnail'])
 658         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
 659             compat_print(info_dict['description'])
 660         if self.params.get('forcefilename', False) and filename is not None:
 661             compat_print(filename)
 662         if self.params.get('forceformat', False):
 663             compat_print(info_dict['format'])
 664         if self.params.get('forcejson', False):
 665             compat_print(json.dumps(info_dict))
 666
 667         # Do nothing else if in simulate mode
 668         if self.params.get('simulate', False):
 669             return
 670
 671         if filename is None:
 672             return
 673
 674         try:
 675             dn = os.path.dirname(encodeFilename(filename))
 676             if dn != '' and not os.path.exists(dn):
 677                 os.makedirs(dn)
 678         except (OSError, IOError) as err:
 679             self.report_error(u'unable to create directory ' + compat_str(err))
 680             return
 681
 682         if self.params.get('writedescription', False):
 683             try:
 684                 descfn = filename + u'.description'
 685                 self.report_writedescription(descfn)
 686                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
 687                     descfile.write(info_dict['description'])
 688             except (KeyError, TypeError):
 689                 self.report_warning(u'There\'s no description to write.')
 690             except (OSError, IOError):
 691                 self.report_error(u'Cannot write description file ' + descfn)
 692                 return
 693
 694         if self.params.get('writeannotations', False):
 695             try:
 696                 annofn = filename + u'.annotations.xml'
 697                 self.report_writeannotations(annofn)
 698                 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
 699                     annofile.write(info_dict['annotations'])
 700             except (KeyError, TypeError):
 701                 self.report_warning(u'There are no annotations to write.')
 702             except (OSError, IOError):
 703                 self.report_error(u'Cannot write annotations file: ' + annofn)
 704                 return
 705
 706         subtitles_are_requested = any([self.params.get('writesubtitles', False),
 707                                        self.params.get('writeautomaticsub')])
 708
 709         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
 710             # subtitles download errors are already managed as troubles in relevant IE
 711             # that way it will silently go on when used with unsupporting IE
 712             subtitles = info_dict['subtitles']
 713             sub_format = self.params.get('subtitlesformat', 'srt')
 714             for sub_lang in subtitles.keys():
 715                 sub = subtitles[sub_lang]
 716                 if sub is None:
 717                     continue
 718                 try:
 719                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
 720                     self.report_writesubtitles(sub_filename)
 721                     with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
 722                             subfile.write(sub)
 723                 except (OSError, IOError):
 724                     self.report_error(u'Cannot write subtitles file ' + descfn)
 725                     return
 726
 727         if self.params.get('writeinfojson', False):
 728             infofn = os.path.splitext(filename)[0] + u'.info.json'
 729             self.report_writeinfojson(infofn)
 730             try:
 731                 json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
 732                 write_json_file(json_info_dict, encodeFilename(infofn))
 733             except (OSError, IOError):
 734                 self.report_error(u'Cannot write metadata to JSON file ' + infofn)
 735                 return
 736
 737         if self.params.get('writethumbnail', False):
 738             if info_dict.get('thumbnail') is not None:
 739                 thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
 740                 thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
 741                 self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
 742                                (info_dict['extractor'], info_dict['id']))
 743                 try:
 744                     uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
 745                     with open(thumb_filename, 'wb') as thumbf:
 746                         shutil.copyfileobj(uf, thumbf)
 747                     self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
 748                         (info_dict['extractor'], info_dict['id'], thumb_filename))
 749                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 750                     self.report_warning(u'Unable to download thumbnail "%s": %s' %
 751                         (info_dict['thumbnail'], compat_str(err)))
 752
 753         if not self.params.get('skip_download', False):
 754             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
 755                 success = True
 756             else:
 757                 try:
 758                     success = self.fd._do_download(filename, info_dict)
 759                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 760                     self.report_error(u'unable to download video data: %s' % str(err))
 761                     return
 762                 except (OSError, IOError) as err:
 763                     raise UnavailableVideoError(err)
 764                 except (ContentTooShortError, ) as err:
 765                     self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
 766                     return
 767
 768             if success:
 769                 try:
 770                     self.post_process(filename, info_dict)
 771                 except (PostProcessingError) as err:
 772                     self.report_error(u'postprocessing: %s' % str(err))
 773                     return
 774
 775         self.record_download_archive(info_dict)
 776
 777     def download(self, url_list):
 778         """Download a given list of URLs."""
 779         if len(url_list) > 1 and self.fixed_template():
 780             raise SameFileError(self.params['outtmpl'])
 781
 782         for url in url_list:
 783             try:
 784                 #It also downloads the videos
 785                 videos = self.extract_info(url)
 786             except UnavailableVideoError:
 787                 self.report_error(u'unable to download video')
 788             except MaxDownloadsReached:
 789                 self.to_screen(u'[info] Maximum number of downloaded files reached.')
 790                 raise
 791
 792         return self._download_retcode
 793
 794     def post_process(self, filename, ie_info):
 795         """Run all the postprocessors on the given file."""
 796         info = dict(ie_info)
 797         info['filepath'] = filename
 798         keep_video = None
 799         for pp in self._pps:
 800             try:
 801                 keep_video_wish, new_info = pp.run(info)
 802                 if keep_video_wish is not None:
 803                     if keep_video_wish:
 804                         keep_video = keep_video_wish
 805                     elif keep_video is None:
 806                         # No clear decision yet, let IE decide
 807                         keep_video = keep_video_wish
 808             except PostProcessingError as e:
 809                 self.report_error(e.msg)
 810         if keep_video is False and not self.params.get('keepvideo', False):
 811             try:
 812                 self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
 813                 os.remove(encodeFilename(filename))
 814             except (IOError, OSError):
 815                 self.report_warning(u'Unable to remove downloaded video file')
 816
 817     def in_download_archive(self, info_dict):
 818         fn = self.params.get('download_archive')
 819         if fn is None:
 820             return False
 821         extractor = info_dict.get('extractor_id')
 822         if extractor is None:
 823             if 'id' in info_dict:
 824                 extractor = info_dict.get('ie_key')  # key in a playlist
 825         if extractor is None:
 826             return False  # Incomplete video information
 827         # Future-proof against any change in case
 828         # and backwards compatibility with prior versions
 829         extractor = extractor.lower()
 830         vid_id = extractor + u' ' + info_dict['id']
 831         try:
 832             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 833                 for line in archive_file:
 834                     if line.strip() == vid_id:
 835                         return True
 836         except IOError as ioe:
 837             if ioe.errno != errno.ENOENT:
 838                 raise
 839         return False
 840
 841     def record_download_archive(self, info_dict):
 842         fn = self.params.get('download_archive')
 843         if fn is None:
 844             return
 845         vid_id = info_dict['extractor'] + u' ' + info_dict['id']
 846         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
 847             archive_file.write(vid_id + u'\n')
 848
 849     @staticmethod
 850     def format_resolution(format, default='unknown'):
 851         if format.get('_resolution') is not None:
 852             return format['_resolution']
 853         if format.get('height') is not None:
 854             if format.get('width') is not None:
 855                 res = u'%sx%s' % (format['width'], format['height'])
 856             else:
 857                 res = u'%sp' % format['height']
 858         else:
 859             res = default
 860         return res
 861
 862     def list_formats(self, info_dict):
 863         def format_note(fdict):
 864             if fdict.get('format_note') is not None:
 865                 return fdict['format_note']
 866             res = u''
 867             if fdict.get('vcodec') is not None:
 868                 res += u'%-5s' % fdict['vcodec']
 869             elif fdict.get('vbr') is not None:
 870                 res += u'video'
 871             if fdict.get('vbr') is not None:
 872                 res += u'@%4dk' % fdict['vbr']
 873             if fdict.get('acodec') is not None:
 874                 if res:
 875                     res += u', '
 876                 res += u'%-5s' % fdict['acodec']
 877             elif fdict.get('abr') is not None:
 878                 if res:
 879                     res += u', '
 880                 res += 'audio'
 881             if fdict.get('abr') is not None:
 882                 res += u'@%3dk' % fdict['abr']
 883             return res
 884
 885         def line(format):
 886             return (u'%-20s%-10s%-12s%s' % (
 887                 format['format_id'],
 888                 format['ext'],
 889                 self.format_resolution(format),
 890                 format_note(format),
 891                 )
 892             )
 893
 894         formats = info_dict.get('formats', [info_dict])
 895         formats_s = list(map(line, formats))
 896         if len(formats) > 1:
 897             formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
 898             formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
 899
 900         header_line = line({
 901             'format_id': u'format code', 'ext': u'extension',
 902             '_resolution': u'resolution', 'format_note': u'note'})
 903         self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
 904                        (info_dict['id'], header_line, u"\n".join(formats_s)))