git.bitcoin.ninja Git - youtube-dl/blob - youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import
   5
   6 import errno
   7 import io
   8 import json
   9 import os
  10 import re
  11 import shutil
  12 import socket
  13 import sys
  14 import time
  15 import traceback
  16
  17 if os.name == 'nt':
  18     import ctypes
  19
  20 from .utils import (
  21     compat_http_client,
  22     compat_print,
  23     compat_str,
  24     compat_urllib_error,
  25     compat_urllib_request,
  26     ContentTooShortError,
  27     date_from_str,
  28     DateRange,
  29     determine_ext,
  30     DownloadError,
  31     encodeFilename,
  32     ExtractorError,
  33     locked_file,
  34     MaxDownloadsReached,
  35     PostProcessingError,
  36     preferredencoding,
  37     SameFileError,
  38     sanitize_filename,
  39     subtitles_filename,
  40     takewhile_inclusive,
  41     UnavailableVideoError,
  42     write_json_file,
  43     write_string,
  44 )
  45 from .extractor import get_info_extractor, gen_extractors
  46 from .FileDownloader import FileDownloader
  47
  48
  49 class YoutubeDL(object):
  50     """YoutubeDL class.
  51
  52     YoutubeDL objects are the ones responsible of downloading the
  53     actual video file and writing it to disk if the user has requested
  54     it, among some other tasks. In most cases there should be one per
  55     program. As, given a video URL, the downloader doesn't know how to
  56     extract all the needed information, task that InfoExtractors do, it
  57     has to pass the URL to one of them.
  58
  59     For this, YoutubeDL objects have a method that allows
  60     InfoExtractors to be registered in a given order. When it is passed
  61     a URL, the YoutubeDL object hands it to the first InfoExtractor it
  62     finds that reports being able to handle it. The InfoExtractor extracts
  63     all the information about the video or videos the URL refers to, and
  64     YoutubeDL processes the extracted information, possibly using a File
  65     Downloader to download the video.
  66
  67     YoutubeDL objects accept a lot of parameters. In order not to saturate
  68     the object constructor with arguments, it receives a dictionary of
  69     options instead. These options are available through the params
  70     attribute for the InfoExtractors to use. The YoutubeDL also
  71     registers itself as the downloader in charge for the InfoExtractors
  72     that are added to it, so this is a "mutual registration".
  73
  74     Available options:
  75
  76     username:          Username for authentication purposes.
  77     password:          Password for authentication purposes.
  78     videopassword:     Password for accessing a video.
  79     usenetrc:          Use netrc for authentication instead.
  80     verbose:           Print additional info to stdout.
  81     quiet:             Do not print messages to stdout.
  82     forceurl:          Force printing final URL.
  83     forcetitle:        Force printing title.
  84     forceid:           Force printing ID.
  85     forcethumbnail:    Force printing thumbnail URL.
  86     forcedescription:  Force printing description.
  87     forcefilename:     Force printing final filename.
  88     forcejson:         Force printing info_dict as JSON.
  89     simulate:          Do not download the video files.
  90     format:            Video format code.
  91     format_limit:      Highest quality format to try.
  92     outtmpl:           Template for output names.
  93     restrictfilenames: Do not allow "&" and spaces in file names
  94     ignoreerrors:      Do not stop on download errors.
  95     nooverwrites:      Prevent overwriting files.
  96     playliststart:     Playlist item to start at.
  97     playlistend:       Playlist item to end at.
  98     matchtitle:        Download only matching titles.
  99     rejecttitle:       Reject downloads for matching titles.
 100     logger:            Log messages to a logging.Logger instance.
 101     logtostderr:       Log messages to stderr instead of stdout.
 102     writedescription:  Write the video description to a .description file
 103     writeinfojson:     Write the video description to a .info.json file
 104     writeannotations:  Write the video annotations to a .annotations.xml file
 105     writethumbnail:    Write the thumbnail image to a file
 106     writesubtitles:    Write the video subtitles to a file
 107     writeautomaticsub: Write the automatic subtitles to a file
 108     allsubtitles:      Downloads all the subtitles of the video
 109                        (requires writesubtitles or writeautomaticsub)
 110     listsubtitles:     Lists all available subtitles for the video
 111     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
 112     subtitleslangs:    List of languages of the subtitles to download
 113     keepvideo:         Keep the video file after post-processing
 114     daterange:         A DateRange object, download only if the upload_date is in the range.
 115     skip_download:     Skip the actual download of the video file
 116     cachedir:          Location of the cache files in the filesystem.
 117                        None to disable filesystem cache.
 118     noplaylist:        Download single video instead of a playlist if in doubt.
 119     age_limit:         An integer representing the user's age in years.
 120                        Unsuitable videos for the given age are skipped.
 121     downloadarchive:   File name of a file where all downloads are recorded.
 122                        Videos already present in the file are not downloaded
 123                        again.
 124
 125     The following parameters are not used by YoutubeDL itself, they are used by
 126     the FileDownloader:
 127     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 128     noresizebuffer, retries, continuedl, noprogress, consoletitle
 129     """
 130
 131     params = None
 132     _ies = []
 133     _pps = []
 134     _download_retcode = None
 135     _num_downloads = None
 136     _screen_file = None
 137
 138     def __init__(self, params):
 139         """Create a FileDownloader object with the given options."""
 140         self._ies = []
 141         self._ies_instances = {}
 142         self._pps = []
 143         self._progress_hooks = []
 144         self._download_retcode = 0
 145         self._num_downloads = 0
 146         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 147
 148         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 149                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 150                 and not params['restrictfilenames']):
 151             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 152             self.report_warning(
 153                 u'Assuming --restrict-filenames since file system encoding '
 154                 u'cannot encode all charactes. '
 155                 u'Set the LC_ALL environment variable to fix this.')
 156             params['restrictfilenames'] = True
 157
 158         self.params = params
 159         self.fd = FileDownloader(self, self.params)
 160
 161         if '%(stitle)s' in self.params['outtmpl']:
 162             self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 163
 164     def add_info_extractor(self, ie):
 165         """Add an InfoExtractor object to the end of the list."""
 166         self._ies.append(ie)
 167         self._ies_instances[ie.ie_key()] = ie
 168         ie.set_downloader(self)
 169
 170     def get_info_extractor(self, ie_key):
 171         """
 172         Get an instance of an IE with name ie_key, it will try to get one from
 173         the _ies list, if there's no instance it will create a new one and add
 174         it to the extractor list.
 175         """
 176         ie = self._ies_instances.get(ie_key)
 177         if ie is None:
 178             ie = get_info_extractor(ie_key)()
 179             self.add_info_extractor(ie)
 180         return ie
 181
 182     def add_default_info_extractors(self):
 183         """
 184         Add the InfoExtractors returned by gen_extractors to the end of the list
 185         """
 186         for ie in gen_extractors():
 187             self.add_info_extractor(ie)
 188
 189     def add_post_processor(self, pp):
 190         """Add a PostProcessor object to the end of the chain."""
 191         self._pps.append(pp)
 192         pp.set_downloader(self)
 193
 194     def to_screen(self, message, skip_eol=False):
 195         """Print message to stdout if not in quiet mode."""
 196         if self.params.get('logger'):
 197             self.params['logger'].debug(message)
 198         elif not self.params.get('quiet', False):
 199             terminator = [u'\n', u''][skip_eol]
 200             output = message + terminator
 201             write_string(output, self._screen_file)
 202
 203     def to_stderr(self, message):
 204         """Print message to stderr."""
 205         assert type(message) == type(u'')
 206         if self.params.get('logger'):
 207             self.params['logger'].error(message)
 208         else:
 209             output = message + u'\n'
 210             if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
 211                 output = output.encode(preferredencoding())
 212             sys.stderr.write(output)
 213
 214     def to_console_title(self, message):
 215         if not self.params.get('consoletitle', False):
 216             return
 217         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 218             # c_wchar_p() might not be necessary if `message` is
 219             # already of type unicode()
 220             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 221         elif 'TERM' in os.environ:
 222             write_string(u'\033]0;%s\007' % message, self._screen_file)
 223
 224     def save_console_title(self):
 225         if not self.params.get('consoletitle', False):
 226             return
 227         if 'TERM' in os.environ:
 228             # Save the title on stack
 229             write_string(u'\033[22;0t', self._screen_file)
 230
 231     def restore_console_title(self):
 232         if not self.params.get('consoletitle', False):
 233             return
 234         if 'TERM' in os.environ:
 235             # Restore the title from stack
 236             write_string(u'\033[23;0t', self._screen_file)
 237
 238     def __enter__(self):
 239         self.save_console_title()
 240         return self
 241
 242     def __exit__(self, *args):
 243         self.restore_console_title()
 244
 245     def fixed_template(self):
 246         """Checks if the output template is fixed."""
 247         return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
 248
 249     def trouble(self, message=None, tb=None):
 250         """Determine action to take when a download problem appears.
 251
 252         Depending on if the downloader has been configured to ignore
 253         download errors or not, this method may throw an exception or
 254         not when errors are found, after printing the message.
 255
 256         tb, if given, is additional traceback information.
 257         """
 258         if message is not None:
 259             self.to_stderr(message)
 260         if self.params.get('verbose'):
 261             if tb is None:
 262                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 263                     tb = u''
 264                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 265                         tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 266                     tb += compat_str(traceback.format_exc())
 267                 else:
 268                     tb_data = traceback.format_list(traceback.extract_stack())
 269                     tb = u''.join(tb_data)
 270             self.to_stderr(tb)
 271         if not self.params.get('ignoreerrors', False):
 272             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 273                 exc_info = sys.exc_info()[1].exc_info
 274             else:
 275                 exc_info = sys.exc_info()
 276             raise DownloadError(message, exc_info)
 277         self._download_retcode = 1
 278
 279     def report_warning(self, message):
 280         '''
 281         Print the message to stderr, it will be prefixed with 'WARNING:'
 282         If stderr is a tty file the 'WARNING:' will be colored
 283         '''
 284         if sys.stderr.isatty() and os.name != 'nt':
 285             _msg_header = u'\033[0;33mWARNING:\033[0m'
 286         else:
 287             _msg_header = u'WARNING:'
 288         warning_message = u'%s %s' % (_msg_header, message)
 289         self.to_stderr(warning_message)
 290
 291     def report_error(self, message, tb=None):
 292         '''
 293         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 294         in red if stderr is a tty file.
 295         '''
 296         if sys.stderr.isatty() and os.name != 'nt':
 297             _msg_header = u'\033[0;31mERROR:\033[0m'
 298         else:
 299             _msg_header = u'ERROR:'
 300         error_message = u'%s %s' % (_msg_header, message)
 301         self.trouble(error_message, tb)
 302
 303     def report_writedescription(self, descfn):
 304         """ Report that the description file is being written """
 305         self.to_screen(u'[info] Writing video description to: ' + descfn)
 306
 307     def report_writesubtitles(self, sub_filename):
 308         """ Report that the subtitles file is being written """
 309         self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
 310
 311     def report_writeinfojson(self, infofn):
 312         """ Report that the metadata file has been written """
 313         self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
 314
 315     def report_writeannotations(self, annofn):
 316         """ Report that the annotations file has been written. """
 317         self.to_screen(u'[info] Writing video annotations to: ' + annofn)
 318
 319     def report_file_already_downloaded(self, file_name):
 320         """Report file has already been fully downloaded."""
 321         try:
 322             self.to_screen(u'[download] %s has already been downloaded' % file_name)
 323         except UnicodeEncodeError:
 324             self.to_screen(u'[download] The file has already been downloaded')
 325
 326     def increment_downloads(self):
 327         """Increment the ordinal that assigns a number to each file."""
 328         self._num_downloads += 1
 329
 330     def prepare_filename(self, info_dict):
 331         """Generate the output filename."""
 332         try:
 333             template_dict = dict(info_dict)
 334
 335             template_dict['epoch'] = int(time.time())
 336             autonumber_size = self.params.get('autonumber_size')
 337             if autonumber_size is None:
 338                 autonumber_size = 5
 339             autonumber_templ = u'%0' + str(autonumber_size) + u'd'
 340             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 341             if template_dict.get('playlist_index') is not None:
 342                 template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
 343
 344             sanitize = lambda k, v: sanitize_filename(
 345                 u'NA' if v is None else compat_str(v),
 346                 restricted=self.params.get('restrictfilenames'),
 347                 is_id=(k == u'id'))
 348             template_dict = dict((k, sanitize(k, v))
 349                                  for k, v in template_dict.items())
 350
 351             tmpl = os.path.expanduser(self.params['outtmpl'])
 352             filename = tmpl % template_dict
 353             return filename
 354         except KeyError as err:
 355             self.report_error(u'Erroneous output template')
 356             return None
 357         except ValueError as err:
 358             self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
 359             return None
 360
 361     def _match_entry(self, info_dict):
 362         """ Returns None iff the file should be downloaded """
 363
 364         if 'title' in info_dict:
 365             # This can happen when we're just evaluating the playlist
 366             title = info_dict['title']
 367             matchtitle = self.params.get('matchtitle', False)
 368             if matchtitle:
 369                 if not re.search(matchtitle, title, re.IGNORECASE):
 370                     return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
 371             rejecttitle = self.params.get('rejecttitle', False)
 372             if rejecttitle:
 373                 if re.search(rejecttitle, title, re.IGNORECASE):
 374                     return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 375         date = info_dict.get('upload_date', None)
 376         if date is not None:
 377             dateRange = self.params.get('daterange', DateRange())
 378             if date not in dateRange:
 379                 return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 380         age_limit = self.params.get('age_limit')
 381         if age_limit is not None:
 382             if age_limit < info_dict.get('age_limit', 0):
 383                 return u'Skipping "' + title + '" because it is age restricted'
 384         if self.in_download_archive(info_dict):
 385             return (u'%s has already been recorded in archive'
 386                     % info_dict.get('title', info_dict.get('id', u'video')))
 387         return None
 388
 389     @staticmethod
 390     def add_extra_info(info_dict, extra_info):
 391         '''Set the keys from extra_info in info dict if they are missing'''
 392         for key, value in extra_info.items():
 393             info_dict.setdefault(key, value)
 394
 395     def extract_info(self, url, download=True, ie_key=None, extra_info={}):
 396         '''
 397         Returns a list with a dictionary for each video we find.
 398         If 'download', also downloads the videos.
 399         extra_info is a dict containing the extra values to add to each result
 400          '''
 401
 402         if ie_key:
 403             ies = [self.get_info_extractor(ie_key)]
 404         else:
 405             ies = self._ies
 406
 407         for ie in ies:
 408             if not ie.suitable(url):
 409                 continue
 410
 411             if not ie.working():
 412                 self.report_warning(u'The program functionality for this site has been marked as broken, '
 413                                     u'and will probably not work.')
 414
 415             try:
 416                 ie_result = ie.extract(url)
 417                 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
 418                     break
 419                 if isinstance(ie_result, list):
 420                     # Backwards compatibility: old IE result format
 421                     ie_result = {
 422                         '_type': 'compat_list',
 423                         'entries': ie_result,
 424                     }
 425                 self.add_extra_info(ie_result,
 426                     {
 427                         'extractor': ie.IE_NAME,
 428                         'webpage_url': url,
 429                         'extractor_key': ie.ie_key(),
 430                     })
 431                 return self.process_ie_result(ie_result, download, extra_info)
 432             except ExtractorError as de: # An error we somewhat expected
 433                 self.report_error(compat_str(de), de.format_traceback())
 434                 break
 435             except Exception as e:
 436                 if self.params.get('ignoreerrors', False):
 437                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 438                     break
 439                 else:
 440                     raise
 441         else:
 442             self.report_error(u'no suitable InfoExtractor: %s' % url)
 443
 444     def process_ie_result(self, ie_result, download=True, extra_info={}):
 445         """
 446         Take the result of the ie(may be modified) and resolve all unresolved
 447         references (URLs, playlist items).
 448
 449         It will also download the videos if 'download'.
 450         Returns the resolved ie_result.
 451         """
 452
 453         result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
 454         if result_type == 'video':
 455             self.add_extra_info(ie_result, extra_info)
 456             return self.process_video_result(ie_result, download=download)
 457         elif result_type == 'url':
 458             # We have to add extra_info to the results because it may be
 459             # contained in a playlist
 460             return self.extract_info(ie_result['url'],
 461                                      download,
 462                                      ie_key=ie_result.get('ie_key'),
 463                                      extra_info=extra_info)
 464         elif result_type == 'playlist':
 465
 466             # We process each entry in the playlist
 467             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 468             self.to_screen(u'[download] Downloading playlist: %s' % playlist)
 469
 470             playlist_results = []
 471
 472             n_all_entries = len(ie_result['entries'])
 473             playliststart = self.params.get('playliststart', 1) - 1
 474             playlistend = self.params.get('playlistend', -1)
 475
 476             if playlistend == -1:
 477                 entries = ie_result['entries'][playliststart:]
 478             else:
 479                 entries = ie_result['entries'][playliststart:playlistend]
 480
 481             n_entries = len(entries)
 482
 483             self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
 484                 (ie_result['extractor'], playlist, n_all_entries, n_entries))
 485
 486             for i, entry in enumerate(entries, 1):
 487                 self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
 488                 extra = {
 489                     'playlist': playlist,
 490                     'playlist_index': i + playliststart,
 491                     'extractor': ie_result['extractor'],
 492                     'webpage_url': ie_result['webpage_url'],
 493                     'extractor_key': ie_result['extractor_key'],
 494                 }
 495
 496                 reason = self._match_entry(entry)
 497                 if reason is not None:
 498                     self.to_screen(u'[download] ' + reason)
 499                     continue
 500
 501                 entry_result = self.process_ie_result(entry,
 502                                                       download=download,
 503                                                       extra_info=extra)
 504                 playlist_results.append(entry_result)
 505             ie_result['entries'] = playlist_results
 506             return ie_result
 507         elif result_type == 'compat_list':
 508             def _fixup(r):
 509                 self.add_extra_info(r,
 510                     {
 511                         'extractor': ie_result['extractor'],
 512                         'webpage_url': ie_result['webpage_url'],
 513                         'extractor_key': ie_result['extractor_key'],
 514                     })
 515                 return r
 516             ie_result['entries'] = [
 517                 self.process_ie_result(_fixup(r), download, extra_info)
 518                 for r in ie_result['entries']
 519             ]
 520             return ie_result
 521         else:
 522             raise Exception('Invalid result type: %s' % result_type)
 523
 524     def select_format(self, format_spec, available_formats):
 525         if format_spec == 'best' or format_spec is None:
 526             return available_formats[-1]
 527         elif format_spec == 'worst':
 528             return available_formats[0]
 529         else:
 530             extensions = [u'mp4', u'flv', u'webm', u'3gp']
 531             if format_spec in extensions:
 532                 filter_f = lambda f: f['ext'] == format_spec
 533             else:
 534                 filter_f = lambda f: f['format_id'] == format_spec
 535             matches = list(filter(filter_f, available_formats))
 536             if matches:
 537                 return matches[-1]
 538         return None
 539
 540     def process_video_result(self, info_dict, download=True):
 541         assert info_dict.get('_type', 'video') == 'video'
 542
 543         if 'playlist' not in info_dict:
 544             # It isn't part of a playlist
 545             info_dict['playlist'] = None
 546             info_dict['playlist_index'] = None
 547
 548         # This extractors handle format selection themselves
 549         if info_dict['extractor'] in [u'youtube', u'Youku']:
 550             if download:
 551                 self.process_info(info_dict)
 552             return info_dict
 553
 554         # We now pick which formats have to be downloaded
 555         if info_dict.get('formats') is None:
 556             # There's only one format available
 557             formats = [info_dict]
 558         else:
 559             formats = info_dict['formats']
 560
 561         # We check that all the formats have the format and format_id fields
 562         for (i, format) in enumerate(formats):
 563             if format.get('format_id') is None:
 564                 format['format_id'] = compat_str(i)
 565             if format.get('format') is None:
 566                 format['format'] = u'{id} - {res}{note}'.format(
 567                     id=format['format_id'],
 568                     res=self.format_resolution(format),
 569                     note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
 570                 )
 571             # Automatically determine file extension if missing
 572             if 'ext' not in format:
 573                 format['ext'] = determine_ext(format['url'])
 574
 575         if self.params.get('listformats', None):
 576             self.list_formats(info_dict)
 577             return
 578
 579         format_limit = self.params.get('format_limit', None)
 580         if format_limit:
 581             formats = list(takewhile_inclusive(
 582                 lambda f: f['format_id'] != format_limit, formats
 583             ))
 584         if self.params.get('prefer_free_formats'):
 585             def _free_formats_key(f):
 586                 try:
 587                     ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
 588                 except ValueError:
 589                     ext_ord = -1
 590                 # We only compare the extension if they have the same height and width
 591                 return (f.get('height'), f.get('width'), ext_ord)
 592             formats = sorted(formats, key=_free_formats_key)
 593
 594         req_format = self.params.get('format', 'best')
 595         if req_format is None:
 596             req_format = 'best'
 597         formats_to_download = []
 598         # The -1 is for supporting YoutubeIE
 599         if req_format in ('-1', 'all'):
 600             formats_to_download = formats
 601         else:
 602             # We can accept formats requestd in the format: 34/5/best, we pick
 603             # the first that is available, starting from left
 604             req_formats = req_format.split('/')
 605             for rf in req_formats:
 606                 selected_format = self.select_format(rf, formats)
 607                 if selected_format is not None:
 608                     formats_to_download = [selected_format]
 609                     break
 610         if not formats_to_download:
 611             raise ExtractorError(u'requested format not available',
 612                                  expected=True)
 613
 614         if download:
 615             if len(formats_to_download) > 1:
 616                 self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
 617             for format in formats_to_download:
 618                 new_info = dict(info_dict)
 619                 new_info.update(format)
 620                 self.process_info(new_info)
 621         # We update the info dict with the best quality format (backwards compatibility)
 622         info_dict.update(formats_to_download[-1])
 623         return info_dict
 624
 625     def process_info(self, info_dict):
 626         """Process a single resolved IE result."""
 627
 628         assert info_dict.get('_type', 'video') == 'video'
 629         #We increment the download the download count here to match the previous behaviour.
 630         self.increment_downloads()
 631
 632         info_dict['fulltitle'] = info_dict['title']
 633         if len(info_dict['title']) > 200:
 634             info_dict['title'] = info_dict['title'][:197] + u'...'
 635
 636         # Keep for backwards compatibility
 637         info_dict['stitle'] = info_dict['title']
 638
 639         if not 'format' in info_dict:
 640             info_dict['format'] = info_dict['ext']
 641
 642         reason = self._match_entry(info_dict)
 643         if reason is not None:
 644             self.to_screen(u'[download] ' + reason)
 645             return
 646
 647         max_downloads = self.params.get('max_downloads')
 648         if max_downloads is not None:
 649             if self._num_downloads > int(max_downloads):
 650                 raise MaxDownloadsReached()
 651
 652         filename = self.prepare_filename(info_dict)
 653
 654         # Forced printings
 655         if self.params.get('forcetitle', False):
 656             compat_print(info_dict['fulltitle'])
 657         if self.params.get('forceid', False):
 658             compat_print(info_dict['id'])
 659         if self.params.get('forceurl', False):
 660             # For RTMP URLs, also include the playpath
 661             compat_print(info_dict['url'] + info_dict.get('play_path', u''))
 662         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
 663             compat_print(info_dict['thumbnail'])
 664         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
 665             compat_print(info_dict['description'])
 666         if self.params.get('forcefilename', False) and filename is not None:
 667             compat_print(filename)
 668         if self.params.get('forceformat', False):
 669             compat_print(info_dict['format'])
 670         if self.params.get('forcejson', False):
 671             compat_print(json.dumps(info_dict))
 672
 673         # Do nothing else if in simulate mode
 674         if self.params.get('simulate', False):
 675             return
 676
 677         if filename is None:
 678             return
 679
 680         try:
 681             dn = os.path.dirname(encodeFilename(filename))
 682             if dn != '' and not os.path.exists(dn):
 683                 os.makedirs(dn)
 684         except (OSError, IOError) as err:
 685             self.report_error(u'unable to create directory ' + compat_str(err))
 686             return
 687
 688         if self.params.get('writedescription', False):
 689             try:
 690                 descfn = filename + u'.description'
 691                 self.report_writedescription(descfn)
 692                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
 693                     descfile.write(info_dict['description'])
 694             except (KeyError, TypeError):
 695                 self.report_warning(u'There\'s no description to write.')
 696             except (OSError, IOError):
 697                 self.report_error(u'Cannot write description file ' + descfn)
 698                 return
 699
 700         if self.params.get('writeannotations', False):
 701             try:
 702                 annofn = filename + u'.annotations.xml'
 703                 self.report_writeannotations(annofn)
 704                 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
 705                     annofile.write(info_dict['annotations'])
 706             except (KeyError, TypeError):
 707                 self.report_warning(u'There are no annotations to write.')
 708             except (OSError, IOError):
 709                 self.report_error(u'Cannot write annotations file: ' + annofn)
 710                 return
 711
 712         subtitles_are_requested = any([self.params.get('writesubtitles', False),
 713                                        self.params.get('writeautomaticsub')])
 714
 715         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
 716             # subtitles download errors are already managed as troubles in relevant IE
 717             # that way it will silently go on when used with unsupporting IE
 718             subtitles = info_dict['subtitles']
 719             sub_format = self.params.get('subtitlesformat', 'srt')
 720             for sub_lang in subtitles.keys():
 721                 sub = subtitles[sub_lang]
 722                 if sub is None:
 723                     continue
 724                 try:
 725                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
 726                     self.report_writesubtitles(sub_filename)
 727                     with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
 728                             subfile.write(sub)
 729                 except (OSError, IOError):
 730                     self.report_error(u'Cannot write subtitles file ' + descfn)
 731                     return
 732
 733         if self.params.get('writeinfojson', False):
 734             infofn = os.path.splitext(filename)[0] + u'.info.json'
 735             self.report_writeinfojson(infofn)
 736             try:
 737                 json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
 738                 write_json_file(json_info_dict, encodeFilename(infofn))
 739             except (OSError, IOError):
 740                 self.report_error(u'Cannot write metadata to JSON file ' + infofn)
 741                 return
 742
 743         if self.params.get('writethumbnail', False):
 744             if info_dict.get('thumbnail') is not None:
 745                 thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
 746                 thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
 747                 self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
 748                                (info_dict['extractor'], info_dict['id']))
 749                 try:
 750                     uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
 751                     with open(thumb_filename, 'wb') as thumbf:
 752                         shutil.copyfileobj(uf, thumbf)
 753                     self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
 754                         (info_dict['extractor'], info_dict['id'], thumb_filename))
 755                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 756                     self.report_warning(u'Unable to download thumbnail "%s": %s' %
 757                         (info_dict['thumbnail'], compat_str(err)))
 758
 759         if not self.params.get('skip_download', False):
 760             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
 761                 success = True
 762             else:
 763                 try:
 764                     success = self.fd._do_download(filename, info_dict)
 765                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 766                     self.report_error(u'unable to download video data: %s' % str(err))
 767                     return
 768                 except (OSError, IOError) as err:
 769                     raise UnavailableVideoError(err)
 770                 except (ContentTooShortError, ) as err:
 771                     self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
 772                     return
 773
 774             if success:
 775                 try:
 776                     self.post_process(filename, info_dict)
 777                 except (PostProcessingError) as err:
 778                     self.report_error(u'postprocessing: %s' % str(err))
 779                     return
 780
 781         self.record_download_archive(info_dict)
 782
 783     def download(self, url_list):
 784         """Download a given list of URLs."""
 785         if len(url_list) > 1 and self.fixed_template():
 786             raise SameFileError(self.params['outtmpl'])
 787
 788         for url in url_list:
 789             try:
 790                 #It also downloads the videos
 791                 videos = self.extract_info(url)
 792             except UnavailableVideoError:
 793                 self.report_error(u'unable to download video')
 794             except MaxDownloadsReached:
 795                 self.to_screen(u'[info] Maximum number of downloaded files reached.')
 796                 raise
 797
 798         return self._download_retcode
 799
 800     def post_process(self, filename, ie_info):
 801         """Run all the postprocessors on the given file."""
 802         info = dict(ie_info)
 803         info['filepath'] = filename
 804         keep_video = None
 805         for pp in self._pps:
 806             try:
 807                 keep_video_wish, new_info = pp.run(info)
 808                 if keep_video_wish is not None:
 809                     if keep_video_wish:
 810                         keep_video = keep_video_wish
 811                     elif keep_video is None:
 812                         # No clear decision yet, let IE decide
 813                         keep_video = keep_video_wish
 814             except PostProcessingError as e:
 815                 self.report_error(e.msg)
 816         if keep_video is False and not self.params.get('keepvideo', False):
 817             try:
 818                 self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
 819                 os.remove(encodeFilename(filename))
 820             except (IOError, OSError):
 821                 self.report_warning(u'Unable to remove downloaded video file')
 822
 823     def in_download_archive(self, info_dict):
 824         fn = self.params.get('download_archive')
 825         if fn is None:
 826             return False
 827         extractor = info_dict.get('extractor_id')
 828         if extractor is None:
 829             if 'id' in info_dict:
 830                 extractor = info_dict.get('ie_key')  # key in a playlist
 831         if extractor is None:
 832             return False  # Incomplete video information
 833         # Future-proof against any change in case
 834         # and backwards compatibility with prior versions
 835         extractor = extractor.lower()
 836         vid_id = extractor + u' ' + info_dict['id']
 837         try:
 838             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 839                 for line in archive_file:
 840                     if line.strip() == vid_id:
 841                         return True
 842         except IOError as ioe:
 843             if ioe.errno != errno.ENOENT:
 844                 raise
 845         return False
 846
 847     def record_download_archive(self, info_dict):
 848         fn = self.params.get('download_archive')
 849         if fn is None:
 850             return
 851         vid_id = info_dict['extractor'] + u' ' + info_dict['id']
 852         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
 853             archive_file.write(vid_id + u'\n')
 854
 855     @staticmethod
 856     def format_resolution(format, default='unknown'):
 857         if format.get('_resolution') is not None:
 858             return format['_resolution']
 859         if format.get('height') is not None:
 860             if format.get('width') is not None:
 861                 res = u'%sx%s' % (format['width'], format['height'])
 862             else:
 863                 res = u'%sp' % format['height']
 864         else:
 865             res = default
 866         return res
 867
 868     def list_formats(self, info_dict):
 869         def format_note(fdict):
 870             if fdict.get('format_note') is not None:
 871                 return fdict['format_note']
 872             res = u''
 873             if fdict.get('vcodec') is not None:
 874                 res += u'%-5s' % fdict['vcodec']
 875             elif fdict.get('vbr') is not None:
 876                 res += u'video'
 877             if fdict.get('vbr') is not None:
 878                 res += u'@%4dk' % fdict['vbr']
 879             if fdict.get('acodec') is not None:
 880                 if res:
 881                     res += u', '
 882                 res += u'%-5s' % fdict['acodec']
 883             elif fdict.get('abr') is not None:
 884                 if res:
 885                     res += u', '
 886                 res += 'audio'
 887             if fdict.get('abr') is not None:
 888                 res += u'@%3dk' % fdict['abr']
 889             return res
 890
 891         def line(format):
 892             return (u'%-20s%-10s%-12s%s' % (
 893                 format['format_id'],
 894                 format['ext'],
 895                 self.format_resolution(format),
 896                 format_note(format),
 897                 )
 898             )
 899
 900         formats = info_dict.get('formats', [info_dict])
 901         formats_s = list(map(line, formats))
 902         if len(formats) > 1:
 903             formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
 904             formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
 905
 906         header_line = line({
 907             'format_id': u'format code', 'ext': u'extension',
 908             '_resolution': u'resolution', 'format_note': u'note'})
 909         self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
 910                        (info_dict['id'], header_line, u"\n".join(formats_s)))