_ Git - youtube-dl/blob - youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import
   5
   6 import errno
   7 import io
   8 import os
   9 import re
  10 import shutil
  11 import socket
  12 import sys
  13 import time
  14 import traceback
  15
  16 if os.name == 'nt':
  17     import ctypes
  18
  19 from .utils import (
  20     compat_http_client,
  21     compat_print,
  22     compat_str,
  23     compat_urllib_error,
  24     compat_urllib_request,
  25     ContentTooShortError,
  26     date_from_str,
  27     DateRange,
  28     determine_ext,
  29     DownloadError,
  30     encodeFilename,
  31     ExtractorError,
  32     locked_file,
  33     MaxDownloadsReached,
  34     PostProcessingError,
  35     preferredencoding,
  36     SameFileError,
  37     sanitize_filename,
  38     subtitles_filename,
  39     takewhile_inclusive,
  40     UnavailableVideoError,
  41     write_json_file,
  42     write_string,
  43 )
  44 from .extractor import get_info_extractor, gen_extractors
  45 from .FileDownloader import FileDownloader
  46
  47
  48 class YoutubeDL(object):
  49     """YoutubeDL class.
  50
  51     YoutubeDL objects are the ones responsible of downloading the
  52     actual video file and writing it to disk if the user has requested
  53     it, among some other tasks. In most cases there should be one per
  54     program. As, given a video URL, the downloader doesn't know how to
  55     extract all the needed information, task that InfoExtractors do, it
  56     has to pass the URL to one of them.
  57
  58     For this, YoutubeDL objects have a method that allows
  59     InfoExtractors to be registered in a given order. When it is passed
  60     a URL, the YoutubeDL object handles it to the first InfoExtractor it
  61     finds that reports being able to handle it. The InfoExtractor extracts
  62     all the information about the video or videos the URL refers to, and
  63     YoutubeDL process the extracted information, possibly using a File
  64     Downloader to download the video.
  65
  66     YoutubeDL objects accept a lot of parameters. In order not to saturate
  67     the object constructor with arguments, it receives a dictionary of
  68     options instead. These options are available through the params
  69     attribute for the InfoExtractors to use. The YoutubeDL also
  70     registers itself as the downloader in charge for the InfoExtractors
  71     that are added to it, so this is a "mutual registration".
  72
  73     Available options:
  74
  75     username:          Username for authentication purposes.
  76     password:          Password for authentication purposes.
  77     videopassword:     Password for accessing a video.
  78     usenetrc:          Use netrc for authentication instead.
  79     verbose:           Print additional info to stdout.
  80     quiet:             Do not print messages to stdout.
  81     forceurl:          Force printing final URL.
  82     forcetitle:        Force printing title.
  83     forceid:           Force printing ID.
  84     forcethumbnail:    Force printing thumbnail URL.
  85     forcedescription:  Force printing description.
  86     forcefilename:     Force printing final filename.
  87     simulate:          Do not download the video files.
  88     format:            Video format code.
  89     format_limit:      Highest quality format to try.
  90     outtmpl:           Template for output names.
  91     restrictfilenames: Do not allow "&" and spaces in file names
  92     ignoreerrors:      Do not stop on download errors.
  93     nooverwrites:      Prevent overwriting files.
  94     playliststart:     Playlist item to start at.
  95     playlistend:       Playlist item to end at.
  96     matchtitle:        Download only matching titles.
  97     rejecttitle:       Reject downloads for matching titles.
  98     logtostderr:       Log messages to stderr instead of stdout.
  99     writedescription:  Write the video description to a .description file
 100     writeinfojson:     Write the video metadata to a .info.json file
 101     writeannotations:  Write the video annotations to a .annotations.xml file
 102     writethumbnail:    Write the thumbnail image to a file
 103     writesubtitles:    Write the video subtitles to a file
 104     writeautomaticsub: Write the automatic subtitles to a file
 105     allsubtitles:      Downloads all the subtitles of the video
 106                        (requires writesubtitles or writeautomaticsub)
 107     listsubtitles:     Lists all available subtitles for the video
 108     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
 109     subtitleslangs:    List of languages of the subtitles to download
 110     keepvideo:         Keep the video file after post-processing
 111     daterange:         A DateRange object, download only if the upload_date is in the range.
 112     skip_download:     Skip the actual download of the video file
 113     cachedir:          Location of the cache files in the filesystem.
 114                        None to disable filesystem cache.
 115     noplaylist:        Download single video instead of a playlist if in doubt.
 116     age_limit:         An integer representing the user's age in years.
 117                        Unsuitable videos for the given age are skipped.
 118     downloadarchive:   File name of a file where all downloads are recorded.
 119                        Videos already present in the file are not downloaded
 120                        again.
 121
 122     The following parameters are not used by YoutubeDL itself, they are used by
 123     the FileDownloader:
 124     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 125     noresizebuffer, retries, continuedl, noprogress, consoletitle
 126     """
 127
 128     params = None
 129     _ies = []
 130     _pps = []
 131     _download_retcode = None
 132     _num_downloads = None
 133     _screen_file = None
 134
 135     def __init__(self, params):
 136         """Create a FileDownloader object with the given options."""
 137         self._ies = []
 138         self._ies_instances = {}
 139         self._pps = []
 140         self._progress_hooks = []
 141         self._download_retcode = 0
 142         self._num_downloads = 0
 143         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 144
 145         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 146                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 147                 and not params['restrictfilenames']):
 148             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 149             self.report_warning(
 150                 u'Assuming --restrict-filenames since file system encoding '
 151                 u'cannot encode all charactes. '
 152                 u'Set the LC_ALL environment variable to fix this.')
 153             params['restrictfilenames'] = True
 154
 155         self.params = params
 156         self.fd = FileDownloader(self, self.params)
 157
 158         if '%(stitle)s' in self.params['outtmpl']:
 159             self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 160
 161     def add_info_extractor(self, ie):
 162         """Add an InfoExtractor object to the end of the list."""
 163         self._ies.append(ie)
 164         self._ies_instances[ie.ie_key()] = ie
 165         ie.set_downloader(self)
 166
 167     def get_info_extractor(self, ie_key):
 168         """
 169         Get an instance of an IE with name ie_key, it will try to get one from
 170         the _ies list, if there's no instance it will create a new one and add
 171         it to the extractor list.
 172         """
 173         ie = self._ies_instances.get(ie_key)
 174         if ie is None:
 175             ie = get_info_extractor(ie_key)()
 176             self.add_info_extractor(ie)
 177         return ie
 178
 179     def add_default_info_extractors(self):
 180         """
 181         Add the InfoExtractors returned by gen_extractors to the end of the list
 182         """
 183         for ie in gen_extractors():
 184             self.add_info_extractor(ie)
 185
 186     def add_post_processor(self, pp):
 187         """Add a PostProcessor object to the end of the chain."""
 188         self._pps.append(pp)
 189         pp.set_downloader(self)
 190
 191     def to_screen(self, message, skip_eol=False):
 192         """Print message to stdout if not in quiet mode."""
 193         if not self.params.get('quiet', False):
 194             terminator = [u'\n', u''][skip_eol]
 195             output = message + terminator
 196             write_string(output, self._screen_file)
 197
 198     def to_stderr(self, message):
 199         """Print message to stderr."""
 200         assert type(message) == type(u'')
 201         output = message + u'\n'
 202         if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
 203             output = output.encode(preferredencoding())
 204         sys.stderr.write(output)
 205
 206     def to_console_title(self, message):
 207         if not self.params.get('consoletitle', False):
 208             return
 209         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 210             # c_wchar_p() might not be necessary if `message` is
 211             # already of type unicode()
 212             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 213         elif 'TERM' in os.environ:
 214             self.to_screen('\033]0;%s\007' % message, skip_eol=True)
 215
 216     def fixed_template(self):
 217         """Checks if the output template is fixed."""
 218         return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
 219
 220     def trouble(self, message=None, tb=None):
 221         """Determine action to take when a download problem appears.
 222
 223         Depending on if the downloader has been configured to ignore
 224         download errors or not, this method may throw an exception or
 225         not when errors are found, after printing the message.
 226
 227         tb, if given, is additional traceback information.
 228         """
 229         if message is not None:
 230             self.to_stderr(message)
 231         if self.params.get('verbose'):
 232             if tb is None:
 233                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 234                     tb = u''
 235                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 236                         tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 237                     tb += compat_str(traceback.format_exc())
 238                 else:
 239                     tb_data = traceback.format_list(traceback.extract_stack())
 240                     tb = u''.join(tb_data)
 241             self.to_stderr(tb)
 242         if not self.params.get('ignoreerrors', False):
 243             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 244                 exc_info = sys.exc_info()[1].exc_info
 245             else:
 246                 exc_info = sys.exc_info()
 247             raise DownloadError(message, exc_info)
 248         self._download_retcode = 1
 249
 250     def report_warning(self, message):
 251         '''
 252         Print the message to stderr, it will be prefixed with 'WARNING:'
 253         If stderr is a tty file the 'WARNING:' will be colored
 254         '''
 255         if sys.stderr.isatty() and os.name != 'nt':
 256             _msg_header = u'\033[0;33mWARNING:\033[0m'
 257         else:
 258             _msg_header = u'WARNING:'
 259         warning_message = u'%s %s' % (_msg_header, message)
 260         self.to_stderr(warning_message)
 261
 262     def report_error(self, message, tb=None):
 263         '''
 264         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 265         in red if stderr is a tty file.
 266         '''
 267         if sys.stderr.isatty() and os.name != 'nt':
 268             _msg_header = u'\033[0;31mERROR:\033[0m'
 269         else:
 270             _msg_header = u'ERROR:'
 271         error_message = u'%s %s' % (_msg_header, message)
 272         self.trouble(error_message, tb)
 273
 274     def report_writedescription(self, descfn):
 275         """ Report that the description file is being written """
 276         self.to_screen(u'[info] Writing video description to: ' + descfn)
 277
 278     def report_writesubtitles(self, sub_filename):
 279         """ Report that the subtitles file is being written """
 280         self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
 281
 282     def report_writeinfojson(self, infofn):
 283         """ Report that the metadata file has been written """
 284         self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
 285
 286     def report_writeannotations(self, annofn):
 287         """ Report that the annotations file has been written. """
 288         self.to_screen(u'[info] Writing video annotations to: ' + annofn)
 289
 290     def report_file_already_downloaded(self, file_name):
 291         """Report file has already been fully downloaded."""
 292         try:
 293             self.to_screen(u'[download] %s has already been downloaded' % file_name)
 294         except UnicodeEncodeError:
 295             self.to_screen(u'[download] The file has already been downloaded')
 296
 297     def increment_downloads(self):
 298         """Increment the ordinal that assigns a number to each file."""
 299         self._num_downloads += 1
 300
 301     def prepare_filename(self, info_dict):
 302         """Generate the output filename."""
 303         try:
 304             template_dict = dict(info_dict)
 305
 306             template_dict['epoch'] = int(time.time())
 307             autonumber_size = self.params.get('autonumber_size')
 308             if autonumber_size is None:
 309                 autonumber_size = 5
 310             autonumber_templ = u'%0' + str(autonumber_size) + u'd'
 311             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 312             if template_dict.get('playlist_index') is not None:
 313                 template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
 314
 315             sanitize = lambda k, v: sanitize_filename(
 316                 u'NA' if v is None else compat_str(v),
 317                 restricted=self.params.get('restrictfilenames'),
 318                 is_id=(k == u'id'))
 319             template_dict = dict((k, sanitize(k, v))
 320                                  for k, v in template_dict.items())
 321
 322             tmpl = os.path.expanduser(self.params['outtmpl'])
 323             filename = tmpl % template_dict
 324             return filename
 325         except KeyError as err:
 326             self.report_error(u'Erroneous output template')
 327             return None
 328         except ValueError as err:
 329             self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
 330             return None
 331
 332     def _match_entry(self, info_dict):
 333         """ Returns None iff the file should be downloaded """
 334
 335         title = info_dict['title']
 336         matchtitle = self.params.get('matchtitle', False)
 337         if matchtitle:
 338             if not re.search(matchtitle, title, re.IGNORECASE):
 339                 return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
 340         rejecttitle = self.params.get('rejecttitle', False)
 341         if rejecttitle:
 342             if re.search(rejecttitle, title, re.IGNORECASE):
 343                 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 344         date = info_dict.get('upload_date', None)
 345         if date is not None:
 346             dateRange = self.params.get('daterange', DateRange())
 347             if date not in dateRange:
 348                 return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 349         age_limit = self.params.get('age_limit')
 350         if age_limit is not None:
 351             if age_limit < info_dict.get('age_limit', 0):
 352                 return u'Skipping "' + title + '" because it is age restricted'
 353         if self.in_download_archive(info_dict):
 354             return (u'%(title)s has already been recorded in archive'
 355                     % info_dict)
 356         return None
 357
 358     @staticmethod
 359     def add_extra_info(info_dict, extra_info):
 360         '''Set the keys from extra_info in info dict if they are missing'''
 361         for key, value in extra_info.items():
 362             info_dict.setdefault(key, value)
 363
 364     def extract_info(self, url, download=True, ie_key=None, extra_info={}):
 365         '''
 366         Returns a list with a dictionary for each video we find.
 367         If 'download', also downloads the videos.
 368         extra_info is a dict containing the extra values to add to each result
 369          '''
 370
 371         if ie_key:
 372             ies = [self.get_info_extractor(ie_key)]
 373         else:
 374             ies = self._ies
 375
 376         for ie in ies:
 377             if not ie.suitable(url):
 378                 continue
 379
 380             if not ie.working():
 381                 self.report_warning(u'The program functionality for this site has been marked as broken, '
 382                                     u'and will probably not work.')
 383
 384             try:
 385                 ie_result = ie.extract(url)
 386                 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
 387                     break
 388                 if isinstance(ie_result, list):
 389                     # Backwards compatibility: old IE result format
 390                     ie_result = {
 391                         '_type': 'compat_list',
 392                         'entries': ie_result,
 393                     }
 394                 self.add_extra_info(ie_result,
 395                     {
 396                         'extractor': ie.IE_NAME,
 397                         'webpage_url': url,
 398                         'extractor_key': ie.ie_key(),
 399                     })
 400                 return self.process_ie_result(ie_result, download, extra_info)
 401             except ExtractorError as de: # An error we somewhat expected
 402                 self.report_error(compat_str(de), de.format_traceback())
 403                 break
 404             except Exception as e:
 405                 if self.params.get('ignoreerrors', False):
 406                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 407                     break
 408                 else:
 409                     raise
 410         else:
 411             self.report_error(u'no suitable InfoExtractor: %s' % url)
 412
 413     def process_ie_result(self, ie_result, download=True, extra_info={}):
 414         """
 415         Take the result of the ie(may be modified) and resolve all unresolved
 416         references (URLs, playlist items).
 417
 418         It will also download the videos if 'download'.
 419         Returns the resolved ie_result.
 420         """
 421
 422         result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
 423         if result_type == 'video':
 424             self.add_extra_info(ie_result, extra_info)
 425             return self.process_video_result(ie_result, download=download)
 426         elif result_type == 'url':
 427             # We have to add extra_info to the results because it may be
 428             # contained in a playlist
 429             return self.extract_info(ie_result['url'],
 430                                      download,
 431                                      ie_key=ie_result.get('ie_key'),
 432                                      extra_info=extra_info)
 433         elif result_type == 'playlist':
 434             self.add_extra_info(ie_result, extra_info)
 435             # We process each entry in the playlist
 436             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 437             self.to_screen(u'[download] Downloading playlist: %s' % playlist)
 438
 439             playlist_results = []
 440
 441             n_all_entries = len(ie_result['entries'])
 442             playliststart = self.params.get('playliststart', 1) - 1
 443             playlistend = self.params.get('playlistend', -1)
 444
 445             if playlistend == -1:
 446                 entries = ie_result['entries'][playliststart:]
 447             else:
 448                 entries = ie_result['entries'][playliststart:playlistend]
 449
 450             n_entries = len(entries)
 451
 452             self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
 453                 (ie_result['extractor'], playlist, n_all_entries, n_entries))
 454
 455             for i, entry in enumerate(entries, 1):
 456                 self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
 457                 extra = {
 458                     'playlist': playlist,
 459                     'playlist_index': i + playliststart,
 460                     'extractor': ie_result['extractor'],
 461                     'webpage_url': ie_result['webpage_url'],
 462                     'extractor_key': ie_result['extractor_key'],
 463                 }
 464                 entry_result = self.process_ie_result(entry,
 465                                                       download=download,
 466                                                       extra_info=extra)
 467                 playlist_results.append(entry_result)
 468             ie_result['entries'] = playlist_results
 469             return ie_result
 470         elif result_type == 'compat_list':
 471             def _fixup(r):
 472                 self.add_extra_info(r,
 473                     {
 474                         'extractor': ie_result['extractor'],
 475                         'webpage_url': ie_result['webpage_url'],
 476                         'extractor_key': ie_result['extractor_key'],
 477                     })
 478                 return r
 479             ie_result['entries'] = [
 480                 self.process_ie_result(_fixup(r), download, extra_info)
 481                 for r in ie_result['entries']
 482             ]
 483             return ie_result
 484         else:
 485             raise Exception('Invalid result type: %s' % result_type)
 486
 487     def select_format(self, format_spec, available_formats):
 488         if format_spec == 'best' or format_spec is None:
 489             return available_formats[-1]
 490         elif format_spec == 'worst':
 491             return available_formats[0]
 492         else:
 493             extensions = [u'mp4', u'flv', u'webm', u'3gp']
 494             if format_spec in extensions:
 495                 filter_f = lambda f: f['ext'] == format_spec
 496             else:
 497                 filter_f = lambda f: f['format_id'] == format_spec
 498             matches = list(filter(filter_f, available_formats))
 499             if matches:
 500                 return matches[-1]
 501         return None
 502
 503     def process_video_result(self, info_dict, download=True):
 504         assert info_dict.get('_type', 'video') == 'video'
 505
 506         if 'playlist' not in info_dict:
 507             # It isn't part of a playlist
 508             info_dict['playlist'] = None
 509             info_dict['playlist_index'] = None
 510
 511         # This extractors handle format selection themselves
 512         if info_dict['extractor'] in [u'youtube', u'Youku']:
 513             if download:
 514                 self.process_info(info_dict)
 515             return info_dict
 516
 517         # We now pick which formats have to be downloaded
 518         if info_dict.get('formats') is None:
 519             # There's only one format available
 520             formats = [info_dict]
 521         else:
 522             formats = info_dict['formats']
 523
 524         # We check that all the formats have the format and format_id fields
 525         for (i, format) in enumerate(formats):
 526             if format.get('format_id') is None:
 527                 format['format_id'] = compat_str(i)
 528             if format.get('format') is None:
 529                 format['format'] = u'{id} - {res}{note}'.format(
 530                     id=format['format_id'],
 531                     res=self.format_resolution(format),
 532                     note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
 533                 )
 534             # Automatically determine file extension if missing
 535             if 'ext' not in format:
 536                 format['ext'] = determine_ext(format['url'])
 537
 538         if self.params.get('listformats', None):
 539             self.list_formats(info_dict)
 540             return
 541
 542         format_limit = self.params.get('format_limit', None)
 543         if format_limit:
 544             formats = list(takewhile_inclusive(
 545                 lambda f: f['format_id'] != format_limit, formats
 546             ))
 547         if self.params.get('prefer_free_formats'):
 548             def _free_formats_key(f):
 549                 try:
 550                     ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
 551                 except ValueError:
 552                     ext_ord = -1
 553                 # We only compare the extension if they have the same height and width
 554                 return (f.get('height'), f.get('width'), ext_ord)
 555             formats = sorted(formats, key=_free_formats_key)
 556
 557         req_format = self.params.get('format', 'best')
 558         if req_format is None:
 559             req_format = 'best'
 560         formats_to_download = []
 561         # The -1 is for supporting YoutubeIE
 562         if req_format in ('-1', 'all'):
 563             formats_to_download = formats
 564         else:
 565             # We can accept formats requestd in the format: 34/5/best, we pick
 566             # the first that is available, starting from left
 567             req_formats = req_format.split('/')
 568             for rf in req_formats:
 569                 selected_format = self.select_format(rf, formats)
 570                 if selected_format is not None:
 571                     formats_to_download = [selected_format]
 572                     break
 573         if not formats_to_download:
 574             raise ExtractorError(u'requested format not available',
 575                                  expected=True)
 576
 577         if download:
 578             if len(formats_to_download) > 1:
 579                 self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
 580             for format in formats_to_download:
 581                 new_info = dict(info_dict)
 582                 new_info.update(format)
 583                 self.process_info(new_info)
 584         # We update the info dict with the best quality format (backwards compatibility)
 585         info_dict.update(formats_to_download[-1])
 586         return info_dict
 587
 588     def process_info(self, info_dict):
 589         """Process a single resolved IE result."""
 590
 591         assert info_dict.get('_type', 'video') == 'video'
 592         #We increment the download the download count here to match the previous behaviour.
 593         self.increment_downloads()
 594
 595         info_dict['fulltitle'] = info_dict['title']
 596         if len(info_dict['title']) > 200:
 597             info_dict['title'] = info_dict['title'][:197] + u'...'
 598
 599         # Keep for backwards compatibility
 600         info_dict['stitle'] = info_dict['title']
 601
 602         if not 'format' in info_dict:
 603             info_dict['format'] = info_dict['ext']
 604
 605         reason = self._match_entry(info_dict)
 606         if reason is not None:
 607             self.to_screen(u'[download] ' + reason)
 608             return
 609
 610         max_downloads = self.params.get('max_downloads')
 611         if max_downloads is not None:
 612             if self._num_downloads > int(max_downloads):
 613                 raise MaxDownloadsReached()
 614
 615         filename = self.prepare_filename(info_dict)
 616
 617         # Forced printings
 618         if self.params.get('forcetitle', False):
 619             compat_print(info_dict['title'])
 620         if self.params.get('forceid', False):
 621             compat_print(info_dict['id'])
 622         if self.params.get('forceurl', False):
 623             # For RTMP URLs, also include the playpath
 624             compat_print(info_dict['url'] + info_dict.get('play_path', u''))
 625         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
 626             compat_print(info_dict['thumbnail'])
 627         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
 628             compat_print(info_dict['description'])
 629         if self.params.get('forcefilename', False) and filename is not None:
 630             compat_print(filename)
 631         if self.params.get('forceformat', False):
 632             compat_print(info_dict['format'])
 633
 634         # Do nothing else if in simulate mode
 635         if self.params.get('simulate', False):
 636             return
 637
 638         if filename is None:
 639             return
 640
 641         try:
 642             dn = os.path.dirname(encodeFilename(filename))
 643             if dn != '' and not os.path.exists(dn):
 644                 os.makedirs(dn)
 645         except (OSError, IOError) as err:
 646             self.report_error(u'unable to create directory ' + compat_str(err))
 647             return
 648
 649         if self.params.get('writedescription', False):
 650             try:
 651                 descfn = filename + u'.description'
 652                 self.report_writedescription(descfn)
 653                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
 654                     descfile.write(info_dict['description'])
 655             except (KeyError, TypeError):
 656                 self.report_warning(u'There\'s no description to write.')
 657             except (OSError, IOError):
 658                 self.report_error(u'Cannot write description file ' + descfn)
 659                 return
 660
 661         if self.params.get('writeannotations', False):
 662             try:
 663                 annofn = filename + u'.annotations.xml'
 664                 self.report_writeannotations(annofn)
 665                 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
 666                     annofile.write(info_dict['annotations'])
 667             except (KeyError, TypeError):
 668                 self.report_warning(u'There are no annotations to write.')
 669             except (OSError, IOError):
 670                 self.report_error(u'Cannot write annotations file: ' + annofn)
 671                 return
 672
 673         subtitles_are_requested = any([self.params.get('writesubtitles', False),
 674                                        self.params.get('writeautomaticsub')])
 675
 676         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
 677             # subtitles download errors are already managed as troubles in relevant IE
 678             # that way it will silently go on when used with unsupporting IE
 679             subtitles = info_dict['subtitles']
 680             sub_format = self.params.get('subtitlesformat', 'srt')
 681             for sub_lang in subtitles.keys():
 682                 sub = subtitles[sub_lang]
 683                 if sub is None:
 684                     continue
 685                 try:
 686                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
 687                     self.report_writesubtitles(sub_filename)
 688                     with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
 689                             subfile.write(sub)
 690                 except (OSError, IOError):
 691                     self.report_error(u'Cannot write subtitles file ' + descfn)
 692                     return
 693
 694         if self.params.get('writeinfojson', False):
 695             infofn = filename + u'.info.json'
 696             self.report_writeinfojson(infofn)
 697             try:
 698                 json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
 699                 write_json_file(json_info_dict, encodeFilename(infofn))
 700             except (OSError, IOError):
 701                 self.report_error(u'Cannot write metadata to JSON file ' + infofn)
 702                 return
 703
 704         if self.params.get('writethumbnail', False):
 705             if info_dict.get('thumbnail') is not None:
 706                 thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
 707                 thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
 708                 self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
 709                                (info_dict['extractor'], info_dict['id']))
 710                 try:
 711                     uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
 712                     with open(thumb_filename, 'wb') as thumbf:
 713                         shutil.copyfileobj(uf, thumbf)
 714                     self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
 715                         (info_dict['extractor'], info_dict['id'], thumb_filename))
 716                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 717                     self.report_warning(u'Unable to download thumbnail "%s": %s' %
 718                         (info_dict['thumbnail'], compat_str(err)))
 719
 720         if not self.params.get('skip_download', False):
 721             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
 722                 success = True
 723             else:
 724                 try:
 725                     success = self.fd._do_download(filename, info_dict)
 726                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 727                     self.report_error(u'unable to download video data: %s' % str(err))
 728                     return
 729                 except (OSError, IOError) as err:
 730                     raise UnavailableVideoError(err)
 731                 except (ContentTooShortError, ) as err:
 732                     self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
 733                     return
 734
 735             if success:
 736                 try:
 737                     self.post_process(filename, info_dict)
 738                 except (PostProcessingError) as err:
 739                     self.report_error(u'postprocessing: %s' % str(err))
 740                     return
 741
 742         self.record_download_archive(info_dict)
 743
 744     def download(self, url_list):
 745         """Download a given list of URLs."""
 746         if len(url_list) > 1 and self.fixed_template():
 747             raise SameFileError(self.params['outtmpl'])
 748
 749         for url in url_list:
 750             try:
 751                 #It also downloads the videos
 752                 videos = self.extract_info(url)
 753             except UnavailableVideoError:
 754                 self.report_error(u'unable to download video')
 755             except MaxDownloadsReached:
 756                 self.to_screen(u'[info] Maximum number of downloaded files reached.')
 757                 raise
 758
 759         return self._download_retcode
 760
 761     def post_process(self, filename, ie_info):
 762         """Run all the postprocessors on the given file."""
 763         info = dict(ie_info)
 764         info['filepath'] = filename
 765         keep_video = None
 766         for pp in self._pps:
 767             try:
 768                 keep_video_wish, new_info = pp.run(info)
 769                 if keep_video_wish is not None:
 770                     if keep_video_wish:
 771                         keep_video = keep_video_wish
 772                     elif keep_video is None:
 773                         # No clear decision yet, let IE decide
 774                         keep_video = keep_video_wish
 775             except PostProcessingError as e:
 776                 self.report_error(e.msg)
 777         if keep_video is False and not self.params.get('keepvideo', False):
 778             try:
 779                 self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
 780                 os.remove(encodeFilename(filename))
 781             except (IOError, OSError):
 782                 self.report_warning(u'Unable to remove downloaded video file')
 783
 784     def in_download_archive(self, info_dict):
 785         fn = self.params.get('download_archive')
 786         if fn is None:
 787             return False
 788         vid_id = info_dict['extractor'] + u' ' + info_dict['id']
 789         try:
 790             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 791                 for line in archive_file:
 792                     if line.strip() == vid_id:
 793                         return True
 794         except IOError as ioe:
 795             if ioe.errno != errno.ENOENT:
 796                 raise
 797         return False
 798
 799     def record_download_archive(self, info_dict):
 800         fn = self.params.get('download_archive')
 801         if fn is None:
 802             return
 803         vid_id = info_dict['extractor'] + u' ' + info_dict['id']
 804         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
 805             archive_file.write(vid_id + u'\n')
 806
 807     @staticmethod
 808     def format_resolution(format, default='unknown'):
 809         if format.get('_resolution') is not None:
 810             return format['_resolution']
 811         if format.get('height') is not None:
 812             if format.get('width') is not None:
 813                 res = u'%sx%s' % (format['width'], format['height'])
 814             else:
 815                 res = u'%sp' % format['height']
 816         else:
 817             res = default
 818         return res
 819
 820     def list_formats(self, info_dict):
 821         def format_note(fdict):
 822             if fdict.get('format_note') is not None:
 823                 return fdict['format_note']
 824             res = u''
 825             if fdict.get('vcodec') is not None:
 826                 res += u'%-5s' % fdict['vcodec']
 827             elif fdict.get('vbr') is not None:
 828                 res += u'video'
 829             if fdict.get('vbr') is not None:
 830                 res += u'@%4dk' % fdict['vbr']
 831             if fdict.get('acodec') is not None:
 832                 if res:
 833                     res += u', '
 834                 res += u'%-5s' % fdict['acodec']
 835             elif fdict.get('abr') is not None:
 836                 if res:
 837                     res += u', '
 838                 res += 'audio'
 839             if fdict.get('abr') is not None:
 840                 res += u'@%3dk' % fdict['abr']
 841             return res
 842
 843         def line(format):
 844             return (u'%-20s%-10s%-12s%s' % (
 845                 format['format_id'],
 846                 format['ext'],
 847                 self.format_resolution(format),
 848                 format_note(format),
 849                 )
 850             )
 851
 852         formats = info_dict.get('formats', [info_dict])
 853         formats_s = list(map(line, formats))
 854         if len(formats) > 1:
 855             formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
 856             formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
 857
 858         header_line = line({
 859             'format_id': u'format code', 'ext': u'extension',
 860             '_resolution': u'resolution', 'format_note': u'note'})
 861         self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
 862                        (info_dict['id'], header_line, u"\n".join(formats_s)))