_ Git - youtube-dl/blob - youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import
   5
   6 import errno
   7 import io
   8 import json
   9 import os
  10 import platform
  11 import re
  12 import shutil
  13 import subprocess
  14 import socket
  15 import sys
  16 import time
  17 import traceback
  18
  19 if os.name == 'nt':
  20     import ctypes
  21
  22 from .utils import (
  23     compat_cookiejar,
  24     compat_http_client,
  25     compat_print,
  26     compat_str,
  27     compat_urllib_error,
  28     compat_urllib_request,
  29     ContentTooShortError,
  30     date_from_str,
  31     DateRange,
  32     determine_ext,
  33     DownloadError,
  34     encodeFilename,
  35     ExtractorError,
  36     locked_file,
  37     make_HTTPS_handler,
  38     MaxDownloadsReached,
  39     PostProcessingError,
  40     platform_name,
  41     preferredencoding,
  42     SameFileError,
  43     sanitize_filename,
  44     subtitles_filename,
  45     takewhile_inclusive,
  46     UnavailableVideoError,
  47     write_json_file,
  48     write_string,
  49     YoutubeDLHandler,
  50 )
  51 from .extractor import get_info_extractor, gen_extractors
  52 from .FileDownloader import FileDownloader
  53 from .version import __version__
  54
  55
  56 class YoutubeDL(object):
  57     """YoutubeDL class.
  58
  59     YoutubeDL objects are the ones responsible for downloading the
  60     actual video file and writing it to disk if the user has requested
  61     it, among some other tasks. In most cases there should be one per
  62     program. As, given a video URL, the downloader doesn't know how to
  63     extract all the needed information, task that InfoExtractors do, it
  64     has to pass the URL to one of them.
  65
  66     For this, YoutubeDL objects have a method that allows
  67     InfoExtractors to be registered in a given order. When it is passed
  68     a URL, the YoutubeDL object hands it to the first InfoExtractor it
  69     finds that reports being able to handle it. The InfoExtractor extracts
  70     all the information about the video or videos the URL refers to, and
  71     YoutubeDL process the extracted information, possibly using a File
  72     Downloader to download the video.
  73
  74     YoutubeDL objects accept a lot of parameters. In order not to saturate
  75     the object constructor with arguments, it receives a dictionary of
  76     options instead. These options are available through the params
  77     attribute for the InfoExtractors to use. The YoutubeDL also
  78     registers itself as the downloader in charge for the InfoExtractors
  79     that are added to it, so this is a "mutual registration".
  80
  81     Available options:
  82
  83     username:          Username for authentication purposes.
  84     password:          Password for authentication purposes.
  85     videopassword:     Password for accessing a video.
  86     usenetrc:          Use netrc for authentication instead.
  87     verbose:           Print additional info to stdout.
  88     quiet:             Do not print messages to stdout.
  89     forceurl:          Force printing final URL.
  90     forcetitle:        Force printing title.
  91     forceid:           Force printing ID.
  92     forcethumbnail:    Force printing thumbnail URL.
  93     forcedescription:  Force printing description.
  94     forcefilename:     Force printing final filename.
  95     forcejson:         Force printing info_dict as JSON.
  96     simulate:          Do not download the video files.
  97     format:            Video format code.
  98     format_limit:      Highest quality format to try.
  99     outtmpl:           Template for output names.
 100     restrictfilenames: Do not allow "&" and spaces in file names
 101     ignoreerrors:      Do not stop on download errors.
 102     nooverwrites:      Prevent overwriting files.
 103     playliststart:     Playlist item to start at.
 104     playlistend:       Playlist item to end at.
 105     matchtitle:        Download only matching titles.
 106     rejecttitle:       Reject downloads for matching titles.
 107     logger:            Log messages to a logging.Logger instance.
 108     logtostderr:       Log messages to stderr instead of stdout.
 109     writedescription:  Write the video description to a .description file
 110     writeinfojson:     Write the video metadata to a .info.json file
 111     writeannotations:  Write the video annotations to a .annotations.xml file
 112     writethumbnail:    Write the thumbnail image to a file
 113     writesubtitles:    Write the video subtitles to a file
 114     writeautomaticsub: Write the automatic subtitles to a file
 115     allsubtitles:      Downloads all the subtitles of the video
 116                        (requires writesubtitles or writeautomaticsub)
 117     listsubtitles:     Lists all available subtitles for the video
 118     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
 119     subtitleslangs:    List of languages of the subtitles to download
 120     keepvideo:         Keep the video file after post-processing
 121     daterange:         A DateRange object, download only if the upload_date is in the range.
 122     skip_download:     Skip the actual download of the video file
 123     cachedir:          Location of the cache files in the filesystem.
 124                        None to disable filesystem cache.
 125     noplaylist:        Download single video instead of a playlist if in doubt.
 126     age_limit:         An integer representing the user's age in years.
 127                        Unsuitable videos for the given age are skipped.
 128     downloadarchive:   File name of a file where all downloads are recorded.
 129                        Videos already present in the file are not downloaded
 130                        again.
 131     cookiefile:        File name where cookies should be read from and dumped to.
 132     nocheckcertificate:Do not verify SSL certificates
 133     proxy:             URL of the proxy server to use
 134
 135     The following parameters are not used by YoutubeDL itself, they are used by
 136     the FileDownloader:
 137     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 138     noresizebuffer, retries, continuedl, noprogress, consoletitle
 139     """
 140
 141     params = None
 142     _ies = []
 143     _pps = []
 144     _download_retcode = None
 145     _num_downloads = None
 146     _screen_file = None
 147
 148     def __init__(self, params):
 149         """Create a FileDownloader object with the given options."""
 150         self._ies = []
 151         self._ies_instances = {}
 152         self._pps = []
 153         self._progress_hooks = []
 154         self._download_retcode = 0
 155         self._num_downloads = 0
 156         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 157
 158         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 159                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 160                 and not params['restrictfilenames']):
 161             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 162             self.report_warning(
 163                 u'Assuming --restrict-filenames since file system encoding '
 164                 u'cannot encode all charactes. '
 165                 u'Set the LC_ALL environment variable to fix this.')
 166             params['restrictfilenames'] = True
 167
 168         self.params = params
 169         self.fd = FileDownloader(self, self.params)
 170
 171         if '%(stitle)s' in self.params['outtmpl']:
 172             self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 173
 174         self._setup_opener()
 175
 176     def add_info_extractor(self, ie):
 177         """Add an InfoExtractor object to the end of the list."""
 178         self._ies.append(ie)
 179         self._ies_instances[ie.ie_key()] = ie
 180         ie.set_downloader(self)
 181
 182     def get_info_extractor(self, ie_key):
 183         """
 184         Get an instance of an IE with name ie_key, it will try to get one from
 185         the _ies list, if there's no instance it will create a new one and add
 186         it to the extractor list.
 187         """
 188         ie = self._ies_instances.get(ie_key)
 189         if ie is None:
 190             ie = get_info_extractor(ie_key)()
 191             self.add_info_extractor(ie)
 192         return ie
 193
 194     def add_default_info_extractors(self):
 195         """
 196         Add the InfoExtractors returned by gen_extractors to the end of the list
 197         """
 198         for ie in gen_extractors():
 199             self.add_info_extractor(ie)
 200
 201     def add_post_processor(self, pp):
 202         """Add a PostProcessor object to the end of the chain."""
 203         self._pps.append(pp)
 204         pp.set_downloader(self)
 205
 206     def to_screen(self, message, skip_eol=False):
 207         """Print message to stdout if not in quiet mode."""
 208         if self.params.get('logger'):
 209             self.params['logger'].debug(message)
 210         elif not self.params.get('quiet', False):
 211             terminator = [u'\n', u''][skip_eol]
 212             output = message + terminator
 213             write_string(output, self._screen_file)
 214
 215     def to_stderr(self, message):
 216         """Print message to stderr."""
 217         assert type(message) == type(u'')
 218         if self.params.get('logger'):
 219             self.params['logger'].error(message)
 220         else:
 221             output = message + u'\n'
 222             if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
 223                 output = output.encode(preferredencoding())
 224             sys.stderr.write(output)
 225
 226     def to_console_title(self, message):
 227         if not self.params.get('consoletitle', False):
 228             return
 229         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 230             # c_wchar_p() might not be necessary if `message` is
 231             # already of type unicode()
 232             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 233         elif 'TERM' in os.environ:
 234             write_string(u'\033]0;%s\007' % message, self._screen_file)
 235
 236     def save_console_title(self):
 237         if not self.params.get('consoletitle', False):
 238             return
 239         if 'TERM' in os.environ:
 240             # Save the title on stack
 241             write_string(u'\033[22;0t', self._screen_file)
 242
 243     def restore_console_title(self):
 244         if not self.params.get('consoletitle', False):
 245             return
 246         if 'TERM' in os.environ:
 247             # Restore the title from stack
 248             write_string(u'\033[23;0t', self._screen_file)
 249
 250     def __enter__(self):
 251         self.save_console_title()
 252         return self
 253
 254     def __exit__(self, *args):
 255         self.restore_console_title()
 256
 257         if self.params.get('cookiefile') is not None:
 258             self.cookiejar.save()
 259
 260     def fixed_template(self):
 261         """Checks if the output template is fixed."""
 262         return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
 263
 264     def trouble(self, message=None, tb=None):
 265         """Determine action to take when a download problem appears.
 266
 267         Depending on if the downloader has been configured to ignore
 268         download errors or not, this method may throw an exception or
 269         not when errors are found, after printing the message.
 270
 271         tb, if given, is additional traceback information.
 272         """
 273         if message is not None:
 274             self.to_stderr(message)
 275         if self.params.get('verbose'):
 276             if tb is None:
 277                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 278                     tb = u''
 279                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 280                         tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 281                     tb += compat_str(traceback.format_exc())
 282                 else:
 283                     tb_data = traceback.format_list(traceback.extract_stack())
 284                     tb = u''.join(tb_data)
 285             self.to_stderr(tb)
 286         if not self.params.get('ignoreerrors', False):
 287             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 288                 exc_info = sys.exc_info()[1].exc_info
 289             else:
 290                 exc_info = sys.exc_info()
 291             raise DownloadError(message, exc_info)
 292         self._download_retcode = 1
 293
 294     def report_warning(self, message):
 295         '''
 296         Print the message to stderr, it will be prefixed with 'WARNING:'
 297         If stderr is a tty file the 'WARNING:' will be colored
 298         '''
 299         if sys.stderr.isatty() and os.name != 'nt':
 300             _msg_header = u'\033[0;33mWARNING:\033[0m'
 301         else:
 302             _msg_header = u'WARNING:'
 303         warning_message = u'%s %s' % (_msg_header, message)
 304         self.to_stderr(warning_message)
 305
 306     def report_error(self, message, tb=None):
 307         '''
 308         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 309         in red if stderr is a tty file.
 310         '''
 311         if sys.stderr.isatty() and os.name != 'nt':
 312             _msg_header = u'\033[0;31mERROR:\033[0m'
 313         else:
 314             _msg_header = u'ERROR:'
 315         error_message = u'%s %s' % (_msg_header, message)
 316         self.trouble(error_message, tb)
 317
 318     def report_writedescription(self, descfn):
 319         """ Report that the description file is being written """
 320         self.to_screen(u'[info] Writing video description to: ' + descfn)
 321
 322     def report_writesubtitles(self, sub_filename):
 323         """ Report that the subtitles file is being written """
 324         self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
 325
 326     def report_writeinfojson(self, infofn):
 327         """ Report that the metadata file has been written """
 328         self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
 329
 330     def report_writeannotations(self, annofn):
 331         """ Report that the annotations file has been written. """
 332         self.to_screen(u'[info] Writing video annotations to: ' + annofn)
 333
 334     def report_file_already_downloaded(self, file_name):
 335         """Report file has already been fully downloaded."""
 336         try:
 337             self.to_screen(u'[download] %s has already been downloaded' % file_name)
 338         except UnicodeEncodeError:
 339             self.to_screen(u'[download] The file has already been downloaded')
 340
 341     def increment_downloads(self):
 342         """Increment the ordinal that assigns a number to each file."""
 343         self._num_downloads += 1
 344
 345     def prepare_filename(self, info_dict):
 346         """Generate the output filename."""
 347         try:
 348             template_dict = dict(info_dict)
 349
 350             template_dict['epoch'] = int(time.time())
 351             autonumber_size = self.params.get('autonumber_size')
 352             if autonumber_size is None:
 353                 autonumber_size = 5
 354             autonumber_templ = u'%0' + str(autonumber_size) + u'd'
 355             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 356             if template_dict.get('playlist_index') is not None:
 357                 template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
 358
 359             sanitize = lambda k, v: sanitize_filename(
 360                 u'NA' if v is None else compat_str(v),
 361                 restricted=self.params.get('restrictfilenames'),
 362                 is_id=(k == u'id'))
 363             template_dict = dict((k, sanitize(k, v))
 364                                  for k, v in template_dict.items())
 365
 366             tmpl = os.path.expanduser(self.params['outtmpl'])
 367             filename = tmpl % template_dict
 368             return filename
 369         except KeyError as err:
 370             self.report_error(u'Erroneous output template')
 371             return None
 372         except ValueError as err:
 373             self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
 374             return None
 375
 376     def _match_entry(self, info_dict):
 377         """ Returns None iff the file should be downloaded """
 378
 379         if 'title' in info_dict:
 380             # This can happen when we're just evaluating the playlist
 381             title = info_dict['title']
 382             matchtitle = self.params.get('matchtitle', False)
 383             if matchtitle:
 384                 if not re.search(matchtitle, title, re.IGNORECASE):
 385                     return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
 386             rejecttitle = self.params.get('rejecttitle', False)
 387             if rejecttitle:
 388                 if re.search(rejecttitle, title, re.IGNORECASE):
 389                     return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 390         date = info_dict.get('upload_date', None)
 391         if date is not None:
 392             dateRange = self.params.get('daterange', DateRange())
 393             if date not in dateRange:
 394                 return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 395         age_limit = self.params.get('age_limit')
 396         if age_limit is not None:
 397             if age_limit < info_dict.get('age_limit', 0):
 398                 return u'Skipping "' + title + '" because it is age restricted'
 399         if self.in_download_archive(info_dict):
 400             return (u'%s has already been recorded in archive'
 401                     % info_dict.get('title', info_dict.get('id', u'video')))
 402         return None
 403
 404     @staticmethod
 405     def add_extra_info(info_dict, extra_info):
 406         '''Set the keys from extra_info in info dict if they are missing'''
 407         for key, value in extra_info.items():
 408             info_dict.setdefault(key, value)
 409
 410     def extract_info(self, url, download=True, ie_key=None, extra_info={}):
 411         '''
 412         Returns a list with a dictionary for each video we find.
 413         If 'download', also downloads the videos.
 414         extra_info is a dict containing the extra values to add to each result
 415          '''
 416
 417         if ie_key:
 418             ies = [self.get_info_extractor(ie_key)]
 419         else:
 420             ies = self._ies
 421
 422         for ie in ies:
 423             if not ie.suitable(url):
 424                 continue
 425
 426             if not ie.working():
 427                 self.report_warning(u'The program functionality for this site has been marked as broken, '
 428                                     u'and will probably not work.')
 429
 430             try:
 431                 ie_result = ie.extract(url)
 432                 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
 433                     break
 434                 if isinstance(ie_result, list):
 435                     # Backwards compatibility: old IE result format
 436                     ie_result = {
 437                         '_type': 'compat_list',
 438                         'entries': ie_result,
 439                     }
 440                 self.add_extra_info(ie_result,
 441                     {
 442                         'extractor': ie.IE_NAME,
 443                         'webpage_url': url,
 444                         'extractor_key': ie.ie_key(),
 445                     })
 446                 return self.process_ie_result(ie_result, download, extra_info)
 447             except ExtractorError as de: # An error we somewhat expected
 448                 self.report_error(compat_str(de), de.format_traceback())
 449                 break
 450             except Exception as e:
 451                 if self.params.get('ignoreerrors', False):
 452                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 453                     break
 454                 else:
 455                     raise
 456         else:
 457             self.report_error(u'no suitable InfoExtractor: %s' % url)
 458
 459     def process_ie_result(self, ie_result, download=True, extra_info={}):
 460         """
 461         Take the result of the ie(may be modified) and resolve all unresolved
 462         references (URLs, playlist items).
 463
 464         It will also download the videos if 'download'.
 465         Returns the resolved ie_result.
 466         """
 467
 468         result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
 469         if result_type == 'video':
 470             self.add_extra_info(ie_result, extra_info)
 471             return self.process_video_result(ie_result, download=download)
 472         elif result_type == 'url':
 473             # We have to add extra_info to the results because it may be
 474             # contained in a playlist
 475             return self.extract_info(ie_result['url'],
 476                                      download,
 477                                      ie_key=ie_result.get('ie_key'),
 478                                      extra_info=extra_info)
 479         elif result_type == 'playlist':
 480
 481             # We process each entry in the playlist
 482             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 483             self.to_screen(u'[download] Downloading playlist: %s' % playlist)
 484
 485             playlist_results = []
 486
 487             n_all_entries = len(ie_result['entries'])
 488             playliststart = self.params.get('playliststart', 1) - 1
 489             playlistend = self.params.get('playlistend', -1)
 490
 491             if playlistend == -1:
 492                 entries = ie_result['entries'][playliststart:]
 493             else:
 494                 entries = ie_result['entries'][playliststart:playlistend]
 495
 496             n_entries = len(entries)
 497
 498             self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
 499                 (ie_result['extractor'], playlist, n_all_entries, n_entries))
 500
 501             for i, entry in enumerate(entries, 1):
 502                 self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
 503                 extra = {
 504                     'playlist': playlist,
 505                     'playlist_index': i + playliststart,
 506                     'extractor': ie_result['extractor'],
 507                     'webpage_url': ie_result['webpage_url'],
 508                     'extractor_key': ie_result['extractor_key'],
 509                 }
 510
 511                 reason = self._match_entry(entry)
 512                 if reason is not None:
 513                     self.to_screen(u'[download] ' + reason)
 514                     continue
 515
 516                 entry_result = self.process_ie_result(entry,
 517                                                       download=download,
 518                                                       extra_info=extra)
 519                 playlist_results.append(entry_result)
 520             ie_result['entries'] = playlist_results
 521             return ie_result
 522         elif result_type == 'compat_list':
 523             def _fixup(r):
 524                 self.add_extra_info(r,
 525                     {
 526                         'extractor': ie_result['extractor'],
 527                         'webpage_url': ie_result['webpage_url'],
 528                         'extractor_key': ie_result['extractor_key'],
 529                     })
 530                 return r
 531             ie_result['entries'] = [
 532                 self.process_ie_result(_fixup(r), download, extra_info)
 533                 for r in ie_result['entries']
 534             ]
 535             return ie_result
 536         else:
 537             raise Exception('Invalid result type: %s' % result_type)
 538
 539     def select_format(self, format_spec, available_formats):
 540         if format_spec == 'best' or format_spec is None:
 541             return available_formats[-1]
 542         elif format_spec == 'worst':
 543             return available_formats[0]
 544         else:
 545             extensions = [u'mp4', u'flv', u'webm', u'3gp']
 546             if format_spec in extensions:
 547                 filter_f = lambda f: f['ext'] == format_spec
 548             else:
 549                 filter_f = lambda f: f['format_id'] == format_spec
 550             matches = list(filter(filter_f, available_formats))
 551             if matches:
 552                 return matches[-1]
 553         return None
 554
 555     def process_video_result(self, info_dict, download=True):
 556         assert info_dict.get('_type', 'video') == 'video'
 557
 558         if 'playlist' not in info_dict:
 559             # It isn't part of a playlist
 560             info_dict['playlist'] = None
 561             info_dict['playlist_index'] = None
 562
 563         # This extractors handle format selection themselves
 564         if info_dict['extractor'] in [u'youtube', u'Youku']:
 565             if download:
 566                 self.process_info(info_dict)
 567             return info_dict
 568
 569         # We now pick which formats have to be downloaded
 570         if info_dict.get('formats') is None:
 571             # There's only one format available
 572             formats = [info_dict]
 573         else:
 574             formats = info_dict['formats']
 575
 576         # We check that all the formats have the format and format_id fields
 577         for (i, format) in enumerate(formats):
 578             if format.get('format_id') is None:
 579                 format['format_id'] = compat_str(i)
 580             if format.get('format') is None:
 581                 format['format'] = u'{id} - {res}{note}'.format(
 582                     id=format['format_id'],
 583                     res=self.format_resolution(format),
 584                     note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
 585                 )
 586             # Automatically determine file extension if missing
 587             if 'ext' not in format:
 588                 format['ext'] = determine_ext(format['url'])
 589
 590         if self.params.get('listformats', None):
 591             self.list_formats(info_dict)
 592             return
 593
 594         format_limit = self.params.get('format_limit', None)
 595         if format_limit:
 596             formats = list(takewhile_inclusive(
 597                 lambda f: f['format_id'] != format_limit, formats
 598             ))
 599         if self.params.get('prefer_free_formats'):
 600             def _free_formats_key(f):
 601                 try:
 602                     ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
 603                 except ValueError:
 604                     ext_ord = -1
 605                 # We only compare the extension if they have the same height and width
 606                 return (f.get('height'), f.get('width'), ext_ord)
 607             formats = sorted(formats, key=_free_formats_key)
 608
 609         req_format = self.params.get('format', 'best')
 610         if req_format is None:
 611             req_format = 'best'
 612         formats_to_download = []
 613         # The -1 is for supporting YoutubeIE
 614         if req_format in ('-1', 'all'):
 615             formats_to_download = formats
 616         else:
 617             # We can accept formats requestd in the format: 34/5/best, we pick
 618             # the first that is available, starting from left
 619             req_formats = req_format.split('/')
 620             for rf in req_formats:
 621                 selected_format = self.select_format(rf, formats)
 622                 if selected_format is not None:
 623                     formats_to_download = [selected_format]
 624                     break
 625         if not formats_to_download:
 626             raise ExtractorError(u'requested format not available',
 627                                  expected=True)
 628
 629         if download:
 630             if len(formats_to_download) > 1:
 631                 self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
 632             for format in formats_to_download:
 633                 new_info = dict(info_dict)
 634                 new_info.update(format)
 635                 self.process_info(new_info)
 636         # We update the info dict with the best quality format (backwards compatibility)
 637         info_dict.update(formats_to_download[-1])
 638         return info_dict
 639
 640     def process_info(self, info_dict):
 641         """Process a single resolved IE result."""
 642
 643         assert info_dict.get('_type', 'video') == 'video'
 644         #We increment the download the download count here to match the previous behaviour.
 645         self.increment_downloads()
 646
 647         info_dict['fulltitle'] = info_dict['title']
 648         if len(info_dict['title']) > 200:
 649             info_dict['title'] = info_dict['title'][:197] + u'...'
 650
 651         # Keep for backwards compatibility
 652         info_dict['stitle'] = info_dict['title']
 653
 654         if not 'format' in info_dict:
 655             info_dict['format'] = info_dict['ext']
 656
 657         reason = self._match_entry(info_dict)
 658         if reason is not None:
 659             self.to_screen(u'[download] ' + reason)
 660             return
 661
 662         max_downloads = self.params.get('max_downloads')
 663         if max_downloads is not None:
 664             if self._num_downloads > int(max_downloads):
 665                 raise MaxDownloadsReached()
 666
 667         filename = self.prepare_filename(info_dict)
 668
 669         # Forced printings
 670         if self.params.get('forcetitle', False):
 671             compat_print(info_dict['fulltitle'])
 672         if self.params.get('forceid', False):
 673             compat_print(info_dict['id'])
 674         if self.params.get('forceurl', False):
 675             # For RTMP URLs, also include the playpath
 676             compat_print(info_dict['url'] + info_dict.get('play_path', u''))
 677         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
 678             compat_print(info_dict['thumbnail'])
 679         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
 680             compat_print(info_dict['description'])
 681         if self.params.get('forcefilename', False) and filename is not None:
 682             compat_print(filename)
 683         if self.params.get('forceformat', False):
 684             compat_print(info_dict['format'])
 685         if self.params.get('forcejson', False):
 686             compat_print(json.dumps(info_dict))
 687
 688         # Do nothing else if in simulate mode
 689         if self.params.get('simulate', False):
 690             return
 691
 692         if filename is None:
 693             return
 694
 695         try:
 696             dn = os.path.dirname(encodeFilename(filename))
 697             if dn != '' and not os.path.exists(dn):
 698                 os.makedirs(dn)
 699         except (OSError, IOError) as err:
 700             self.report_error(u'unable to create directory ' + compat_str(err))
 701             return
 702
 703         if self.params.get('writedescription', False):
 704             try:
 705                 descfn = filename + u'.description'
 706                 self.report_writedescription(descfn)
 707                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
 708                     descfile.write(info_dict['description'])
 709             except (KeyError, TypeError):
 710                 self.report_warning(u'There\'s no description to write.')
 711             except (OSError, IOError):
 712                 self.report_error(u'Cannot write description file ' + descfn)
 713                 return
 714
 715         if self.params.get('writeannotations', False):
 716             try:
 717                 annofn = filename + u'.annotations.xml'
 718                 self.report_writeannotations(annofn)
 719                 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
 720                     annofile.write(info_dict['annotations'])
 721             except (KeyError, TypeError):
 722                 self.report_warning(u'There are no annotations to write.')
 723             except (OSError, IOError):
 724                 self.report_error(u'Cannot write annotations file: ' + annofn)
 725                 return
 726
 727         subtitles_are_requested = any([self.params.get('writesubtitles', False),
 728                                        self.params.get('writeautomaticsub')])
 729
 730         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
 731             # subtitles download errors are already managed as troubles in relevant IE
 732             # that way it will silently go on when used with unsupporting IE
 733             subtitles = info_dict['subtitles']
 734             sub_format = self.params.get('subtitlesformat', 'srt')
 735             for sub_lang in subtitles.keys():
 736                 sub = subtitles[sub_lang]
 737                 if sub is None:
 738                     continue
 739                 try:
 740                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
 741                     self.report_writesubtitles(sub_filename)
 742                     with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
 743                             subfile.write(sub)
 744                 except (OSError, IOError):
 745                     self.report_error(u'Cannot write subtitles file ' + descfn)
 746                     return
 747
 748         if self.params.get('writeinfojson', False):
 749             infofn = os.path.splitext(filename)[0] + u'.info.json'
 750             self.report_writeinfojson(infofn)
 751             try:
 752                 json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
 753                 write_json_file(json_info_dict, encodeFilename(infofn))
 754             except (OSError, IOError):
 755                 self.report_error(u'Cannot write metadata to JSON file ' + infofn)
 756                 return
 757
 758         if self.params.get('writethumbnail', False):
 759             if info_dict.get('thumbnail') is not None:
 760                 thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
 761                 thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
 762                 self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
 763                                (info_dict['extractor'], info_dict['id']))
 764                 try:
 765                     uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
 766                     with open(thumb_filename, 'wb') as thumbf:
 767                         shutil.copyfileobj(uf, thumbf)
 768                     self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
 769                         (info_dict['extractor'], info_dict['id'], thumb_filename))
 770                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 771                     self.report_warning(u'Unable to download thumbnail "%s": %s' %
 772                         (info_dict['thumbnail'], compat_str(err)))
 773
 774         if not self.params.get('skip_download', False):
 775             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
 776                 success = True
 777             else:
 778                 try:
 779                     success = self.fd._do_download(filename, info_dict)
 780                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 781                     self.report_error(u'unable to download video data: %s' % str(err))
 782                     return
 783                 except (OSError, IOError) as err:
 784                     raise UnavailableVideoError(err)
 785                 except (ContentTooShortError, ) as err:
 786                     self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
 787                     return
 788
 789             if success:
 790                 try:
 791                     self.post_process(filename, info_dict)
 792                 except (PostProcessingError) as err:
 793                     self.report_error(u'postprocessing: %s' % str(err))
 794                     return
 795
 796         self.record_download_archive(info_dict)
 797
 798     def download(self, url_list):
 799         """Download a given list of URLs."""
 800         if len(url_list) > 1 and self.fixed_template():
 801             raise SameFileError(self.params['outtmpl'])
 802
 803         for url in url_list:
 804             try:
 805                 #It also downloads the videos
 806                 self.extract_info(url)
 807             except UnavailableVideoError:
 808                 self.report_error(u'unable to download video')
 809             except MaxDownloadsReached:
 810                 self.to_screen(u'[info] Maximum number of downloaded files reached.')
 811                 raise
 812
 813         return self._download_retcode
 814
 815     def post_process(self, filename, ie_info):
 816         """Run all the postprocessors on the given file."""
 817         info = dict(ie_info)
 818         info['filepath'] = filename
 819         keep_video = None
 820         for pp in self._pps:
 821             try:
 822                 keep_video_wish, new_info = pp.run(info)
 823                 if keep_video_wish is not None:
 824                     if keep_video_wish:
 825                         keep_video = keep_video_wish
 826                     elif keep_video is None:
 827                         # No clear decision yet, let IE decide
 828                         keep_video = keep_video_wish
 829             except PostProcessingError as e:
 830                 self.report_error(e.msg)
 831         if keep_video is False and not self.params.get('keepvideo', False):
 832             try:
 833                 self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
 834                 os.remove(encodeFilename(filename))
 835             except (IOError, OSError):
 836                 self.report_warning(u'Unable to remove downloaded video file')
 837
 838     def in_download_archive(self, info_dict):
 839         fn = self.params.get('download_archive')
 840         if fn is None:
 841             return False
 842         extractor = info_dict.get('extractor_id')
 843         if extractor is None:
 844             if 'id' in info_dict:
 845                 extractor = info_dict.get('ie_key')  # key in a playlist
 846         if extractor is None:
 847             return False  # Incomplete video information
 848         # Future-proof against any change in case
 849         # and backwards compatibility with prior versions
 850         extractor = extractor.lower()
 851         vid_id = extractor + u' ' + info_dict['id']
 852         try:
 853             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 854                 for line in archive_file:
 855                     if line.strip() == vid_id:
 856                         return True
 857         except IOError as ioe:
 858             if ioe.errno != errno.ENOENT:
 859                 raise
 860         return False
 861
 862     def record_download_archive(self, info_dict):
 863         fn = self.params.get('download_archive')
 864         if fn is None:
 865             return
 866         vid_id = info_dict['extractor'] + u' ' + info_dict['id']
 867         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
 868             archive_file.write(vid_id + u'\n')
 869
 870     @staticmethod
 871     def format_resolution(format, default='unknown'):
 872         if format.get('_resolution') is not None:
 873             return format['_resolution']
 874         if format.get('height') is not None:
 875             if format.get('width') is not None:
 876                 res = u'%sx%s' % (format['width'], format['height'])
 877             else:
 878                 res = u'%sp' % format['height']
 879         else:
 880             res = default
 881         return res
 882
 883     def list_formats(self, info_dict):
 884         def format_note(fdict):
 885             if fdict.get('format_note') is not None:
 886                 return fdict['format_note']
 887             res = u''
 888             if fdict.get('vcodec') is not None:
 889                 res += u'%-5s' % fdict['vcodec']
 890             elif fdict.get('vbr') is not None:
 891                 res += u'video'
 892             if fdict.get('vbr') is not None:
 893                 res += u'@%4dk' % fdict['vbr']
 894             if fdict.get('acodec') is not None:
 895                 if res:
 896                     res += u', '
 897                 res += u'%-5s' % fdict['acodec']
 898             elif fdict.get('abr') is not None:
 899                 if res:
 900                     res += u', '
 901                 res += 'audio'
 902             if fdict.get('abr') is not None:
 903                 res += u'@%3dk' % fdict['abr']
 904             return res
 905
 906         def line(format):
 907             return (u'%-20s%-10s%-12s%s' % (
 908                 format['format_id'],
 909                 format['ext'],
 910                 self.format_resolution(format),
 911                 format_note(format),
 912                 )
 913             )
 914
 915         formats = info_dict.get('formats', [info_dict])
 916         formats_s = list(map(line, formats))
 917         if len(formats) > 1:
 918             formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
 919             formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
 920
 921         header_line = line({
 922             'format_id': u'format code', 'ext': u'extension',
 923             '_resolution': u'resolution', 'format_note': u'note'})
 924         self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
 925                        (info_dict['id'], header_line, u"\n".join(formats_s)))
 926
 927     def urlopen(self, req):
 928         """ Start an HTTP download """
 929         return self._opener.open(req)
 930
 931     def print_debug_header(self):
 932         if not self.params.get('verbose'):
 933             return
 934         write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
 935         try:
 936             sp = subprocess.Popen(
 937                 ['git', 'rev-parse', '--short', 'HEAD'],
 938                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
 939                 cwd=os.path.dirname(os.path.abspath(__file__)))
 940             out, err = sp.communicate()
 941             out = out.decode().strip()
 942             if re.match('[0-9a-f]+', out):
 943                 write_string(u'[debug] Git HEAD: ' + out + u'\n')
 944         except:
 945             try:
 946                 sys.exc_clear()
 947             except:
 948                 pass
 949         write_string(u'[debug] Python version %s - %s' %
 950                      (platform.python_version(), platform_name()) + u'\n')
 951
 952         proxy_map = {}
 953         for handler in self._opener.handlers:
 954             if hasattr(handler, 'proxies'):
 955                 proxy_map.update(handler.proxies)
 956         write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
 957
    def _setup_opener(self, timeout=300):
        """Build the URL opener, store it as self._opener and install it globally.

        Reads the 'cookiefile', 'proxy' and 'nocheckcertificate' params.
        Also sets self.cookiejar and, as global side effects, installs the
        opener via install_opener() and sets the default socket timeout to
        timeout.
        """
        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # No cookie file requested: keep cookies in memory only
            self.cookiejar = compat_cookiejar.CookieJar()
        else:
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
                opts_cookiefile)
            # Only load when the file is readable
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load()

        cookie_processor = compat_urllib_request.HTTPCookieProcessor(
            self.cookiejar)
        if opts_proxy is not None:
            if opts_proxy == '':
                # An explicitly empty proxy setting yields an empty proxy map
                proxies = {}
            else:
                # The same proxy is used for both HTTP and HTTPS
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            # No 'proxy' param: fall back to what getproxies() reports
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = compat_urllib_request.ProxyHandler(proxies)
        https_handler = make_HTTPS_handler(
            self.params.get('nocheckcertificate', False))
        opener = compat_urllib_request.build_opener(
            https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener

        # TODO remove this global modification
        compat_urllib_request.install_opener(opener)
        socket.setdefaulttimeout(timeout)