_ Git - youtube-dl/blob - youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import
   5
   6 import errno
   7 import io
   8 import json
   9 import os
  10 import platform
  11 import re
  12 import shutil
  13 import subprocess
  14 import socket
  15 import sys
  16 import time
  17 import traceback
  18
  19 if os.name == 'nt':
  20     import ctypes
  21
  22 from .utils import (
  23     compat_cookiejar,
  24     compat_http_client,
  25     compat_print,
  26     compat_str,
  27     compat_urllib_error,
  28     compat_urllib_request,
  29     ContentTooShortError,
  30     date_from_str,
  31     DateRange,
  32     determine_ext,
  33     DownloadError,
  34     encodeFilename,
  35     ExtractorError,
  36     format_bytes,
  37     locked_file,
  38     make_HTTPS_handler,
  39     MaxDownloadsReached,
  40     PostProcessingError,
  41     platform_name,
  42     preferredencoding,
  43     SameFileError,
  44     sanitize_filename,
  45     subtitles_filename,
  46     takewhile_inclusive,
  47     UnavailableVideoError,
  48     write_json_file,
  49     write_string,
  50     YoutubeDLHandler,
  51 )
  52 from .extractor import get_info_extractor, gen_extractors
  53 from .FileDownloader import FileDownloader
  54 from .version import __version__
  55
  56
  57 class YoutubeDL(object):
  58     """YoutubeDL class.
  59
   60     YoutubeDL objects are the ones responsible for downloading the
  61     actual video file and writing it to disk if the user has requested
  62     it, among some other tasks. In most cases there should be one per
  63     program. As, given a video URL, the downloader doesn't know how to
  64     extract all the needed information, task that InfoExtractors do, it
  65     has to pass the URL to one of them.
  66
  67     For this, YoutubeDL objects have a method that allows
  68     InfoExtractors to be registered in a given order. When it is passed
   69     a URL, the YoutubeDL object hands it to the first InfoExtractor it
  70     finds that reports being able to handle it. The InfoExtractor extracts
  71     all the information about the video or videos the URL refers to, and
  72     YoutubeDL process the extracted information, possibly using a File
  73     Downloader to download the video.
  74
  75     YoutubeDL objects accept a lot of parameters. In order not to saturate
  76     the object constructor with arguments, it receives a dictionary of
  77     options instead. These options are available through the params
  78     attribute for the InfoExtractors to use. The YoutubeDL also
  79     registers itself as the downloader in charge for the InfoExtractors
  80     that are added to it, so this is a "mutual registration".
  81
  82     Available options:
  83
  84     username:          Username for authentication purposes.
  85     password:          Password for authentication purposes.
   86     videopassword:     Password for accessing a video.
  87     usenetrc:          Use netrc for authentication instead.
  88     verbose:           Print additional info to stdout.
  89     quiet:             Do not print messages to stdout.
  90     forceurl:          Force printing final URL.
  91     forcetitle:        Force printing title.
  92     forceid:           Force printing ID.
  93     forcethumbnail:    Force printing thumbnail URL.
  94     forcedescription:  Force printing description.
  95     forcefilename:     Force printing final filename.
  96     forcejson:         Force printing info_dict as JSON.
  97     simulate:          Do not download the video files.
  98     format:            Video format code.
  99     format_limit:      Highest quality format to try.
 100     outtmpl:           Template for output names.
 101     restrictfilenames: Do not allow "&" and spaces in file names
 102     ignoreerrors:      Do not stop on download errors.
 103     nooverwrites:      Prevent overwriting files.
 104     playliststart:     Playlist item to start at.
 105     playlistend:       Playlist item to end at.
 106     matchtitle:        Download only matching titles.
 107     rejecttitle:       Reject downloads for matching titles.
 108     logger:            Log messages to a logging.Logger instance.
 109     logtostderr:       Log messages to stderr instead of stdout.
 110     writedescription:  Write the video description to a .description file
  111     writeinfojson:     Write the video metadata to a .info.json file
 112     writeannotations:  Write the video annotations to a .annotations.xml file
 113     writethumbnail:    Write the thumbnail image to a file
 114     writesubtitles:    Write the video subtitles to a file
 115     writeautomaticsub: Write the automatic subtitles to a file
 116     allsubtitles:      Downloads all the subtitles of the video
 117                        (requires writesubtitles or writeautomaticsub)
 118     listsubtitles:     Lists all available subtitles for the video
 119     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
 120     subtitleslangs:    List of languages of the subtitles to download
 121     keepvideo:         Keep the video file after post-processing
 122     daterange:         A DateRange object, download only if the upload_date is in the range.
 123     skip_download:     Skip the actual download of the video file
 124     cachedir:          Location of the cache files in the filesystem.
 125                        None to disable filesystem cache.
 126     noplaylist:        Download single video instead of a playlist if in doubt.
 127     age_limit:         An integer representing the user's age in years.
 128                        Unsuitable videos for the given age are skipped.
 129     download_archive:   File name of a file where all downloads are recorded.
 130                        Videos already present in the file are not downloaded
 131                        again.
 132     cookiefile:        File name where cookies should be read from and dumped to.
 133     nocheckcertificate:Do not verify SSL certificates
 134     proxy:             URL of the proxy server to use
 135     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 136
 137     The following parameters are not used by YoutubeDL itself, they are used by
 138     the FileDownloader:
 139     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 140     noresizebuffer, retries, continuedl, noprogress, consoletitle
 141     """
 142
 143     params = None
 144     _ies = []
 145     _pps = []
 146     _download_retcode = None
 147     _num_downloads = None
 148     _screen_file = None
 149
 150     def __init__(self, params=None):
 151         """Create a FileDownloader object with the given options."""
 152         self._ies = []
 153         self._ies_instances = {}
 154         self._pps = []
 155         self._progress_hooks = []
 156         self._download_retcode = 0
 157         self._num_downloads = 0
 158         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 159         self.params = {} if params is None else params
 160
 161         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 162                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 163                 and not params['restrictfilenames']):
 164             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 165             self.report_warning(
 166                 u'Assuming --restrict-filenames since file system encoding '
 167                 u'cannot encode all charactes. '
 168                 u'Set the LC_ALL environment variable to fix this.')
 169             self.params['restrictfilenames'] = True
 170
 171         self.fd = FileDownloader(self, self.params)
 172
 173         if '%(stitle)s' in self.params.get('outtmpl', ''):
 174             self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 175
 176         self._setup_opener()
 177
 178     def add_info_extractor(self, ie):
 179         """Add an InfoExtractor object to the end of the list."""
 180         self._ies.append(ie)
 181         self._ies_instances[ie.ie_key()] = ie
 182         ie.set_downloader(self)
 183
 184     def get_info_extractor(self, ie_key):
 185         """
 186         Get an instance of an IE with name ie_key, it will try to get one from
 187         the _ies list, if there's no instance it will create a new one and add
 188         it to the extractor list.
 189         """
 190         ie = self._ies_instances.get(ie_key)
 191         if ie is None:
 192             ie = get_info_extractor(ie_key)()
 193             self.add_info_extractor(ie)
 194         return ie
 195
 196     def add_default_info_extractors(self):
 197         """
 198         Add the InfoExtractors returned by gen_extractors to the end of the list
 199         """
 200         for ie in gen_extractors():
 201             self.add_info_extractor(ie)
 202
 203     def add_post_processor(self, pp):
 204         """Add a PostProcessor object to the end of the chain."""
 205         self._pps.append(pp)
 206         pp.set_downloader(self)
 207
 208     def to_screen(self, message, skip_eol=False):
 209         """Print message to stdout if not in quiet mode."""
 210         if self.params.get('logger'):
 211             self.params['logger'].debug(message)
 212         elif not self.params.get('quiet', False):
 213             terminator = [u'\n', u''][skip_eol]
 214             output = message + terminator
 215             write_string(output, self._screen_file)
 216
 217     def to_stderr(self, message):
 218         """Print message to stderr."""
 219         assert type(message) == type(u'')
 220         if self.params.get('logger'):
 221             self.params['logger'].error(message)
 222         else:
 223             output = message + u'\n'
 224             if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
 225                 output = output.encode(preferredencoding())
 226             sys.stderr.write(output)
 227
 228     def to_console_title(self, message):
 229         if not self.params.get('consoletitle', False):
 230             return
 231         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 232             # c_wchar_p() might not be necessary if `message` is
 233             # already of type unicode()
 234             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 235         elif 'TERM' in os.environ:
 236             write_string(u'\033]0;%s\007' % message, self._screen_file)
 237
 238     def save_console_title(self):
 239         if not self.params.get('consoletitle', False):
 240             return
 241         if 'TERM' in os.environ:
 242             # Save the title on stack
 243             write_string(u'\033[22;0t', self._screen_file)
 244
 245     def restore_console_title(self):
 246         if not self.params.get('consoletitle', False):
 247             return
 248         if 'TERM' in os.environ:
 249             # Restore the title from stack
 250             write_string(u'\033[23;0t', self._screen_file)
 251
 252     def __enter__(self):
 253         self.save_console_title()
 254         return self
 255
 256     def __exit__(self, *args):
 257         self.restore_console_title()
 258
 259         if self.params.get('cookiefile') is not None:
 260             self.cookiejar.save()
 261
 262     def trouble(self, message=None, tb=None):
 263         """Determine action to take when a download problem appears.
 264
 265         Depending on if the downloader has been configured to ignore
 266         download errors or not, this method may throw an exception or
 267         not when errors are found, after printing the message.
 268
 269         tb, if given, is additional traceback information.
 270         """
 271         if message is not None:
 272             self.to_stderr(message)
 273         if self.params.get('verbose'):
 274             if tb is None:
 275                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 276                     tb = u''
 277                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 278                         tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 279                     tb += compat_str(traceback.format_exc())
 280                 else:
 281                     tb_data = traceback.format_list(traceback.extract_stack())
 282                     tb = u''.join(tb_data)
 283             self.to_stderr(tb)
 284         if not self.params.get('ignoreerrors', False):
 285             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 286                 exc_info = sys.exc_info()[1].exc_info
 287             else:
 288                 exc_info = sys.exc_info()
 289             raise DownloadError(message, exc_info)
 290         self._download_retcode = 1
 291
 292     def report_warning(self, message):
 293         '''
 294         Print the message to stderr, it will be prefixed with 'WARNING:'
 295         If stderr is a tty file the 'WARNING:' will be colored
 296         '''
 297         if sys.stderr.isatty() and os.name != 'nt':
 298             _msg_header = u'\033[0;33mWARNING:\033[0m'
 299         else:
 300             _msg_header = u'WARNING:'
 301         warning_message = u'%s %s' % (_msg_header, message)
 302         self.to_stderr(warning_message)
 303
 304     def report_error(self, message, tb=None):
 305         '''
 306         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 307         in red if stderr is a tty file.
 308         '''
 309         if sys.stderr.isatty() and os.name != 'nt':
 310             _msg_header = u'\033[0;31mERROR:\033[0m'
 311         else:
 312             _msg_header = u'ERROR:'
 313         error_message = u'%s %s' % (_msg_header, message)
 314         self.trouble(error_message, tb)
 315
 316     def report_writedescription(self, descfn):
 317         """ Report that the description file is being written """
 318         self.to_screen(u'[info] Writing video description to: ' + descfn)
 319
 320     def report_writesubtitles(self, sub_filename):
 321         """ Report that the subtitles file is being written """
 322         self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
 323
 324     def report_writeinfojson(self, infofn):
 325         """ Report that the metadata file has been written """
 326         self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
 327
 328     def report_writeannotations(self, annofn):
 329         """ Report that the annotations file has been written. """
 330         self.to_screen(u'[info] Writing video annotations to: ' + annofn)
 331
 332     def report_file_already_downloaded(self, file_name):
 333         """Report file has already been fully downloaded."""
 334         try:
 335             self.to_screen(u'[download] %s has already been downloaded' % file_name)
 336         except UnicodeEncodeError:
 337             self.to_screen(u'[download] The file has already been downloaded')
 338
 339     def increment_downloads(self):
 340         """Increment the ordinal that assigns a number to each file."""
 341         self._num_downloads += 1
 342
 343     def prepare_filename(self, info_dict):
 344         """Generate the output filename."""
 345         try:
 346             template_dict = dict(info_dict)
 347
 348             template_dict['epoch'] = int(time.time())
 349             autonumber_size = self.params.get('autonumber_size')
 350             if autonumber_size is None:
 351                 autonumber_size = 5
 352             autonumber_templ = u'%0' + str(autonumber_size) + u'd'
 353             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 354             if template_dict.get('playlist_index') is not None:
 355                 template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
 356
 357             sanitize = lambda k, v: sanitize_filename(
 358                 u'NA' if v is None else compat_str(v),
 359                 restricted=self.params.get('restrictfilenames'),
 360                 is_id=(k == u'id'))
 361             template_dict = dict((k, sanitize(k, v))
 362                                  for k, v in template_dict.items())
 363
 364             tmpl = os.path.expanduser(self.params['outtmpl'])
 365             filename = tmpl % template_dict
 366             return filename
 367         except KeyError as err:
 368             self.report_error(u'Erroneous output template')
 369             return None
 370         except ValueError as err:
 371             self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
 372             return None
 373
 374     def _match_entry(self, info_dict):
 375         """ Returns None iff the file should be downloaded """
 376
 377         if 'title' in info_dict:
 378             # This can happen when we're just evaluating the playlist
 379             title = info_dict['title']
 380             matchtitle = self.params.get('matchtitle', False)
 381             if matchtitle:
 382                 if not re.search(matchtitle, title, re.IGNORECASE):
 383                     return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
 384             rejecttitle = self.params.get('rejecttitle', False)
 385             if rejecttitle:
 386                 if re.search(rejecttitle, title, re.IGNORECASE):
 387                     return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 388         date = info_dict.get('upload_date', None)
 389         if date is not None:
 390             dateRange = self.params.get('daterange', DateRange())
 391             if date not in dateRange:
 392                 return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 393         age_limit = self.params.get('age_limit')
 394         if age_limit is not None:
 395             if age_limit < info_dict.get('age_limit', 0):
 396                 return u'Skipping "' + title + '" because it is age restricted'
 397         if self.in_download_archive(info_dict):
 398             return (u'%s has already been recorded in archive'
 399                     % info_dict.get('title', info_dict.get('id', u'video')))
 400         return None
 401
 402     @staticmethod
 403     def add_extra_info(info_dict, extra_info):
 404         '''Set the keys from extra_info in info dict if they are missing'''
 405         for key, value in extra_info.items():
 406             info_dict.setdefault(key, value)
 407
 408     def extract_info(self, url, download=True, ie_key=None, extra_info={}):
 409         '''
 410         Returns a list with a dictionary for each video we find.
 411         If 'download', also downloads the videos.
 412         extra_info is a dict containing the extra values to add to each result
 413          '''
 414
 415         if ie_key:
 416             ies = [self.get_info_extractor(ie_key)]
 417         else:
 418             ies = self._ies
 419
 420         for ie in ies:
 421             if not ie.suitable(url):
 422                 continue
 423
 424             if not ie.working():
 425                 self.report_warning(u'The program functionality for this site has been marked as broken, '
 426                                     u'and will probably not work.')
 427
 428             try:
 429                 ie_result = ie.extract(url)
 430                 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
 431                     break
 432                 if isinstance(ie_result, list):
 433                     # Backwards compatibility: old IE result format
 434                     ie_result = {
 435                         '_type': 'compat_list',
 436                         'entries': ie_result,
 437                     }
 438                 self.add_extra_info(ie_result,
 439                     {
 440                         'extractor': ie.IE_NAME,
 441                         'webpage_url': url,
 442                         'extractor_key': ie.ie_key(),
 443                     })
 444                 return self.process_ie_result(ie_result, download, extra_info)
 445             except ExtractorError as de: # An error we somewhat expected
 446                 self.report_error(compat_str(de), de.format_traceback())
 447                 break
 448             except Exception as e:
 449                 if self.params.get('ignoreerrors', False):
 450                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 451                     break
 452                 else:
 453                     raise
 454         else:
 455             self.report_error(u'no suitable InfoExtractor: %s' % url)
 456
 457     def process_ie_result(self, ie_result, download=True, extra_info={}):
 458         """
 459         Take the result of the ie(may be modified) and resolve all unresolved
 460         references (URLs, playlist items).
 461
 462         It will also download the videos if 'download'.
 463         Returns the resolved ie_result.
 464         """
 465
 466         result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
 467         if result_type == 'video':
 468             self.add_extra_info(ie_result, extra_info)
 469             return self.process_video_result(ie_result, download=download)
 470         elif result_type == 'url':
 471             # We have to add extra_info to the results because it may be
 472             # contained in a playlist
 473             return self.extract_info(ie_result['url'],
 474                                      download,
 475                                      ie_key=ie_result.get('ie_key'),
 476                                      extra_info=extra_info)
 477         elif result_type == 'playlist':
 478
 479             # We process each entry in the playlist
 480             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 481             self.to_screen(u'[download] Downloading playlist: %s' % playlist)
 482
 483             playlist_results = []
 484
 485             n_all_entries = len(ie_result['entries'])
 486             playliststart = self.params.get('playliststart', 1) - 1
 487             playlistend = self.params.get('playlistend', -1)
 488
 489             if playlistend == -1:
 490                 entries = ie_result['entries'][playliststart:]
 491             else:
 492                 entries = ie_result['entries'][playliststart:playlistend]
 493
 494             n_entries = len(entries)
 495
 496             self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
 497                 (ie_result['extractor'], playlist, n_all_entries, n_entries))
 498
 499             for i, entry in enumerate(entries, 1):
 500                 self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
 501                 extra = {
 502                     'playlist': playlist,
 503                     'playlist_index': i + playliststart,
 504                     'extractor': ie_result['extractor'],
 505                     'webpage_url': ie_result['webpage_url'],
 506                     'extractor_key': ie_result['extractor_key'],
 507                 }
 508
 509                 reason = self._match_entry(entry)
 510                 if reason is not None:
 511                     self.to_screen(u'[download] ' + reason)
 512                     continue
 513
 514                 entry_result = self.process_ie_result(entry,
 515                                                       download=download,
 516                                                       extra_info=extra)
 517                 playlist_results.append(entry_result)
 518             ie_result['entries'] = playlist_results
 519             return ie_result
 520         elif result_type == 'compat_list':
 521             def _fixup(r):
 522                 self.add_extra_info(r,
 523                     {
 524                         'extractor': ie_result['extractor'],
 525                         'webpage_url': ie_result['webpage_url'],
 526                         'extractor_key': ie_result['extractor_key'],
 527                     })
 528                 return r
 529             ie_result['entries'] = [
 530                 self.process_ie_result(_fixup(r), download, extra_info)
 531                 for r in ie_result['entries']
 532             ]
 533             return ie_result
 534         else:
 535             raise Exception('Invalid result type: %s' % result_type)
 536
 537     def select_format(self, format_spec, available_formats):
 538         if format_spec == 'best' or format_spec is None:
 539             return available_formats[-1]
 540         elif format_spec == 'worst':
 541             return available_formats[0]
 542         else:
 543             extensions = [u'mp4', u'flv', u'webm', u'3gp']
 544             if format_spec in extensions:
 545                 filter_f = lambda f: f['ext'] == format_spec
 546             else:
 547                 filter_f = lambda f: f['format_id'] == format_spec
 548             matches = list(filter(filter_f, available_formats))
 549             if matches:
 550                 return matches[-1]
 551         return None
 552
 553     def process_video_result(self, info_dict, download=True):
 554         assert info_dict.get('_type', 'video') == 'video'
 555
 556         if 'playlist' not in info_dict:
 557             # It isn't part of a playlist
 558             info_dict['playlist'] = None
 559             info_dict['playlist_index'] = None
 560
 561         # This extractors handle format selection themselves
 562         if info_dict['extractor'] in [u'youtube', u'Youku']:
 563             if download:
 564                 self.process_info(info_dict)
 565             return info_dict
 566
 567         # We now pick which formats have to be downloaded
 568         if info_dict.get('formats') is None:
 569             # There's only one format available
 570             formats = [info_dict]
 571         else:
 572             formats = info_dict['formats']
 573
 574         # We check that all the formats have the format and format_id fields
 575         for (i, format) in enumerate(formats):
 576             if format.get('format_id') is None:
 577                 format['format_id'] = compat_str(i)
 578             if format.get('format') is None:
 579                 format['format'] = u'{id} - {res}{note}'.format(
 580                     id=format['format_id'],
 581                     res=self.format_resolution(format),
 582                     note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
 583                 )
 584             # Automatically determine file extension if missing
 585             if 'ext' not in format:
 586                 format['ext'] = determine_ext(format['url'])
 587
 588         if self.params.get('listformats', None):
 589             self.list_formats(info_dict)
 590             return
 591
 592         format_limit = self.params.get('format_limit', None)
 593         if format_limit:
 594             formats = list(takewhile_inclusive(
 595                 lambda f: f['format_id'] != format_limit, formats
 596             ))
 597         if self.params.get('prefer_free_formats'):
 598             def _free_formats_key(f):
 599                 try:
 600                     ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
 601                 except ValueError:
 602                     ext_ord = -1
 603                 # We only compare the extension if they have the same height and width
 604                 return (f.get('height'), f.get('width'), ext_ord)
 605             formats = sorted(formats, key=_free_formats_key)
 606
 607         req_format = self.params.get('format', 'best')
 608         if req_format is None:
 609             req_format = 'best'
 610         formats_to_download = []
 611         # The -1 is for supporting YoutubeIE
 612         if req_format in ('-1', 'all'):
 613             formats_to_download = formats
 614         else:
 615             # We can accept formats requestd in the format: 34/5/best, we pick
 616             # the first that is available, starting from left
 617             req_formats = req_format.split('/')
 618             for rf in req_formats:
 619                 selected_format = self.select_format(rf, formats)
 620                 if selected_format is not None:
 621                     formats_to_download = [selected_format]
 622                     break
 623         if not formats_to_download:
 624             raise ExtractorError(u'requested format not available',
 625                                  expected=True)
 626
 627         if download:
 628             if len(formats_to_download) > 1:
 629                 self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
 630             for format in formats_to_download:
 631                 new_info = dict(info_dict)
 632                 new_info.update(format)
 633                 self.process_info(new_info)
 634         # We update the info dict with the best quality format (backwards compatibility)
 635         info_dict.update(formats_to_download[-1])
 636         return info_dict
 637
 638     def process_info(self, info_dict):
 639         """Process a single resolved IE result."""
 640
 641         assert info_dict.get('_type', 'video') == 'video'
 642         #We increment the download the download count here to match the previous behaviour.
 643         self.increment_downloads()
 644
 645         info_dict['fulltitle'] = info_dict['title']
 646         if len(info_dict['title']) > 200:
 647             info_dict['title'] = info_dict['title'][:197] + u'...'
 648
 649         # Keep for backwards compatibility
 650         info_dict['stitle'] = info_dict['title']
 651
 652         if not 'format' in info_dict:
 653             info_dict['format'] = info_dict['ext']
 654
 655         reason = self._match_entry(info_dict)
 656         if reason is not None:
 657             self.to_screen(u'[download] ' + reason)
 658             return
 659
 660         max_downloads = self.params.get('max_downloads')
 661         if max_downloads is not None:
 662             if self._num_downloads > int(max_downloads):
 663                 raise MaxDownloadsReached()
 664
 665         filename = self.prepare_filename(info_dict)
 666
 667         # Forced printings
 668         if self.params.get('forcetitle', False):
 669             compat_print(info_dict['fulltitle'])
 670         if self.params.get('forceid', False):
 671             compat_print(info_dict['id'])
 672         if self.params.get('forceurl', False):
 673             # For RTMP URLs, also include the playpath
 674             compat_print(info_dict['url'] + info_dict.get('play_path', u''))
 675         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
 676             compat_print(info_dict['thumbnail'])
 677         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
 678             compat_print(info_dict['description'])
 679         if self.params.get('forcefilename', False) and filename is not None:
 680             compat_print(filename)
 681         if self.params.get('forceformat', False):
 682             compat_print(info_dict['format'])
 683         if self.params.get('forcejson', False):
 684             compat_print(json.dumps(info_dict))
 685
 686         # Do nothing else if in simulate mode
 687         if self.params.get('simulate', False):
 688             return
 689
 690         if filename is None:
 691             return
 692
 693         try:
 694             dn = os.path.dirname(encodeFilename(filename))
 695             if dn != '' and not os.path.exists(dn):
 696                 os.makedirs(dn)
 697         except (OSError, IOError) as err:
 698             self.report_error(u'unable to create directory ' + compat_str(err))
 699             return
 700
 701         if self.params.get('writedescription', False):
 702             try:
 703                 descfn = filename + u'.description'
 704                 self.report_writedescription(descfn)
 705                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
 706                     descfile.write(info_dict['description'])
 707             except (KeyError, TypeError):
 708                 self.report_warning(u'There\'s no description to write.')
 709             except (OSError, IOError):
 710                 self.report_error(u'Cannot write description file ' + descfn)
 711                 return
 712
 713         if self.params.get('writeannotations', False):
 714             try:
 715                 annofn = filename + u'.annotations.xml'
 716                 self.report_writeannotations(annofn)
 717                 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
 718                     annofile.write(info_dict['annotations'])
 719             except (KeyError, TypeError):
 720                 self.report_warning(u'There are no annotations to write.')
 721             except (OSError, IOError):
 722                 self.report_error(u'Cannot write annotations file: ' + annofn)
 723                 return
 724
 725         subtitles_are_requested = any([self.params.get('writesubtitles', False),
 726                                        self.params.get('writeautomaticsub')])
 727
 728         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
 729             # subtitles download errors are already managed as troubles in relevant IE
 730             # that way it will silently go on when used with unsupporting IE
 731             subtitles = info_dict['subtitles']
 732             sub_format = self.params.get('subtitlesformat', 'srt')
 733             for sub_lang in subtitles.keys():
 734                 sub = subtitles[sub_lang]
 735                 if sub is None:
 736                     continue
 737                 try:
 738                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
 739                     self.report_writesubtitles(sub_filename)
 740                     with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
 741                             subfile.write(sub)
 742                 except (OSError, IOError):
 743                     self.report_error(u'Cannot write subtitles file ' + descfn)
 744                     return
 745
 746         if self.params.get('writeinfojson', False):
 747             infofn = os.path.splitext(filename)[0] + u'.info.json'
 748             self.report_writeinfojson(infofn)
 749             try:
 750                 json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
 751                 write_json_file(json_info_dict, encodeFilename(infofn))
 752             except (OSError, IOError):
 753                 self.report_error(u'Cannot write metadata to JSON file ' + infofn)
 754                 return
 755
 756         if self.params.get('writethumbnail', False):
 757             if info_dict.get('thumbnail') is not None:
 758                 thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
 759                 thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
 760                 self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
 761                                (info_dict['extractor'], info_dict['id']))
 762                 try:
 763                     uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
 764                     with open(thumb_filename, 'wb') as thumbf:
 765                         shutil.copyfileobj(uf, thumbf)
 766                     self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
 767                         (info_dict['extractor'], info_dict['id'], thumb_filename))
 768                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 769                     self.report_warning(u'Unable to download thumbnail "%s": %s' %
 770                         (info_dict['thumbnail'], compat_str(err)))
 771
 772         if not self.params.get('skip_download', False):
 773             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
 774                 success = True
 775             else:
 776                 try:
 777                     success = self.fd._do_download(filename, info_dict)
 778                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 779                     self.report_error(u'unable to download video data: %s' % str(err))
 780                     return
 781                 except (OSError, IOError) as err:
 782                     raise UnavailableVideoError(err)
 783                 except (ContentTooShortError, ) as err:
 784                     self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
 785                     return
 786
 787             if success:
 788                 try:
 789                     self.post_process(filename, info_dict)
 790                 except (PostProcessingError) as err:
 791                     self.report_error(u'postprocessing: %s' % str(err))
 792                     return
 793
 794         self.record_download_archive(info_dict)
 795
 796     def download(self, url_list):
 797         """Download a given list of URLs."""
 798         if (len(url_list) > 1 and
 799                 '%' not in self.params['outtmpl']
 800                 and self.params.get('max_downloads') != 1):
 801             raise SameFileError(self.params['outtmpl'])
 802
 803         for url in url_list:
 804             try:
 805                 #It also downloads the videos
 806                 self.extract_info(url)
 807             except UnavailableVideoError:
 808                 self.report_error(u'unable to download video')
 809             except MaxDownloadsReached:
 810                 self.to_screen(u'[info] Maximum number of downloaded files reached.')
 811                 raise
 812
 813         return self._download_retcode
 814
 815     def post_process(self, filename, ie_info):
 816         """Run all the postprocessors on the given file."""
 817         info = dict(ie_info)
 818         info['filepath'] = filename
 819         keep_video = None
 820         for pp in self._pps:
 821             try:
 822                 keep_video_wish, new_info = pp.run(info)
 823                 if keep_video_wish is not None:
 824                     if keep_video_wish:
 825                         keep_video = keep_video_wish
 826                     elif keep_video is None:
 827                         # No clear decision yet, let IE decide
 828                         keep_video = keep_video_wish
 829             except PostProcessingError as e:
 830                 self.report_error(e.msg)
 831         if keep_video is False and not self.params.get('keepvideo', False):
 832             try:
 833                 self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
 834                 os.remove(encodeFilename(filename))
 835             except (IOError, OSError):
 836                 self.report_warning(u'Unable to remove downloaded video file')
 837
 838     def _make_archive_id(self, info_dict):
 839         # Future-proof against any change in case
 840         # and backwards compatibility with prior versions
 841         extractor = info_dict.get('extractor_key')
 842         if extractor is None:
 843             if 'id' in info_dict:
 844                 extractor = info_dict.get('ie_key')  # key in a playlist
 845         if extractor is None:
 846             return None  # Incomplete video information
 847         return extractor.lower() + u' ' + info_dict['id']
 848
 849     def in_download_archive(self, info_dict):
 850         fn = self.params.get('download_archive')
 851         if fn is None:
 852             return False
 853
 854         vid_id = self._make_archive_id(info_dict)
 855         if vid_id is None:
 856             return False  # Incomplete video information
 857
 858         try:
 859             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 860                 for line in archive_file:
 861                     if line.strip() == vid_id:
 862                         return True
 863         except IOError as ioe:
 864             if ioe.errno != errno.ENOENT:
 865                 raise
 866         return False
 867
 868     def record_download_archive(self, info_dict):
 869         fn = self.params.get('download_archive')
 870         if fn is None:
 871             return
 872         vid_id = self._make_archive_id(info_dict)
 873         assert vid_id
 874         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
 875             archive_file.write(vid_id + u'\n')
 876
 877     @staticmethod
 878     def format_resolution(format, default='unknown'):
 879         if format.get('vcodec') == 'none':
 880             return 'audio only'
 881         if format.get('_resolution') is not None:
 882             return format['_resolution']
 883         if format.get('height') is not None:
 884             if format.get('width') is not None:
 885                 res = u'%sx%s' % (format['width'], format['height'])
 886             else:
 887                 res = u'%sp' % format['height']
 888         else:
 889             res = default
 890         return res
 891
 892     def list_formats(self, info_dict):
 893         def format_note(fdict):
 894             res = u''
 895             if fdict.get('format_note') is not None:
 896                 res += fdict['format_note'] + u' '
 897             if (fdict.get('vcodec') is not None and
 898                     fdict.get('vcodec') != 'none'):
 899                 res += u'%-5s' % fdict['vcodec']
 900             elif fdict.get('vbr') is not None:
 901                 res += u'video'
 902             if fdict.get('vbr') is not None:
 903                 res += u'@%4dk' % fdict['vbr']
 904             if fdict.get('acodec') is not None:
 905                 if res:
 906                     res += u', '
 907                 res += u'%-5s' % fdict['acodec']
 908             elif fdict.get('abr') is not None:
 909                 if res:
 910                     res += u', '
 911                 res += 'audio'
 912             if fdict.get('abr') is not None:
 913                 res += u'@%3dk' % fdict['abr']
 914             if fdict.get('filesize') is not None:
 915                 if res:
 916                     res += u', '
 917                 res += format_bytes(fdict['filesize'])
 918             return res
 919
 920         def line(format, idlen=20):
 921             return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % (
 922                 format['format_id'],
 923                 format['ext'],
 924                 self.format_resolution(format),
 925                 format_note(format),
 926             ))
 927
 928         formats = info_dict.get('formats', [info_dict])
 929         idlen = max(len(u'format code'),
 930                     max(len(f['format_id']) for f in formats))
 931         formats_s = [line(f, idlen) for f in formats]
 932         if len(formats) > 1:
 933             formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
 934             formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
 935
 936         header_line = line({
 937             'format_id': u'format code', 'ext': u'extension',
 938             '_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
 939         self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
 940                        (info_dict['id'], header_line, u"\n".join(formats_s)))
 941
 942     def urlopen(self, req):
 943         """ Start an HTTP download """
 944         return self._opener.open(req)
 945
 946     def print_debug_header(self):
 947         if not self.params.get('verbose'):
 948             return
 949         write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
 950         try:
 951             sp = subprocess.Popen(
 952                 ['git', 'rev-parse', '--short', 'HEAD'],
 953                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
 954                 cwd=os.path.dirname(os.path.abspath(__file__)))
 955             out, err = sp.communicate()
 956             out = out.decode().strip()
 957             if re.match('[0-9a-f]+', out):
 958                 write_string(u'[debug] Git HEAD: ' + out + u'\n')
 959         except:
 960             try:
 961                 sys.exc_clear()
 962             except:
 963                 pass
 964         write_string(u'[debug] Python version %s - %s' %
 965                      (platform.python_version(), platform_name()) + u'\n')
 966
 967         proxy_map = {}
 968         for handler in self._opener.handlers:
 969             if hasattr(handler, 'proxies'):
 970                 proxy_map.update(handler.proxies)
 971         write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
 972
 973     def _setup_opener(self):
 974         timeout = float(self.params.get('socket_timeout', 600))
 975         opts_cookiefile = self.params.get('cookiefile')
 976         opts_proxy = self.params.get('proxy')
 977
 978         if opts_cookiefile is None:
 979             self.cookiejar = compat_cookiejar.CookieJar()
 980         else:
 981             self.cookiejar = compat_cookiejar.MozillaCookieJar(
 982                 opts_cookiefile)
 983             if os.access(opts_cookiefile, os.R_OK):
 984                 self.cookiejar.load()
 985
 986         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
 987             self.cookiejar)
 988         if opts_proxy is not None:
 989             if opts_proxy == '':
 990                 proxies = {}
 991             else:
 992                 proxies = {'http': opts_proxy, 'https': opts_proxy}
 993         else:
 994             proxies = compat_urllib_request.getproxies()
 995             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
 996             if 'http' in proxies and 'https' not in proxies:
 997                 proxies['https'] = proxies['http']
 998         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
 999         https_handler = make_HTTPS_handler(
1000             self.params.get('nocheckcertificate', False))
1001         opener = compat_urllib_request.build_opener(
1002             https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
1003         # Delete the default user-agent header, which would otherwise apply in
1004         # cases where our custom HTTP handler doesn't come into play
1005         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1006         opener.addheaders = []
1007         self._opener = opener
1008
1009         # TODO remove this global modification
1010         compat_urllib_request.install_opener(opener)
1011         socket.setdefaulttimeout(timeout)