_ Git - youtube-dl/blob - youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import
   5
   6 import errno
   7 import io
   8 import json
   9 import os
  10 import platform
  11 import re
  12 import shutil
  13 import subprocess
  14 import socket
  15 import sys
  16 import time
  17 import traceback
  18
  19 if os.name == 'nt':
  20     import ctypes
  21
  22 from .utils import (
  23     compat_cookiejar,
  24     compat_http_client,
  25     compat_print,
  26     compat_str,
  27     compat_urllib_error,
  28     compat_urllib_request,
  29     ContentTooShortError,
  30     date_from_str,
  31     DateRange,
  32     determine_ext,
  33     DownloadError,
  34     encodeFilename,
  35     ExtractorError,
  36     format_bytes,
  37     locked_file,
  38     make_HTTPS_handler,
  39     MaxDownloadsReached,
  40     PostProcessingError,
  41     platform_name,
  42     preferredencoding,
  43     SameFileError,
  44     sanitize_filename,
  45     subtitles_filename,
  46     takewhile_inclusive,
  47     UnavailableVideoError,
  48     write_json_file,
  49     write_string,
  50     YoutubeDLHandler,
  51 )
  52 from .extractor import get_info_extractor, gen_extractors
  53 from .FileDownloader import FileDownloader
  54 from .version import __version__
  55
  56
  57 class YoutubeDL(object):
  58     """YoutubeDL class.
  59
   60     YoutubeDL objects are the ones responsible for downloading the
  61     actual video file and writing it to disk if the user has requested
  62     it, among some other tasks. In most cases there should be one per
  63     program. As, given a video URL, the downloader doesn't know how to
  64     extract all the needed information, task that InfoExtractors do, it
  65     has to pass the URL to one of them.
  66
  67     For this, YoutubeDL objects have a method that allows
  68     InfoExtractors to be registered in a given order. When it is passed
  69     a URL, the YoutubeDL object handles it to the first InfoExtractor it
  70     finds that reports being able to handle it. The InfoExtractor extracts
  71     all the information about the video or videos the URL refers to, and
  72     YoutubeDL process the extracted information, possibly using a File
  73     Downloader to download the video.
  74
  75     YoutubeDL objects accept a lot of parameters. In order not to saturate
  76     the object constructor with arguments, it receives a dictionary of
  77     options instead. These options are available through the params
  78     attribute for the InfoExtractors to use. The YoutubeDL also
  79     registers itself as the downloader in charge for the InfoExtractors
  80     that are added to it, so this is a "mutual registration".
  81
  82     Available options:
  83
  84     username:          Username for authentication purposes.
  85     password:          Password for authentication purposes.
   86     videopassword:     Password for accessing a video.
  87     usenetrc:          Use netrc for authentication instead.
  88     verbose:           Print additional info to stdout.
  89     quiet:             Do not print messages to stdout.
  90     forceurl:          Force printing final URL.
  91     forcetitle:        Force printing title.
  92     forceid:           Force printing ID.
  93     forcethumbnail:    Force printing thumbnail URL.
  94     forcedescription:  Force printing description.
  95     forcefilename:     Force printing final filename.
  96     forcejson:         Force printing info_dict as JSON.
  97     simulate:          Do not download the video files.
  98     format:            Video format code.
  99     format_limit:      Highest quality format to try.
 100     outtmpl:           Template for output names.
 101     restrictfilenames: Do not allow "&" and spaces in file names
 102     ignoreerrors:      Do not stop on download errors.
 103     nooverwrites:      Prevent overwriting files.
 104     playliststart:     Playlist item to start at.
 105     playlistend:       Playlist item to end at.
 106     matchtitle:        Download only matching titles.
 107     rejecttitle:       Reject downloads for matching titles.
 108     logger:            Log messages to a logging.Logger instance.
 109     logtostderr:       Log messages to stderr instead of stdout.
 110     writedescription:  Write the video description to a .description file
 111     writeinfojson:     Write the video description to a .info.json file
 112     writeannotations:  Write the video annotations to a .annotations.xml file
 113     writethumbnail:    Write the thumbnail image to a file
 114     writesubtitles:    Write the video subtitles to a file
 115     writeautomaticsub: Write the automatic subtitles to a file
 116     allsubtitles:      Downloads all the subtitles of the video
 117                        (requires writesubtitles or writeautomaticsub)
 118     listsubtitles:     Lists all available subtitles for the video
 119     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
 120     subtitleslangs:    List of languages of the subtitles to download
 121     keepvideo:         Keep the video file after post-processing
 122     daterange:         A DateRange object, download only if the upload_date is in the range.
 123     skip_download:     Skip the actual download of the video file
 124     cachedir:          Location of the cache files in the filesystem.
 125                        None to disable filesystem cache.
 126     noplaylist:        Download single video instead of a playlist if in doubt.
 127     age_limit:         An integer representing the user's age in years.
 128                        Unsuitable videos for the given age are skipped.
 129     download_archive:   File name of a file where all downloads are recorded.
 130                        Videos already present in the file are not downloaded
 131                        again.
 132     cookiefile:        File name where cookies should be read from and dumped to.
 133     nocheckcertificate:Do not verify SSL certificates
 134     proxy:             URL of the proxy server to use
 135     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 136
 137     The following parameters are not used by YoutubeDL itself, they are used by
 138     the FileDownloader:
 139     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 140     noresizebuffer, retries, continuedl, noprogress, consoletitle
 141     """
 142
 143     params = None
 144     _ies = []
 145     _pps = []
 146     _download_retcode = None
 147     _num_downloads = None
 148     _screen_file = None
 149
 150     def __init__(self, params=None):
 151         """Create a FileDownloader object with the given options."""
 152         self._ies = []
 153         self._ies_instances = {}
 154         self._pps = []
 155         self._progress_hooks = []
 156         self._download_retcode = 0
 157         self._num_downloads = 0
 158         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 159         self.params = {} if params is None else params
 160
 161         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 162                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 163                 and not params['restrictfilenames']):
 164             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 165             self.report_warning(
 166                 u'Assuming --restrict-filenames since file system encoding '
 167                 u'cannot encode all charactes. '
 168                 u'Set the LC_ALL environment variable to fix this.')
 169             self.params['restrictfilenames'] = True
 170
 171         self.fd = FileDownloader(self, self.params)
 172
 173         if '%(stitle)s' in self.params.get('outtmpl', ''):
 174             self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 175
 176         self._setup_opener()
 177
 178     def add_info_extractor(self, ie):
 179         """Add an InfoExtractor object to the end of the list."""
 180         self._ies.append(ie)
 181         self._ies_instances[ie.ie_key()] = ie
 182         ie.set_downloader(self)
 183
 184     def get_info_extractor(self, ie_key):
 185         """
 186         Get an instance of an IE with name ie_key, it will try to get one from
 187         the _ies list, if there's no instance it will create a new one and add
 188         it to the extractor list.
 189         """
 190         ie = self._ies_instances.get(ie_key)
 191         if ie is None:
 192             ie = get_info_extractor(ie_key)()
 193             self.add_info_extractor(ie)
 194         return ie
 195
 196     def add_default_info_extractors(self):
 197         """
 198         Add the InfoExtractors returned by gen_extractors to the end of the list
 199         """
 200         for ie in gen_extractors():
 201             self.add_info_extractor(ie)
 202
 203     def add_post_processor(self, pp):
 204         """Add a PostProcessor object to the end of the chain."""
 205         self._pps.append(pp)
 206         pp.set_downloader(self)
 207
 208     def to_screen(self, message, skip_eol=False):
 209         """Print message to stdout if not in quiet mode."""
 210         if self.params.get('logger'):
 211             self.params['logger'].debug(message)
 212         elif not self.params.get('quiet', False):
 213             terminator = [u'\n', u''][skip_eol]
 214             output = message + terminator
 215             write_string(output, self._screen_file)
 216
 217     def to_stderr(self, message):
 218         """Print message to stderr."""
 219         assert type(message) == type(u'')
 220         if self.params.get('logger'):
 221             self.params['logger'].error(message)
 222         else:
 223             output = message + u'\n'
 224             if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
 225                 output = output.encode(preferredencoding())
 226             sys.stderr.write(output)
 227
 228     def to_console_title(self, message):
 229         if not self.params.get('consoletitle', False):
 230             return
 231         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 232             # c_wchar_p() might not be necessary if `message` is
 233             # already of type unicode()
 234             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 235         elif 'TERM' in os.environ:
 236             write_string(u'\033]0;%s\007' % message, self._screen_file)
 237
 238     def save_console_title(self):
 239         if not self.params.get('consoletitle', False):
 240             return
 241         if 'TERM' in os.environ:
 242             # Save the title on stack
 243             write_string(u'\033[22;0t', self._screen_file)
 244
 245     def restore_console_title(self):
 246         if not self.params.get('consoletitle', False):
 247             return
 248         if 'TERM' in os.environ:
 249             # Restore the title from stack
 250             write_string(u'\033[23;0t', self._screen_file)
 251
 252     def __enter__(self):
 253         self.save_console_title()
 254         return self
 255
 256     def __exit__(self, *args):
 257         self.restore_console_title()
 258
 259         if self.params.get('cookiefile') is not None:
 260             self.cookiejar.save()
 261
 262     def trouble(self, message=None, tb=None):
 263         """Determine action to take when a download problem appears.
 264
 265         Depending on if the downloader has been configured to ignore
 266         download errors or not, this method may throw an exception or
 267         not when errors are found, after printing the message.
 268
 269         tb, if given, is additional traceback information.
 270         """
 271         if message is not None:
 272             self.to_stderr(message)
 273         if self.params.get('verbose'):
 274             if tb is None:
 275                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 276                     tb = u''
 277                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 278                         tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 279                     tb += compat_str(traceback.format_exc())
 280                 else:
 281                     tb_data = traceback.format_list(traceback.extract_stack())
 282                     tb = u''.join(tb_data)
 283             self.to_stderr(tb)
 284         if not self.params.get('ignoreerrors', False):
 285             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 286                 exc_info = sys.exc_info()[1].exc_info
 287             else:
 288                 exc_info = sys.exc_info()
 289             raise DownloadError(message, exc_info)
 290         self._download_retcode = 1
 291
 292     def report_warning(self, message):
 293         '''
 294         Print the message to stderr, it will be prefixed with 'WARNING:'
 295         If stderr is a tty file the 'WARNING:' will be colored
 296         '''
 297         if sys.stderr.isatty() and os.name != 'nt':
 298             _msg_header = u'\033[0;33mWARNING:\033[0m'
 299         else:
 300             _msg_header = u'WARNING:'
 301         warning_message = u'%s %s' % (_msg_header, message)
 302         self.to_stderr(warning_message)
 303
 304     def report_error(self, message, tb=None):
 305         '''
 306         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 307         in red if stderr is a tty file.
 308         '''
 309         if sys.stderr.isatty() and os.name != 'nt':
 310             _msg_header = u'\033[0;31mERROR:\033[0m'
 311         else:
 312             _msg_header = u'ERROR:'
 313         error_message = u'%s %s' % (_msg_header, message)
 314         self.trouble(error_message, tb)
 315
 316     def report_writedescription(self, descfn):
 317         """ Report that the description file is being written """
 318         self.to_screen(u'[info] Writing video description to: ' + descfn)
 319
 320     def report_writesubtitles(self, sub_filename):
 321         """ Report that the subtitles file is being written """
 322         self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
 323
 324     def report_writeinfojson(self, infofn):
 325         """ Report that the metadata file has been written """
 326         self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
 327
 328     def report_writeannotations(self, annofn):
 329         """ Report that the annotations file has been written. """
 330         self.to_screen(u'[info] Writing video annotations to: ' + annofn)
 331
 332     def report_file_already_downloaded(self, file_name):
 333         """Report file has already been fully downloaded."""
 334         try:
 335             self.to_screen(u'[download] %s has already been downloaded' % file_name)
 336         except UnicodeEncodeError:
 337             self.to_screen(u'[download] The file has already been downloaded')
 338
 339     def increment_downloads(self):
 340         """Increment the ordinal that assigns a number to each file."""
 341         self._num_downloads += 1
 342
 343     def prepare_filename(self, info_dict):
 344         """Generate the output filename."""
 345         try:
 346             template_dict = dict(info_dict)
 347
 348             template_dict['epoch'] = int(time.time())
 349             autonumber_size = self.params.get('autonumber_size')
 350             if autonumber_size is None:
 351                 autonumber_size = 5
 352             autonumber_templ = u'%0' + str(autonumber_size) + u'd'
 353             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 354             if template_dict.get('playlist_index') is not None:
 355                 template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
 356
 357             sanitize = lambda k, v: sanitize_filename(
 358                 u'NA' if v is None else compat_str(v),
 359                 restricted=self.params.get('restrictfilenames'),
 360                 is_id=(k == u'id'))
 361             template_dict = dict((k, sanitize(k, v))
 362                                  for k, v in template_dict.items())
 363
 364             tmpl = os.path.expanduser(self.params['outtmpl'])
 365             filename = tmpl % template_dict
 366             return filename
 367         except KeyError as err:
 368             self.report_error(u'Erroneous output template')
 369             return None
 370         except ValueError as err:
 371             self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
 372             return None
 373
 374     def _match_entry(self, info_dict):
 375         """ Returns None iff the file should be downloaded """
 376
 377         if 'title' in info_dict:
 378             # This can happen when we're just evaluating the playlist
 379             title = info_dict['title']
 380             matchtitle = self.params.get('matchtitle', False)
 381             if matchtitle:
 382                 if not re.search(matchtitle, title, re.IGNORECASE):
 383                     return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
 384             rejecttitle = self.params.get('rejecttitle', False)
 385             if rejecttitle:
 386                 if re.search(rejecttitle, title, re.IGNORECASE):
 387                     return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 388         date = info_dict.get('upload_date', None)
 389         if date is not None:
 390             dateRange = self.params.get('daterange', DateRange())
 391             if date not in dateRange:
 392                 return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 393         age_limit = self.params.get('age_limit')
 394         if age_limit is not None:
 395             if age_limit < info_dict.get('age_limit', 0):
 396                 return u'Skipping "' + title + '" because it is age restricted'
 397         if self.in_download_archive(info_dict):
 398             return (u'%s has already been recorded in archive'
 399                     % info_dict.get('title', info_dict.get('id', u'video')))
 400         return None
 401
 402     @staticmethod
 403     def add_extra_info(info_dict, extra_info):
 404         '''Set the keys from extra_info in info dict if they are missing'''
 405         for key, value in extra_info.items():
 406             info_dict.setdefault(key, value)
 407
 408     def extract_info(self, url, download=True, ie_key=None, extra_info={}):
 409         '''
 410         Returns a list with a dictionary for each video we find.
 411         If 'download', also downloads the videos.
 412         extra_info is a dict containing the extra values to add to each result
 413          '''
 414
 415         if ie_key:
 416             ies = [self.get_info_extractor(ie_key)]
 417         else:
 418             ies = self._ies
 419
 420         for ie in ies:
 421             if not ie.suitable(url):
 422                 continue
 423
 424             if not ie.working():
 425                 self.report_warning(u'The program functionality for this site has been marked as broken, '
 426                                     u'and will probably not work.')
 427
 428             try:
 429                 ie_result = ie.extract(url)
 430                 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
 431                     break
 432                 if isinstance(ie_result, list):
 433                     # Backwards compatibility: old IE result format
 434                     ie_result = {
 435                         '_type': 'compat_list',
 436                         'entries': ie_result,
 437                     }
 438                 self.add_extra_info(ie_result,
 439                     {
 440                         'extractor': ie.IE_NAME,
 441                         'webpage_url': url,
 442                         'extractor_key': ie.ie_key(),
 443                     })
 444                 return self.process_ie_result(ie_result, download, extra_info)
 445             except ExtractorError as de: # An error we somewhat expected
 446                 self.report_error(compat_str(de), de.format_traceback())
 447                 break
 448             except Exception as e:
 449                 if self.params.get('ignoreerrors', False):
 450                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 451                     break
 452                 else:
 453                     raise
 454         else:
 455             self.report_error(u'no suitable InfoExtractor: %s' % url)
 456
 457     def process_ie_result(self, ie_result, download=True, extra_info={}):
 458         """
 459         Take the result of the ie(may be modified) and resolve all unresolved
 460         references (URLs, playlist items).
 461
 462         It will also download the videos if 'download'.
 463         Returns the resolved ie_result.
 464         """
 465
 466         result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
 467         if result_type == 'video':
 468             self.add_extra_info(ie_result, extra_info)
 469             return self.process_video_result(ie_result, download=download)
 470         elif result_type == 'url':
 471             # We have to add extra_info to the results because it may be
 472             # contained in a playlist
 473             return self.extract_info(ie_result['url'],
 474                                      download,
 475                                      ie_key=ie_result.get('ie_key'),
 476                                      extra_info=extra_info)
 477         elif result_type == 'playlist':
 478
 479             # We process each entry in the playlist
 480             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 481             self.to_screen(u'[download] Downloading playlist: %s' % playlist)
 482
 483             playlist_results = []
 484
 485             n_all_entries = len(ie_result['entries'])
 486             playliststart = self.params.get('playliststart', 1) - 1
 487             playlistend = self.params.get('playlistend', -1)
 488
 489             if playlistend == -1:
 490                 entries = ie_result['entries'][playliststart:]
 491             else:
 492                 entries = ie_result['entries'][playliststart:playlistend]
 493
 494             n_entries = len(entries)
 495
 496             self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
 497                 (ie_result['extractor'], playlist, n_all_entries, n_entries))
 498
 499             for i, entry in enumerate(entries, 1):
 500                 self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
 501                 extra = {
 502                     'playlist': playlist,
 503                     'playlist_index': i + playliststart,
 504                     'extractor': ie_result['extractor'],
 505                     'webpage_url': ie_result['webpage_url'],
 506                     'extractor_key': ie_result['extractor_key'],
 507                 }
 508
 509                 reason = self._match_entry(entry)
 510                 if reason is not None:
 511                     self.to_screen(u'[download] ' + reason)
 512                     continue
 513
 514                 entry_result = self.process_ie_result(entry,
 515                                                       download=download,
 516                                                       extra_info=extra)
 517                 playlist_results.append(entry_result)
 518             ie_result['entries'] = playlist_results
 519             return ie_result
 520         elif result_type == 'compat_list':
 521             def _fixup(r):
 522                 self.add_extra_info(r,
 523                     {
 524                         'extractor': ie_result['extractor'],
 525                         'webpage_url': ie_result['webpage_url'],
 526                         'extractor_key': ie_result['extractor_key'],
 527                     })
 528                 return r
 529             ie_result['entries'] = [
 530                 self.process_ie_result(_fixup(r), download, extra_info)
 531                 for r in ie_result['entries']
 532             ]
 533             return ie_result
 534         else:
 535             raise Exception('Invalid result type: %s' % result_type)
 536
 537     def select_format(self, format_spec, available_formats):
 538         if format_spec == 'best' or format_spec is None:
 539             return available_formats[-1]
 540         elif format_spec == 'worst':
 541             return available_formats[0]
 542         else:
 543             extensions = [u'mp4', u'flv', u'webm', u'3gp']
 544             if format_spec in extensions:
 545                 filter_f = lambda f: f['ext'] == format_spec
 546             else:
 547                 filter_f = lambda f: f['format_id'] == format_spec
 548             matches = list(filter(filter_f, available_formats))
 549             if matches:
 550                 return matches[-1]
 551         return None
 552
 553     def process_video_result(self, info_dict, download=True):
 554         assert info_dict.get('_type', 'video') == 'video'
 555
 556         if 'playlist' not in info_dict:
 557             # It isn't part of a playlist
 558             info_dict['playlist'] = None
 559             info_dict['playlist_index'] = None
 560
 561         # This extractors handle format selection themselves
 562         if info_dict['extractor'] in [u'youtube', u'Youku']:
 563             if download:
 564                 self.process_info(info_dict)
 565             return info_dict
 566
 567         # We now pick which formats have to be downloaded
 568         if info_dict.get('formats') is None:
 569             # There's only one format available
 570             formats = [info_dict]
 571         else:
 572             formats = info_dict['formats']
 573
 574         # We check that all the formats have the format and format_id fields
 575         for (i, format) in enumerate(formats):
 576             if format.get('format_id') is None:
 577                 format['format_id'] = compat_str(i)
 578             if format.get('format') is None:
 579                 format['format'] = u'{id} - {res}{note}'.format(
 580                     id=format['format_id'],
 581                     res=self.format_resolution(format),
 582                     note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
 583                 )
 584             # Automatically determine file extension if missing
 585             if 'ext' not in format:
 586                 format['ext'] = determine_ext(format['url'])
 587
 588         if self.params.get('listformats', None):
 589             self.list_formats(info_dict)
 590             return
 591
 592         format_limit = self.params.get('format_limit', None)
 593         if format_limit:
 594             formats = list(takewhile_inclusive(
 595                 lambda f: f['format_id'] != format_limit, formats
 596             ))
 597         if self.params.get('prefer_free_formats'):
 598             def _free_formats_key(f):
 599                 try:
 600                     ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
 601                 except ValueError:
 602                     ext_ord = -1
 603                 # We only compare the extension if they have the same height and width
 604                 return (f.get('height'), f.get('width'), ext_ord)
 605             formats = sorted(formats, key=_free_formats_key)
 606
 607         req_format = self.params.get('format', 'best')
 608         if req_format is None:
 609             req_format = 'best'
 610         formats_to_download = []
 611         # The -1 is for supporting YoutubeIE
 612         if req_format in ('-1', 'all'):
 613             formats_to_download = formats
 614         else:
 615             # We can accept formats requestd in the format: 34/5/best, we pick
 616             # the first that is available, starting from left
 617             req_formats = req_format.split('/')
 618             for rf in req_formats:
 619                 selected_format = self.select_format(rf, formats)
 620                 if selected_format is not None:
 621                     formats_to_download = [selected_format]
 622                     break
 623         if not formats_to_download:
 624             raise ExtractorError(u'requested format not available',
 625                                  expected=True)
 626
 627         if download:
 628             if len(formats_to_download) > 1:
 629                 self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
 630             for format in formats_to_download:
 631                 new_info = dict(info_dict)
 632                 new_info.update(format)
 633                 self.process_info(new_info)
 634         # We update the info dict with the best quality format (backwards compatibility)
 635         info_dict.update(formats_to_download[-1])
 636         return info_dict
 637
 638     def process_info(self, info_dict):
 639         """Process a single resolved IE result."""
 640
 641         assert info_dict.get('_type', 'video') == 'video'
 642         #We increment the download the download count here to match the previous behaviour.
 643         self.increment_downloads()
 644
 645         info_dict['fulltitle'] = info_dict['title']
 646         if len(info_dict['title']) > 200:
 647             info_dict['title'] = info_dict['title'][:197] + u'...'
 648
 649         # Keep for backwards compatibility
 650         info_dict['stitle'] = info_dict['title']
 651
 652         if not 'format' in info_dict:
 653             info_dict['format'] = info_dict['ext']
 654
 655         reason = self._match_entry(info_dict)
 656         if reason is not None:
 657             self.to_screen(u'[download] ' + reason)
 658             return
 659
 660         max_downloads = self.params.get('max_downloads')
 661         if max_downloads is not None:
 662             if self._num_downloads > int(max_downloads):
 663                 raise MaxDownloadsReached()
 664
 665         filename = self.prepare_filename(info_dict)
 666
 667         # Forced printings
 668         if self.params.get('forcetitle', False):
 669             compat_print(info_dict['fulltitle'])
 670         if self.params.get('forceid', False):
 671             compat_print(info_dict['id'])
 672         if self.params.get('forceurl', False):
 673             # For RTMP URLs, also include the playpath
 674             compat_print(info_dict['url'] + info_dict.get('play_path', u''))
 675         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
 676             compat_print(info_dict['thumbnail'])
 677         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
 678             compat_print(info_dict['description'])
 679         if self.params.get('forcefilename', False) and filename is not None:
 680             compat_print(filename)
 681         if self.params.get('forceformat', False):
 682             compat_print(info_dict['format'])
 683         if self.params.get('forcejson', False):
 684             compat_print(json.dumps(info_dict))
 685
 686         # Do nothing else if in simulate mode
 687         if self.params.get('simulate', False):
 688             return
 689
 690         if filename is None:
 691             return
 692
 693         try:
 694             dn = os.path.dirname(encodeFilename(filename))
 695             if dn != '' and not os.path.exists(dn):
 696                 os.makedirs(dn)
 697         except (OSError, IOError) as err:
 698             self.report_error(u'unable to create directory ' + compat_str(err))
 699             return
 700
 701         if self.params.get('writedescription', False):
 702             try:
 703                 descfn = filename + u'.description'
 704                 self.report_writedescription(descfn)
 705                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
 706                     descfile.write(info_dict['description'])
 707             except (KeyError, TypeError):
 708                 self.report_warning(u'There\'s no description to write.')
 709             except (OSError, IOError):
 710                 self.report_error(u'Cannot write description file ' + descfn)
 711                 return
 712
 713         if self.params.get('writeannotations', False):
 714             try:
 715                 annofn = filename + u'.annotations.xml'
 716                 self.report_writeannotations(annofn)
 717                 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
 718                     annofile.write(info_dict['annotations'])
 719             except (KeyError, TypeError):
 720                 self.report_warning(u'There are no annotations to write.')
 721             except (OSError, IOError):
 722                 self.report_error(u'Cannot write annotations file: ' + annofn)
 723                 return
 724
 725         subtitles_are_requested = any([self.params.get('writesubtitles', False),
 726                                        self.params.get('writeautomaticsub')])
 727
 728         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
 729             # subtitles download errors are already managed as troubles in relevant IE
 730             # that way it will silently go on when used with unsupporting IE
 731             subtitles = info_dict['subtitles']
 732             sub_format = self.params.get('subtitlesformat', 'srt')
 733             for sub_lang in subtitles.keys():
 734                 sub = subtitles[sub_lang]
 735                 if sub is None:
 736                     continue
 737                 try:
 738                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
 739                     self.report_writesubtitles(sub_filename)
 740                     with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
 741                             subfile.write(sub)
 742                 except (OSError, IOError):
 743                     self.report_error(u'Cannot write subtitles file ' + descfn)
 744                     return
 745
 746         if self.params.get('writeinfojson', False):
 747             infofn = os.path.splitext(filename)[0] + u'.info.json'
 748             self.report_writeinfojson(infofn)
 749             try:
 750                 json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
 751                 write_json_file(json_info_dict, encodeFilename(infofn))
 752             except (OSError, IOError):
 753                 self.report_error(u'Cannot write metadata to JSON file ' + infofn)
 754                 return
 755
 756         if self.params.get('writethumbnail', False):
 757             if info_dict.get('thumbnail') is not None:
 758                 thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
 759                 thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
 760                 self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
 761                                (info_dict['extractor'], info_dict['id']))
 762                 try:
 763                     uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
 764                     with open(thumb_filename, 'wb') as thumbf:
 765                         shutil.copyfileobj(uf, thumbf)
 766                     self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
 767                         (info_dict['extractor'], info_dict['id'], thumb_filename))
 768                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 769                     self.report_warning(u'Unable to download thumbnail "%s": %s' %
 770                         (info_dict['thumbnail'], compat_str(err)))
 771
 772         if not self.params.get('skip_download', False):
 773             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
 774                 success = True
 775             else:
 776                 try:
 777                     success = self.fd._do_download(filename, info_dict)
 778                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 779                     self.report_error(u'unable to download video data: %s' % str(err))
 780                     return
 781                 except (OSError, IOError) as err:
 782                     raise UnavailableVideoError(err)
 783                 except (ContentTooShortError, ) as err:
 784                     self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
 785                     return
 786
 787             if success:
 788                 try:
 789                     self.post_process(filename, info_dict)
 790                 except (PostProcessingError) as err:
 791                     self.report_error(u'postprocessing: %s' % str(err))
 792                     return
 793
 794         self.record_download_archive(info_dict)
 795
 796     def download(self, url_list):
 797         """Download a given list of URLs."""
 798         if (len(url_list) > 1 and
 799                 '%' not in self.params['outtmpl']
 800                 and self.params.get('max_downloads') != 1):
 801             raise SameFileError(self.params['outtmpl'])
 802
 803         for url in url_list:
 804             try:
 805                 #It also downloads the videos
 806                 self.extract_info(url)
 807             except UnavailableVideoError:
 808                 self.report_error(u'unable to download video')
 809             except MaxDownloadsReached:
 810                 self.to_screen(u'[info] Maximum number of downloaded files reached.')
 811                 raise
 812
 813         return self._download_retcode
 814
 815     def download_with_info_file(self, info_filename):
 816         with open(info_filename, 'r') as f:
 817             # TODO: Check for errors
 818             info = json.load(f)
 819         self.process_ie_result(info, download=True)
 820
 821     def post_process(self, filename, ie_info):
 822         """Run all the postprocessors on the given file."""
 823         info = dict(ie_info)
 824         info['filepath'] = filename
 825         keep_video = None
 826         for pp in self._pps:
 827             try:
 828                 keep_video_wish, new_info = pp.run(info)
 829                 if keep_video_wish is not None:
 830                     if keep_video_wish:
 831                         keep_video = keep_video_wish
 832                     elif keep_video is None:
 833                         # No clear decision yet, let IE decide
 834                         keep_video = keep_video_wish
 835             except PostProcessingError as e:
 836                 self.report_error(e.msg)
 837         if keep_video is False and not self.params.get('keepvideo', False):
 838             try:
 839                 self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
 840                 os.remove(encodeFilename(filename))
 841             except (IOError, OSError):
 842                 self.report_warning(u'Unable to remove downloaded video file')
 843
 844     def _make_archive_id(self, info_dict):
 845         # Future-proof against any change in case
 846         # and backwards compatibility with prior versions
 847         extractor = info_dict.get('extractor_key')
 848         if extractor is None:
 849             if 'id' in info_dict:
 850                 extractor = info_dict.get('ie_key')  # key in a playlist
 851         if extractor is None:
 852             return None  # Incomplete video information
 853         return extractor.lower() + u' ' + info_dict['id']
 854
 855     def in_download_archive(self, info_dict):
 856         fn = self.params.get('download_archive')
 857         if fn is None:
 858             return False
 859
 860         vid_id = self._make_archive_id(info_dict)
 861         if vid_id is None:
 862             return False  # Incomplete video information
 863
 864         try:
 865             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 866                 for line in archive_file:
 867                     if line.strip() == vid_id:
 868                         return True
 869         except IOError as ioe:
 870             if ioe.errno != errno.ENOENT:
 871                 raise
 872         return False
 873
 874     def record_download_archive(self, info_dict):
 875         fn = self.params.get('download_archive')
 876         if fn is None:
 877             return
 878         vid_id = self._make_archive_id(info_dict)
 879         assert vid_id
 880         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
 881             archive_file.write(vid_id + u'\n')
 882
 883     @staticmethod
 884     def format_resolution(format, default='unknown'):
 885         if format.get('vcodec') == 'none':
 886             return 'audio only'
 887         if format.get('_resolution') is not None:
 888             return format['_resolution']
 889         if format.get('height') is not None:
 890             if format.get('width') is not None:
 891                 res = u'%sx%s' % (format['width'], format['height'])
 892             else:
 893                 res = u'%sp' % format['height']
 894         else:
 895             res = default
 896         return res
 897
 898     def list_formats(self, info_dict):
 899         def format_note(fdict):
 900             res = u''
 901             if fdict.get('format_note') is not None:
 902                 res += fdict['format_note'] + u' '
 903             if (fdict.get('vcodec') is not None and
 904                     fdict.get('vcodec') != 'none'):
 905                 res += u'%-5s' % fdict['vcodec']
 906             elif fdict.get('vbr') is not None:
 907                 res += u'video'
 908             if fdict.get('vbr') is not None:
 909                 res += u'@%4dk' % fdict['vbr']
 910             if fdict.get('acodec') is not None:
 911                 if res:
 912                     res += u', '
 913                 res += u'%-5s' % fdict['acodec']
 914             elif fdict.get('abr') is not None:
 915                 if res:
 916                     res += u', '
 917                 res += 'audio'
 918             if fdict.get('abr') is not None:
 919                 res += u'@%3dk' % fdict['abr']
 920             if fdict.get('filesize') is not None:
 921                 if res:
 922                     res += u', '
 923                 res += format_bytes(fdict['filesize'])
 924             return res
 925
 926         def line(format, idlen=20):
 927             return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % (
 928                 format['format_id'],
 929                 format['ext'],
 930                 self.format_resolution(format),
 931                 format_note(format),
 932             ))
 933
 934         formats = info_dict.get('formats', [info_dict])
 935         idlen = max(len(u'format code'),
 936                     max(len(f['format_id']) for f in formats))
 937         formats_s = [line(f, idlen) for f in formats]
 938         if len(formats) > 1:
 939             formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
 940             formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
 941
 942         header_line = line({
 943             'format_id': u'format code', 'ext': u'extension',
 944             '_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
 945         self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
 946                        (info_dict['id'], header_line, u"\n".join(formats_s)))
 947
 948     def urlopen(self, req):
 949         """ Start an HTTP download """
 950         return self._opener.open(req)
 951
 952     def print_debug_header(self):
 953         if not self.params.get('verbose'):
 954             return
 955         write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
 956         try:
 957             sp = subprocess.Popen(
 958                 ['git', 'rev-parse', '--short', 'HEAD'],
 959                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
 960                 cwd=os.path.dirname(os.path.abspath(__file__)))
 961             out, err = sp.communicate()
 962             out = out.decode().strip()
 963             if re.match('[0-9a-f]+', out):
 964                 write_string(u'[debug] Git HEAD: ' + out + u'\n')
 965         except:
 966             try:
 967                 sys.exc_clear()
 968             except:
 969                 pass
 970         write_string(u'[debug] Python version %s - %s' %
 971                      (platform.python_version(), platform_name()) + u'\n')
 972
 973         proxy_map = {}
 974         for handler in self._opener.handlers:
 975             if hasattr(handler, 'proxies'):
 976                 proxy_map.update(handler.proxies)
 977         write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
 978
 979     def _setup_opener(self):
 980         timeout_val = self.params.get('socket_timeout')
 981         timeout = 600 if timeout_val is None else float(timeout_val)
 982
 983         opts_cookiefile = self.params.get('cookiefile')
 984         opts_proxy = self.params.get('proxy')
 985
 986         if opts_cookiefile is None:
 987             self.cookiejar = compat_cookiejar.CookieJar()
 988         else:
 989             self.cookiejar = compat_cookiejar.MozillaCookieJar(
 990                 opts_cookiefile)
 991             if os.access(opts_cookiefile, os.R_OK):
 992                 self.cookiejar.load()
 993
 994         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
 995             self.cookiejar)
 996         if opts_proxy is not None:
 997             if opts_proxy == '':
 998                 proxies = {}
 999             else:
1000                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1001         else:
1002             proxies = compat_urllib_request.getproxies()
1003             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1004             if 'http' in proxies and 'https' not in proxies:
1005                 proxies['https'] = proxies['http']
1006         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1007         https_handler = make_HTTPS_handler(
1008             self.params.get('nocheckcertificate', False))
1009         opener = compat_urllib_request.build_opener(
1010             https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
1011         # Delete the default user-agent header, which would otherwise apply in
1012         # cases where our custom HTTP handler doesn't come into play
1013         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1014         opener.addheaders = []
1015         self._opener = opener
1016
1017         # TODO remove this global modification
1018         compat_urllib_request.install_opener(opener)
1019         socket.setdefaulttimeout(timeout)