_ Git - youtube-dl/blob - youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import
   5
   6 import errno
   7 import io
   8 import json
   9 import os
  10 import platform
  11 import re
  12 import shutil
  13 import subprocess
  14 import socket
  15 import sys
  16 import time
  17 import traceback
  18
  19 if os.name == 'nt':
  20     import ctypes
  21
  22 from .utils import (
  23     compat_cookiejar,
  24     compat_http_client,
  25     compat_print,
  26     compat_str,
  27     compat_urllib_error,
  28     compat_urllib_request,
  29     ContentTooShortError,
  30     date_from_str,
  31     DateRange,
  32     determine_ext,
  33     DownloadError,
  34     encodeFilename,
  35     ExtractorError,
  36     format_bytes,
  37     locked_file,
  38     make_HTTPS_handler,
  39     MaxDownloadsReached,
  40     PostProcessingError,
  41     platform_name,
  42     preferredencoding,
  43     SameFileError,
  44     sanitize_filename,
  45     subtitles_filename,
  46     takewhile_inclusive,
  47     UnavailableVideoError,
  48     write_json_file,
  49     write_string,
  50     YoutubeDLHandler,
  51 )
  52 from .extractor import get_info_extractor, gen_extractors
  53 from .FileDownloader import FileDownloader
  54 from .version import __version__
  55
  56
  57 class YoutubeDL(object):
  58     """YoutubeDL class.
  59
  60     YoutubeDL objects are the ones responsible of downloading the
  61     actual video file and writing it to disk if the user has requested
  62     it, among some other tasks. In most cases there should be one per
  63     program. As, given a video URL, the downloader doesn't know how to
  64     extract all the needed information, task that InfoExtractors do, it
  65     has to pass the URL to one of them.
  66
  67     For this, YoutubeDL objects have a method that allows
  68     InfoExtractors to be registered in a given order. When it is passed
  69     a URL, the YoutubeDL object hands it to the first InfoExtractor it
  70     finds that reports being able to handle it. The InfoExtractor extracts
  71     all the information about the video or videos the URL refers to, and
  72     YoutubeDL process the extracted information, possibly using a File
  73     Downloader to download the video.
  74
  75     YoutubeDL objects accept a lot of parameters. In order not to saturate
  76     the object constructor with arguments, it receives a dictionary of
  77     options instead. These options are available through the params
  78     attribute for the InfoExtractors to use. The YoutubeDL also
  79     registers itself as the downloader in charge for the InfoExtractors
  80     that are added to it, so this is a "mutual registration".
  81
  82     Available options:
  83
  84     username:          Username for authentication purposes.
  85     password:          Password for authentication purposes.
  86     videopassword:     Password for accessing a video.
  87     usenetrc:          Use netrc for authentication instead.
  88     verbose:           Print additional info to stdout.
  89     quiet:             Do not print messages to stdout.
  90     forceurl:          Force printing final URL.
  91     forcetitle:        Force printing title.
  92     forceid:           Force printing ID.
  93     forcethumbnail:    Force printing thumbnail URL.
  94     forcedescription:  Force printing description.
  95     forcefilename:     Force printing final filename.
  96     forcejson:         Force printing info_dict as JSON.
  97     simulate:          Do not download the video files.
  98     format:            Video format code.
  99     format_limit:      Highest quality format to try.
 100     outtmpl:           Template for output names.
 101     restrictfilenames: Do not allow "&" and spaces in file names
 102     ignoreerrors:      Do not stop on download errors.
 103     nooverwrites:      Prevent overwriting files.
 104     playliststart:     Playlist item to start at.
 105     playlistend:       Playlist item to end at.
 106     matchtitle:        Download only matching titles.
 107     rejecttitle:       Reject downloads for matching titles.
 108     logger:            Log messages to a logging.Logger instance.
 109     logtostderr:       Log messages to stderr instead of stdout.
 110     writedescription:  Write the video description to a .description file
 111     writeinfojson:     Write the video metadata to a .info.json file
 112     writeannotations:  Write the video annotations to a .annotations.xml file
 113     writethumbnail:    Write the thumbnail image to a file
 114     writesubtitles:    Write the video subtitles to a file
 115     writeautomaticsub: Write the automatic subtitles to a file
 116     allsubtitles:      Downloads all the subtitles of the video
 117                        (requires writesubtitles or writeautomaticsub)
 118     listsubtitles:     Lists all available subtitles for the video
 119     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
 120     subtitleslangs:    List of languages of the subtitles to download
 121     keepvideo:         Keep the video file after post-processing
 122     daterange:         A DateRange object, download only if the upload_date is in the range.
 123     skip_download:     Skip the actual download of the video file
 124     cachedir:          Location of the cache files in the filesystem.
 125                        None to disable filesystem cache.
 126     noplaylist:        Download single video instead of a playlist if in doubt.
 127     age_limit:         An integer representing the user's age in years.
 128                        Unsuitable videos for the given age are skipped.
 129     download_archive:   File name of a file where all downloads are recorded.
 130                        Videos already present in the file are not downloaded
 131                        again.
 132     cookiefile:        File name where cookies should be read from and dumped to.
 133     nocheckcertificate:Do not verify SSL certificates
 134     proxy:             URL of the proxy server to use
 135     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 136
 137     The following parameters are not used by YoutubeDL itself, they are used by
 138     the FileDownloader:
 139     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 140     noresizebuffer, retries, continuedl, noprogress, consoletitle
 141     """
 142
    # Class-level placeholders; __init__ rebinds every one of these on the
    # instance, so the shared list objects below are not used by instances
    # that went through __init__.
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None
 149
 150     def __init__(self, params=None):
 151         """Create a FileDownloader object with the given options."""
 152         self._ies = []
 153         self._ies_instances = {}
 154         self._pps = []
 155         self._progress_hooks = []
 156         self._download_retcode = 0
 157         self._num_downloads = 0
 158         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 159         self.params = {} if params is None else params
 160
 161         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 162                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 163                 and not params['restrictfilenames']):
 164             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 165             self.report_warning(
 166                 u'Assuming --restrict-filenames since file system encoding '
 167                 u'cannot encode all charactes. '
 168                 u'Set the LC_ALL environment variable to fix this.')
 169             self.params['restrictfilenames'] = True
 170
 171         self.fd = FileDownloader(self, self.params)
 172
 173         if '%(stitle)s' in self.params.get('outtmpl', ''):
 174             self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 175
 176         self._setup_opener()
 177
 178     def add_info_extractor(self, ie):
 179         """Add an InfoExtractor object to the end of the list."""
 180         self._ies.append(ie)
 181         self._ies_instances[ie.ie_key()] = ie
 182         ie.set_downloader(self)
 183
 184     def get_info_extractor(self, ie_key):
 185         """
 186         Get an instance of an IE with name ie_key, it will try to get one from
 187         the _ies list, if there's no instance it will create a new one and add
 188         it to the extractor list.
 189         """
 190         ie = self._ies_instances.get(ie_key)
 191         if ie is None:
 192             ie = get_info_extractor(ie_key)()
 193             self.add_info_extractor(ie)
 194         return ie
 195
 196     def add_default_info_extractors(self):
 197         """
 198         Add the InfoExtractors returned by gen_extractors to the end of the list
 199         """
 200         for ie in gen_extractors():
 201             self.add_info_extractor(ie)
 202
 203     def add_post_processor(self, pp):
 204         """Add a PostProcessor object to the end of the chain."""
 205         self._pps.append(pp)
 206         pp.set_downloader(self)
 207
 208     def to_screen(self, message, skip_eol=False):
 209         """Print message to stdout if not in quiet mode."""
 210         if self.params.get('logger'):
 211             self.params['logger'].debug(message)
 212         elif not self.params.get('quiet', False):
 213             terminator = [u'\n', u''][skip_eol]
 214             output = message + terminator
 215             write_string(output, self._screen_file)
 216
 217     def to_stderr(self, message):
 218         """Print message to stderr."""
 219         assert type(message) == type(u'')
 220         if self.params.get('logger'):
 221             self.params['logger'].error(message)
 222         else:
 223             output = message + u'\n'
 224             if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
 225                 output = output.encode(preferredencoding())
 226             sys.stderr.write(output)
 227
 228     def to_console_title(self, message):
 229         if not self.params.get('consoletitle', False):
 230             return
 231         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 232             # c_wchar_p() might not be necessary if `message` is
 233             # already of type unicode()
 234             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 235         elif 'TERM' in os.environ:
 236             write_string(u'\033]0;%s\007' % message, self._screen_file)
 237
 238     def save_console_title(self):
 239         if not self.params.get('consoletitle', False):
 240             return
 241         if 'TERM' in os.environ:
 242             # Save the title on stack
 243             write_string(u'\033[22;0t', self._screen_file)
 244
 245     def restore_console_title(self):
 246         if not self.params.get('consoletitle', False):
 247             return
 248         if 'TERM' in os.environ:
 249             # Restore the title from stack
 250             write_string(u'\033[23;0t', self._screen_file)
 251
    def __enter__(self):
        """Enter the context: save the console title and return self."""
        self.save_console_title()
        return self
 255
 256     def __exit__(self, *args):
 257         self.restore_console_title()
 258
 259         if self.params.get('cookiefile') is not None:
 260             self.cookiejar.save()
 261
 262     def trouble(self, message=None, tb=None):
 263         """Determine action to take when a download problem appears.
 264
 265         Depending on if the downloader has been configured to ignore
 266         download errors or not, this method may throw an exception or
 267         not when errors are found, after printing the message.
 268
 269         tb, if given, is additional traceback information.
 270         """
 271         if message is not None:
 272             self.to_stderr(message)
 273         if self.params.get('verbose'):
 274             if tb is None:
 275                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 276                     tb = u''
 277                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 278                         tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 279                     tb += compat_str(traceback.format_exc())
 280                 else:
 281                     tb_data = traceback.format_list(traceback.extract_stack())
 282                     tb = u''.join(tb_data)
 283             self.to_stderr(tb)
 284         if not self.params.get('ignoreerrors', False):
 285             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 286                 exc_info = sys.exc_info()[1].exc_info
 287             else:
 288                 exc_info = sys.exc_info()
 289             raise DownloadError(message, exc_info)
 290         self._download_retcode = 1
 291
 292     def report_warning(self, message):
 293         '''
 294         Print the message to stderr, it will be prefixed with 'WARNING:'
 295         If stderr is a tty file the 'WARNING:' will be colored
 296         '''
 297         if sys.stderr.isatty() and os.name != 'nt':
 298             _msg_header = u'\033[0;33mWARNING:\033[0m'
 299         else:
 300             _msg_header = u'WARNING:'
 301         warning_message = u'%s %s' % (_msg_header, message)
 302         self.to_stderr(warning_message)
 303
 304     def report_error(self, message, tb=None):
 305         '''
 306         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 307         in red if stderr is a tty file.
 308         '''
 309         if sys.stderr.isatty() and os.name != 'nt':
 310             _msg_header = u'\033[0;31mERROR:\033[0m'
 311         else:
 312             _msg_header = u'ERROR:'
 313         error_message = u'%s %s' % (_msg_header, message)
 314         self.trouble(error_message, tb)
 315
 316     def report_writedescription(self, descfn):
 317         """ Report that the description file is being written """
 318         self.to_screen(u'[info] Writing video description to: ' + descfn)
 319
 320     def report_writesubtitles(self, sub_filename):
 321         """ Report that the subtitles file is being written """
 322         self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
 323
 324     def report_writeinfojson(self, infofn):
 325         """ Report that the metadata file has been written """
 326         self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
 327
 328     def report_writeannotations(self, annofn):
 329         """ Report that the annotations file has been written. """
 330         self.to_screen(u'[info] Writing video annotations to: ' + annofn)
 331
 332     def report_file_already_downloaded(self, file_name):
 333         """Report file has already been fully downloaded."""
 334         try:
 335             self.to_screen(u'[download] %s has already been downloaded' % file_name)
 336         except UnicodeEncodeError:
 337             self.to_screen(u'[download] The file has already been downloaded')
 338
 339     def increment_downloads(self):
 340         """Increment the ordinal that assigns a number to each file."""
 341         self._num_downloads += 1
 342
 343     def prepare_filename(self, info_dict):
 344         """Generate the output filename."""
 345         try:
 346             template_dict = dict(info_dict)
 347
 348             template_dict['epoch'] = int(time.time())
 349             autonumber_size = self.params.get('autonumber_size')
 350             if autonumber_size is None:
 351                 autonumber_size = 5
 352             autonumber_templ = u'%0' + str(autonumber_size) + u'd'
 353             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 354             if template_dict.get('playlist_index') is not None:
 355                 template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
 356
 357             sanitize = lambda k, v: sanitize_filename(
 358                 u'NA' if v is None else compat_str(v),
 359                 restricted=self.params.get('restrictfilenames'),
 360                 is_id=(k == u'id'))
 361             template_dict = dict((k, sanitize(k, v))
 362                                  for k, v in template_dict.items())
 363
 364             tmpl = os.path.expanduser(self.params['outtmpl'])
 365             filename = tmpl % template_dict
 366             return filename
 367         except KeyError as err:
 368             self.report_error(u'Erroneous output template')
 369             return None
 370         except ValueError as err:
 371             self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
 372             return None
 373
 374     def _match_entry(self, info_dict):
 375         """ Returns None iff the file should be downloaded """
 376
 377         if 'title' in info_dict:
 378             # This can happen when we're just evaluating the playlist
 379             title = info_dict['title']
 380             matchtitle = self.params.get('matchtitle', False)
 381             if matchtitle:
 382                 if not re.search(matchtitle, title, re.IGNORECASE):
 383                     return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
 384             rejecttitle = self.params.get('rejecttitle', False)
 385             if rejecttitle:
 386                 if re.search(rejecttitle, title, re.IGNORECASE):
 387                     return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 388         date = info_dict.get('upload_date', None)
 389         if date is not None:
 390             dateRange = self.params.get('daterange', DateRange())
 391             if date not in dateRange:
 392                 return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 393         age_limit = self.params.get('age_limit')
 394         if age_limit is not None:
 395             if age_limit < info_dict.get('age_limit', 0):
 396                 return u'Skipping "' + title + '" because it is age restricted'
 397         if self.in_download_archive(info_dict):
 398             return (u'%s has already been recorded in archive'
 399                     % info_dict.get('title', info_dict.get('id', u'video')))
 400         return None
 401
 402     @staticmethod
 403     def add_extra_info(info_dict, extra_info):
 404         '''Set the keys from extra_info in info dict if they are missing'''
 405         for key, value in extra_info.items():
 406             info_dict.setdefault(key, value)
 407
 408     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 409                      process=True):
 410         '''
 411         Returns a list with a dictionary for each video we find.
 412         If 'download', also downloads the videos.
 413         extra_info is a dict containing the extra values to add to each result
 414          '''
 415
 416         if ie_key:
 417             ies = [self.get_info_extractor(ie_key)]
 418         else:
 419             ies = self._ies
 420
 421         for ie in ies:
 422             if not ie.suitable(url):
 423                 continue
 424
 425             if not ie.working():
 426                 self.report_warning(u'The program functionality for this site has been marked as broken, '
 427                                     u'and will probably not work.')
 428
 429             try:
 430                 ie_result = ie.extract(url)
 431                 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
 432                     break
 433                 if isinstance(ie_result, list):
 434                     # Backwards compatibility: old IE result format
 435                     ie_result = {
 436                         '_type': 'compat_list',
 437                         'entries': ie_result,
 438                     }
 439                 self.add_extra_info(ie_result,
 440                     {
 441                         'extractor': ie.IE_NAME,
 442                         'webpage_url': url,
 443                         'extractor_key': ie.ie_key(),
 444                     })
 445                 if process:
 446                     return self.process_ie_result(ie_result, download, extra_info)
 447                 else:
 448                     return ie_result
 449             except ExtractorError as de: # An error we somewhat expected
 450                 self.report_error(compat_str(de), de.format_traceback())
 451                 break
 452             except Exception as e:
 453                 if self.params.get('ignoreerrors', False):
 454                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 455                     break
 456                 else:
 457                     raise
 458         else:
 459             self.report_error(u'no suitable InfoExtractor: %s' % url)
 460
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        Dispatches on ie_result['_type'] ('video' when absent):
        'video' is handed to process_video_result, 'url' is re-extracted
        through extract_info, 'url_transparent' is re-extracted and merged
        with the embedding page's info, 'playlist' and 'compat_list' have
        each entry processed recursively.

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        Raises Exception for an unknown '_type'.
        """

        result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            # NOTE(review): extract_info can return None (e.g. after an
            # error was reported but not raised); make_result would then
            # fail on `f in embedded_info` -- confirm whether that can
            # happen here.
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            def make_result(embedded_info):
                # Start from a copy of the embedding result; for each listed
                # field, drop the embedding page's value and take the
                # embedded one when it exists.
                new_result = ie_result.copy()
                for f in ('_type', 'url', 'ext', 'player_url', 'formats',
                          'entries', 'urlhandle', 'ie_key', 'duration',
                          'subtitles', 'annotations', 'format',
                          'thumbnail', 'thumbnails'):
                    if f in new_result:
                        del new_result[f]
                    if f in embedded_info:
                        new_result[f] = embedded_info[f]
                return new_result
            new_result = make_result(info)

            assert new_result.get('_type') != 'url_transparent'
            if new_result.get('_type') == 'compat_list':
                new_result['entries'] = [
                    make_result(e) for e in new_result['entries']]

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen(u'[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            n_all_entries = len(ie_result['entries'])
            # 'playliststart' is 1-based; convert it to a slice index
            playliststart = self.params.get('playliststart', 1) - 1
            # -1 means "up to the last entry"
            playlistend = self.params.get('playlistend', -1)

            if playlistend == -1:
                entries = ie_result['entries'][playliststart:]
            else:
                entries = ie_result['entries'][playliststart:playlistend]

            n_entries = len(entries)

            self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))

            for i, entry in enumerate(entries, 1):
                self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
                # Values inherited by the entry; playlist_index is the
                # 1-based position in the full (unsliced) playlist.
                extra = {
                    'playlist': playlist,
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'extractor_key': ie_result['extractor_key'],
                }

                # Skipped entries are reported and left out of the results
                reason = self._match_entry(entry)
                if reason is not None:
                    self.to_screen(u'[download] ' + reason)
                    continue

                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            return ie_result
        elif result_type == 'compat_list':
            def _fixup(r):
                # Give each old-style entry the extractor fields of the
                # enclosing result if it lacks them.
                self.add_extra_info(r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'extractor_key': ie_result['extractor_key'],
                    })
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
 565
 566     def select_format(self, format_spec, available_formats):
 567         if format_spec == 'best' or format_spec is None:
 568             return available_formats[-1]
 569         elif format_spec == 'worst':
 570             return available_formats[0]
 571         else:
 572             extensions = [u'mp4', u'flv', u'webm', u'3gp']
 573             if format_spec in extensions:
 574                 filter_f = lambda f: f['ext'] == format_spec
 575             else:
 576                 filter_f = lambda f: f['format_id'] == format_spec
 577             matches = list(filter(filter_f, available_formats))
 578             if matches:
 579                 return matches[-1]
 580         return None
 581
    def process_video_result(self, info_dict, download=True):
        """Select the format(s) of a single video result and, if 'download',
        hand each selected format to process_info.

        Returns info_dict updated with the last selected format, or None
        when the 'listformats' param is set (the formats are only listed).
        Raises ExtractorError when none of the requested formats exists.
        Note that the format dicts are completed in place (format_id,
        format, ext).
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        # This extractors handle format selection themselves
        if info_dict['extractor'] in [u'youtube', u'Youku']:
            if download:
                self.process_info(info_dict)
            return info_dict

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        # We check that all the formats have the format and format_id fields
        # (the loop variable shadows the builtin `format` inside this method)
        for (i, format) in enumerate(formats):
            if format.get('format_id') is None:
                # Fall back to the position in the list as the id
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
                format['format'] = u'{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url'])

        if self.params.get('listformats', None):
            self.list_formats(info_dict)
            return

        format_limit = self.params.get('format_limit', None)
        if format_limit:
            # presumably keeps formats up to and including the one whose id
            # equals format_limit -- verify against takewhile_inclusive
            formats = list(takewhile_inclusive(
                lambda f: f['format_id'] != format_limit, formats
            ))
        if self.params.get('prefer_free_formats'):
            def _free_formats_key(f):
                try:
                    ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
                except ValueError:
                    # Extensions outside the list sort before the listed ones
                    ext_ord = -1
                # We only compare the extension if they have the same height and width
                return (f.get('height'), f.get('width'), ext_ord)
            formats = sorted(formats, key=_free_formats_key)

        req_format = self.params.get('format', 'best')
        if req_format is None:
            req_format = 'best'
        formats_to_download = []
        # The -1 is for supporting YoutubeIE
        if req_format in ('-1', 'all'):
            formats_to_download = formats
        else:
            # We can accept formats requestd in the format: 34/5/best, we pick
            # the first that is available, starting from left
            req_formats = req_format.split('/')
            for rf in req_formats:
                selected_format = self.select_format(rf, formats)
                if selected_format is not None:
                    formats_to_download = [selected_format]
                    break
        if not formats_to_download:
            raise ExtractorError(u'requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                # Each download gets its own copy of the info merged with
                # the fields of the selected format
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
 666
 667     def process_info(self, info_dict):
 668         """Process a single resolved IE result."""
 669
 670         assert info_dict.get('_type', 'video') == 'video'
 671         #We increment the download the download count here to match the previous behaviour.
 672         self.increment_downloads()
 673
 674         info_dict['fulltitle'] = info_dict['title']
 675         if len(info_dict['title']) > 200:
 676             info_dict['title'] = info_dict['title'][:197] + u'...'
 677
 678         # Keep for backwards compatibility
 679         info_dict['stitle'] = info_dict['title']
 680
 681         if not 'format' in info_dict:
 682             info_dict['format'] = info_dict['ext']
 683
 684         reason = self._match_entry(info_dict)
 685         if reason is not None:
 686             self.to_screen(u'[download] ' + reason)
 687             return
 688
 689         max_downloads = self.params.get('max_downloads')
 690         if max_downloads is not None:
 691             if self._num_downloads > int(max_downloads):
 692                 raise MaxDownloadsReached()
 693
 694         filename = self.prepare_filename(info_dict)
 695
 696         # Forced printings
 697         if self.params.get('forcetitle', False):
 698             compat_print(info_dict['fulltitle'])
 699         if self.params.get('forceid', False):
 700             compat_print(info_dict['id'])
 701         if self.params.get('forceurl', False):
 702             # For RTMP URLs, also include the playpath
 703             compat_print(info_dict['url'] + info_dict.get('play_path', u''))
 704         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
 705             compat_print(info_dict['thumbnail'])
 706         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
 707             compat_print(info_dict['description'])
 708         if self.params.get('forcefilename', False) and filename is not None:
 709             compat_print(filename)
 710         if self.params.get('forceformat', False):
 711             compat_print(info_dict['format'])
 712         if self.params.get('forcejson', False):
 713             compat_print(json.dumps(info_dict))
 714
 715         # Do nothing else if in simulate mode
 716         if self.params.get('simulate', False):
 717             return
 718
 719         if filename is None:
 720             return
 721
 722         try:
 723             dn = os.path.dirname(encodeFilename(filename))
 724             if dn != '' and not os.path.exists(dn):
 725                 os.makedirs(dn)
 726         except (OSError, IOError) as err:
 727             self.report_error(u'unable to create directory ' + compat_str(err))
 728             return
 729
 730         if self.params.get('writedescription', False):
 731             try:
 732                 descfn = filename + u'.description'
 733                 self.report_writedescription(descfn)
 734                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
 735                     descfile.write(info_dict['description'])
 736             except (KeyError, TypeError):
 737                 self.report_warning(u'There\'s no description to write.')
 738             except (OSError, IOError):
 739                 self.report_error(u'Cannot write description file ' + descfn)
 740                 return
 741
 742         if self.params.get('writeannotations', False):
 743             try:
 744                 annofn = filename + u'.annotations.xml'
 745                 self.report_writeannotations(annofn)
 746                 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
 747                     annofile.write(info_dict['annotations'])
 748             except (KeyError, TypeError):
 749                 self.report_warning(u'There are no annotations to write.')
 750             except (OSError, IOError):
 751                 self.report_error(u'Cannot write annotations file: ' + annofn)
 752                 return
 753
 754         subtitles_are_requested = any([self.params.get('writesubtitles', False),
 755                                        self.params.get('writeautomaticsub')])
 756
 757         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
 758             # subtitles download errors are already managed as troubles in relevant IE
 759             # that way it will silently go on when used with unsupporting IE
 760             subtitles = info_dict['subtitles']
 761             sub_format = self.params.get('subtitlesformat', 'srt')
 762             for sub_lang in subtitles.keys():
 763                 sub = subtitles[sub_lang]
 764                 if sub is None:
 765                     continue
 766                 try:
 767                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
 768                     self.report_writesubtitles(sub_filename)
 769                     with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
 770                             subfile.write(sub)
 771                 except (OSError, IOError):
 772                     self.report_error(u'Cannot write subtitles file ' + descfn)
 773                     return
 774
 775         if self.params.get('writeinfojson', False):
 776             infofn = os.path.splitext(filename)[0] + u'.info.json'
 777             self.report_writeinfojson(infofn)
 778             try:
 779                 json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
 780                 write_json_file(json_info_dict, encodeFilename(infofn))
 781             except (OSError, IOError):
 782                 self.report_error(u'Cannot write metadata to JSON file ' + infofn)
 783                 return
 784
 785         if self.params.get('writethumbnail', False):
 786             if info_dict.get('thumbnail') is not None:
 787                 thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
 788                 thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
 789                 self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
 790                                (info_dict['extractor'], info_dict['id']))
 791                 try:
 792                     uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
 793                     with open(thumb_filename, 'wb') as thumbf:
 794                         shutil.copyfileobj(uf, thumbf)
 795                     self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
 796                         (info_dict['extractor'], info_dict['id'], thumb_filename))
 797                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 798                     self.report_warning(u'Unable to download thumbnail "%s": %s' %
 799                         (info_dict['thumbnail'], compat_str(err)))
 800
 801         if not self.params.get('skip_download', False):
 802             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
 803                 success = True
 804             else:
 805                 try:
 806                     success = self.fd._do_download(filename, info_dict)
 807                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 808                     self.report_error(u'unable to download video data: %s' % str(err))
 809                     return
 810                 except (OSError, IOError) as err:
 811                     raise UnavailableVideoError(err)
 812                 except (ContentTooShortError, ) as err:
 813                     self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
 814                     return
 815
 816             if success:
 817                 try:
 818                     self.post_process(filename, info_dict)
 819                 except (PostProcessingError) as err:
 820                     self.report_error(u'postprocessing: %s' % str(err))
 821                     return
 822
 823         self.record_download_archive(info_dict)
 824
 825     def download(self, url_list):
 826         """Download a given list of URLs."""
 827         if (len(url_list) > 1 and
 828                 '%' not in self.params['outtmpl']
 829                 and self.params.get('max_downloads') != 1):
 830             raise SameFileError(self.params['outtmpl'])
 831
 832         for url in url_list:
 833             try:
 834                 #It also downloads the videos
 835                 self.extract_info(url)
 836             except UnavailableVideoError:
 837                 self.report_error(u'unable to download video')
 838             except MaxDownloadsReached:
 839                 self.to_screen(u'[info] Maximum number of downloaded files reached.')
 840                 raise
 841
 842         return self._download_retcode
 843
 844     def post_process(self, filename, ie_info):
 845         """Run all the postprocessors on the given file."""
 846         info = dict(ie_info)
 847         info['filepath'] = filename
 848         keep_video = None
 849         for pp in self._pps:
 850             try:
 851                 keep_video_wish, new_info = pp.run(info)
 852                 if keep_video_wish is not None:
 853                     if keep_video_wish:
 854                         keep_video = keep_video_wish
 855                     elif keep_video is None:
 856                         # No clear decision yet, let IE decide
 857                         keep_video = keep_video_wish
 858             except PostProcessingError as e:
 859                 self.report_error(e.msg)
 860         if keep_video is False and not self.params.get('keepvideo', False):
 861             try:
 862                 self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
 863                 os.remove(encodeFilename(filename))
 864             except (IOError, OSError):
 865                 self.report_warning(u'Unable to remove downloaded video file')
 866
 867     def _make_archive_id(self, info_dict):
 868         # Future-proof against any change in case
 869         # and backwards compatibility with prior versions
 870         extractor = info_dict.get('extractor_key')
 871         if extractor is None:
 872             if 'id' in info_dict:
 873                 extractor = info_dict.get('ie_key')  # key in a playlist
 874         if extractor is None:
 875             return None  # Incomplete video information
 876         return extractor.lower() + u' ' + info_dict['id']
 877
 878     def in_download_archive(self, info_dict):
 879         fn = self.params.get('download_archive')
 880         if fn is None:
 881             return False
 882
 883         vid_id = self._make_archive_id(info_dict)
 884         if vid_id is None:
 885             return False  # Incomplete video information
 886
 887         try:
 888             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 889                 for line in archive_file:
 890                     if line.strip() == vid_id:
 891                         return True
 892         except IOError as ioe:
 893             if ioe.errno != errno.ENOENT:
 894                 raise
 895         return False
 896
 897     def record_download_archive(self, info_dict):
 898         fn = self.params.get('download_archive')
 899         if fn is None:
 900             return
 901         vid_id = self._make_archive_id(info_dict)
 902         assert vid_id
 903         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
 904             archive_file.write(vid_id + u'\n')
 905
 906     @staticmethod
 907     def format_resolution(format, default='unknown'):
 908         if format.get('vcodec') == 'none':
 909             return 'audio only'
 910         if format.get('_resolution') is not None:
 911             return format['_resolution']
 912         if format.get('height') is not None:
 913             if format.get('width') is not None:
 914                 res = u'%sx%s' % (format['width'], format['height'])
 915             else:
 916                 res = u'%sp' % format['height']
 917         else:
 918             res = default
 919         return res
 920
 921     def list_formats(self, info_dict):
 922         def format_note(fdict):
 923             res = u''
 924             if fdict.get('format_note') is not None:
 925                 res += fdict['format_note'] + u' '
 926             if (fdict.get('vcodec') is not None and
 927                     fdict.get('vcodec') != 'none'):
 928                 res += u'%-5s' % fdict['vcodec']
 929             elif fdict.get('vbr') is not None:
 930                 res += u'video'
 931             if fdict.get('vbr') is not None:
 932                 res += u'@%4dk' % fdict['vbr']
 933             if fdict.get('acodec') is not None:
 934                 if res:
 935                     res += u', '
 936                 res += u'%-5s' % fdict['acodec']
 937             elif fdict.get('abr') is not None:
 938                 if res:
 939                     res += u', '
 940                 res += 'audio'
 941             if fdict.get('abr') is not None:
 942                 res += u'@%3dk' % fdict['abr']
 943             if fdict.get('filesize') is not None:
 944                 if res:
 945                     res += u', '
 946                 res += format_bytes(fdict['filesize'])
 947             return res
 948
 949         def line(format, idlen=20):
 950             return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % (
 951                 format['format_id'],
 952                 format['ext'],
 953                 self.format_resolution(format),
 954                 format_note(format),
 955             ))
 956
 957         formats = info_dict.get('formats', [info_dict])
 958         idlen = max(len(u'format code'),
 959                     max(len(f['format_id']) for f in formats))
 960         formats_s = [line(f, idlen) for f in formats]
 961         if len(formats) > 1:
 962             formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
 963             formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
 964
 965         header_line = line({
 966             'format_id': u'format code', 'ext': u'extension',
 967             '_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
 968         self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
 969                        (info_dict['id'], header_line, u"\n".join(formats_s)))
 970
 971     def urlopen(self, req):
 972         """ Start an HTTP download """
 973         return self._opener.open(req)
 974
 975     def print_debug_header(self):
 976         if not self.params.get('verbose'):
 977             return
 978         write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
 979         try:
 980             sp = subprocess.Popen(
 981                 ['git', 'rev-parse', '--short', 'HEAD'],
 982                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
 983                 cwd=os.path.dirname(os.path.abspath(__file__)))
 984             out, err = sp.communicate()
 985             out = out.decode().strip()
 986             if re.match('[0-9a-f]+', out):
 987                 write_string(u'[debug] Git HEAD: ' + out + u'\n')
 988         except:
 989             try:
 990                 sys.exc_clear()
 991             except:
 992                 pass
 993         write_string(u'[debug] Python version %s - %s' %
 994                      (platform.python_version(), platform_name()) + u'\n')
 995
 996         proxy_map = {}
 997         for handler in self._opener.handlers:
 998             if hasattr(handler, 'proxies'):
 999                 proxy_map.update(handler.proxies)
1000         write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
1001
1002     def _setup_opener(self):
1003         timeout_val = self.params.get('socket_timeout')
1004         timeout = 600 if timeout_val is None else float(timeout_val)
1005
1006         opts_cookiefile = self.params.get('cookiefile')
1007         opts_proxy = self.params.get('proxy')
1008
1009         if opts_cookiefile is None:
1010             self.cookiejar = compat_cookiejar.CookieJar()
1011         else:
1012             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1013                 opts_cookiefile)
1014             if os.access(opts_cookiefile, os.R_OK):
1015                 self.cookiejar.load()
1016
1017         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1018             self.cookiejar)
1019         if opts_proxy is not None:
1020             if opts_proxy == '':
1021                 proxies = {}
1022             else:
1023                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1024         else:
1025             proxies = compat_urllib_request.getproxies()
1026             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1027             if 'http' in proxies and 'https' not in proxies:
1028                 proxies['https'] = proxies['http']
1029         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1030         https_handler = make_HTTPS_handler(
1031             self.params.get('nocheckcertificate', False))
1032         opener = compat_urllib_request.build_opener(
1033             https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
1034         # Delete the default user-agent header, which would otherwise apply in
1035         # cases where our custom HTTP handler doesn't come into play
1036         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1037         opener.addheaders = []
1038         self._opener = opener
1039
1040         # TODO remove this global modification
1041         compat_urllib_request.install_opener(opener)
1042         socket.setdefaulttimeout(timeout)