_ Git - youtube-dl/blob - youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import datetime
   8 import errno
   9 import io
  10 import json
  11 import locale
  12 import os
  13 import platform
  14 import re
  15 import shutil
  16 import subprocess
  17 import socket
  18 import sys
  19 import time
  20 import traceback
  21
  22 if os.name == 'nt':
  23     import ctypes
  24
  25 from .utils import (
  26     compat_cookiejar,
  27     compat_http_client,
  28     compat_str,
  29     compat_urllib_error,
  30     compat_urllib_request,
  31     ContentTooShortError,
  32     date_from_str,
  33     DateRange,
  34     DEFAULT_OUTTMPL,
  35     determine_ext,
  36     DownloadError,
  37     encodeFilename,
  38     ExtractorError,
  39     format_bytes,
  40     formatSeconds,
  41     get_term_width,
  42     locked_file,
  43     make_HTTPS_handler,
  44     MaxDownloadsReached,
  45     PagedList,
  46     PostProcessingError,
  47     platform_name,
  48     preferredencoding,
  49     SameFileError,
  50     sanitize_filename,
  51     subtitles_filename,
  52     takewhile_inclusive,
  53     UnavailableVideoError,
  54     url_basename,
  55     write_json_file,
  56     write_string,
  57     YoutubeDLHandler,
  58     prepend_extension,
  59 )
  60 from .cache import Cache
  61 from .extractor import get_info_extractor, gen_extractors
  62 from .downloader import get_suitable_downloader
  63 from .postprocessor import FFmpegMergerPP
  64 from .version import __version__
  65
  66
  67 class YoutubeDL(object):
  68     """YoutubeDL class.
  69
  70     YoutubeDL objects are the ones responsible of downloading the
  71     actual video file and writing it to disk if the user has requested
  72     it, among some other tasks. In most cases there should be one per
  73     program. As, given a video URL, the downloader doesn't know how to
  74     extract all the needed information, task that InfoExtractors do, it
  75     has to pass the URL to one of them.
  76
  77     For this, YoutubeDL objects have a method that allows
  78     InfoExtractors to be registered in a given order. When it is passed
  79     a URL, the YoutubeDL object handles it to the first InfoExtractor it
  80     finds that reports being able to handle it. The InfoExtractor extracts
  81     all the information about the video or videos the URL refers to, and
  82     YoutubeDL process the extracted information, possibly using a File
  83     Downloader to download the video.
  84
  85     YoutubeDL objects accept a lot of parameters. In order not to saturate
  86     the object constructor with arguments, it receives a dictionary of
  87     options instead. These options are available through the params
  88     attribute for the InfoExtractors to use. The YoutubeDL also
  89     registers itself as the downloader in charge for the InfoExtractors
  90     that are added to it, so this is a "mutual registration".
  91
  92     Available options:
  93
  94     username:          Username for authentication purposes.
  95     password:          Password for authentication purposes.
  96     videopassword:     Password for accessing a video.
  97     usenetrc:          Use netrc for authentication instead.
  98     verbose:           Print additional info to stdout.
  99     quiet:             Do not print messages to stdout.
 100     no_warnings:       Do not print out anything for warnings.
 101     forceurl:          Force printing final URL.
 102     forcetitle:        Force printing title.
 103     forceid:           Force printing ID.
 104     forcethumbnail:    Force printing thumbnail URL.
 105     forcedescription:  Force printing description.
 106     forcefilename:     Force printing final filename.
 107     forceduration:     Force printing duration.
 108     forcejson:         Force printing info_dict as JSON.
 109     simulate:          Do not download the video files.
 110     format:            Video format code.
 111     format_limit:      Highest quality format to try.
 112     outtmpl:           Template for output names.
 113     restrictfilenames: Do not allow "&" and spaces in file names
 114     ignoreerrors:      Do not stop on download errors.
 115     nooverwrites:      Prevent overwriting files.
 116     playliststart:     Playlist item to start at.
 117     playlistend:       Playlist item to end at.
 118     matchtitle:        Download only matching titles.
 119     rejecttitle:       Reject downloads for matching titles.
 120     logger:            Log messages to a logging.Logger instance.
 121     logtostderr:       Log messages to stderr instead of stdout.
 122     writedescription:  Write the video description to a .description file
 123     writeinfojson:     Write the video description to a .info.json file
 124     writeannotations:  Write the video annotations to a .annotations.xml file
 125     writethumbnail:    Write the thumbnail image to a file
 126     writesubtitles:    Write the video subtitles to a file
 127     writeautomaticsub: Write the automatic subtitles to a file
 128     allsubtitles:      Downloads all the subtitles of the video
 129                        (requires writesubtitles or writeautomaticsub)
 130     listsubtitles:     Lists all available subtitles for the video
 131     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
 132     subtitleslangs:    List of languages of the subtitles to download
 133     keepvideo:         Keep the video file after post-processing
 134     daterange:         A DateRange object, download only if the upload_date is in the range.
 135     skip_download:     Skip the actual download of the video file
 136     cachedir:          Location of the cache files in the filesystem.
 137                        False to disable filesystem cache.
 138     noplaylist:        Download single video instead of a playlist if in doubt.
 139     age_limit:         An integer representing the user's age in years.
 140                        Unsuitable videos for the given age are skipped.
 141     min_views:         An integer representing the minimum view count the video
 142                        must have in order to not be skipped.
 143                        Videos without view count information are always
 144                        downloaded. None for no limit.
 145     max_views:         An integer representing the maximum view count.
 146                        Videos that are more popular than that are not
 147                        downloaded.
 148                        Videos without view count information are always
 149                        downloaded. None for no limit.
 150     download_archive:  File name of a file where all downloads are recorded.
 151                        Videos already present in the file are not downloaded
 152                        again.
 153     cookiefile:        File name where cookies should be read from and dumped to.
 154     nocheckcertificate:Do not verify SSL certificates
 155     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 156                        At the moment, this is only supported by YouTube.
 157     proxy:             URL of the proxy server to use
 158     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 159     bidi_workaround:   Work around buggy terminals without bidirectional text
 160                        support, using fribidi
 161     debug_printtraffic:Print out sent and received HTTP traffic
 162     include_ads:       Download ads as well
 163     default_search:    Prepend this string if an input url is not valid.
 164                        'auto' for elaborate guessing
 165     encoding:          Use this encoding instead of the system-specified.
 166     extract_flat:      Do not resolve URLs, return the immediate result.
 167
 168     The following parameters are not used by YoutubeDL itself, they are used by
 169     the FileDownloader:
 170     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 171     noresizebuffer, retries, continuedl, noprogress, consoletitle
 172
 173     The following options are used by the post processors:
 174     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
 175                        otherwise prefer avconv.
 176     exec_cmd:          Arbitrary command to run after downloading
 177     """
 178
 179     params = None
 180     _ies = []
 181     _pps = []
 182     _download_retcode = None
 183     _num_downloads = None
 184     _screen_file = None
 185
 186     def __init__(self, params=None):
 187         """Create a FileDownloader object with the given options."""
 188         if params is None:
 189             params = {}
 190         self._ies = []
 191         self._ies_instances = {}
 192         self._pps = []
 193         self._progress_hooks = []
 194         self._download_retcode = 0
 195         self._num_downloads = 0
 196         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 197         self._err_file = sys.stderr
 198         self.params = params
 199         self.cache = Cache(self)
 200
 201         if params.get('bidi_workaround', False):
 202             try:
 203                 import pty
 204                 master, slave = pty.openpty()
 205                 width = get_term_width()
 206                 if width is None:
 207                     width_args = []
 208                 else:
 209                     width_args = ['-w', str(width)]
 210                 sp_kwargs = dict(
 211                     stdin=subprocess.PIPE,
 212                     stdout=slave,
 213                     stderr=self._err_file)
 214                 try:
 215                     self._output_process = subprocess.Popen(
 216                         ['bidiv'] + width_args, **sp_kwargs
 217                     )
 218                 except OSError:
 219                     self._output_process = subprocess.Popen(
 220                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 221                 self._output_channel = os.fdopen(master, 'rb')
 222             except OSError as ose:
 223                 if ose.errno == 2:
 224                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 225                 else:
 226                     raise
 227
 228         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 229                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 230                 and not params['restrictfilenames']):
 231             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 232             self.report_warning(
 233                 'Assuming --restrict-filenames since file system encoding '
 234                 'cannot encode all charactes. '
 235                 'Set the LC_ALL environment variable to fix this.')
 236             self.params['restrictfilenames'] = True
 237
 238         if '%(stitle)s' in self.params.get('outtmpl', ''):
 239             self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 240
 241         self._setup_opener()
 242
 243     def add_info_extractor(self, ie):
 244         """Add an InfoExtractor object to the end of the list."""
 245         self._ies.append(ie)
 246         self._ies_instances[ie.ie_key()] = ie
 247         ie.set_downloader(self)
 248
 249     def get_info_extractor(self, ie_key):
 250         """
 251         Get an instance of an IE with name ie_key, it will try to get one from
 252         the _ies list, if there's no instance it will create a new one and add
 253         it to the extractor list.
 254         """
 255         ie = self._ies_instances.get(ie_key)
 256         if ie is None:
 257             ie = get_info_extractor(ie_key)()
 258             self.add_info_extractor(ie)
 259         return ie
 260
 261     def add_default_info_extractors(self):
 262         """
 263         Add the InfoExtractors returned by gen_extractors to the end of the list
 264         """
 265         for ie in gen_extractors():
 266             self.add_info_extractor(ie)
 267
 268     def add_post_processor(self, pp):
 269         """Add a PostProcessor object to the end of the chain."""
 270         self._pps.append(pp)
 271         pp.set_downloader(self)
 272
 273     def add_progress_hook(self, ph):
 274         """Add the progress hook (currently only for the file downloader)"""
 275         self._progress_hooks.append(ph)
 276
 277     def _bidi_workaround(self, message):
 278         if not hasattr(self, '_output_channel'):
 279             return message
 280
 281         assert hasattr(self, '_output_process')
 282         assert isinstance(message, compat_str)
 283         line_count = message.count('\n') + 1
 284         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 285         self._output_process.stdin.flush()
 286         res = ''.join(self._output_channel.readline().decode('utf-8')
 287                        for _ in range(line_count))
 288         return res[:-len('\n')]
 289
 290     def to_screen(self, message, skip_eol=False):
 291         """Print message to stdout if not in quiet mode."""
 292         return self.to_stdout(message, skip_eol, check_quiet=True)
 293
 294     def _write_string(self, s, out=None):
 295         write_string(s, out=out, encoding=self.params.get('encoding'))
 296
 297     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 298         """Print message to stdout if not in quiet mode."""
 299         if self.params.get('logger'):
 300             self.params['logger'].debug(message)
 301         elif not check_quiet or not self.params.get('quiet', False):
 302             message = self._bidi_workaround(message)
 303             terminator = ['\n', ''][skip_eol]
 304             output = message + terminator
 305
 306             self._write_string(output, self._screen_file)
 307
 308     def to_stderr(self, message):
 309         """Print message to stderr."""
 310         assert isinstance(message, compat_str)
 311         if self.params.get('logger'):
 312             self.params['logger'].error(message)
 313         else:
 314             message = self._bidi_workaround(message)
 315             output = message + '\n'
 316             self._write_string(output, self._err_file)
 317
 318     def to_console_title(self, message):
 319         if not self.params.get('consoletitle', False):
 320             return
 321         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 322             # c_wchar_p() might not be necessary if `message` is
 323             # already of type unicode()
 324             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 325         elif 'TERM' in os.environ:
 326             self._write_string('\033]0;%s\007' % message, self._screen_file)
 327
 328     def save_console_title(self):
 329         if not self.params.get('consoletitle', False):
 330             return
 331         if 'TERM' in os.environ:
 332             # Save the title on stack
 333             self._write_string('\033[22;0t', self._screen_file)
 334
 335     def restore_console_title(self):
 336         if not self.params.get('consoletitle', False):
 337             return
 338         if 'TERM' in os.environ:
 339             # Restore the title from stack
 340             self._write_string('\033[23;0t', self._screen_file)
 341
 342     def __enter__(self):
 343         self.save_console_title()
 344         return self
 345
 346     def __exit__(self, *args):
 347         self.restore_console_title()
 348
 349         if self.params.get('cookiefile') is not None:
 350             self.cookiejar.save()
 351
 352     def trouble(self, message=None, tb=None):
 353         """Determine action to take when a download problem appears.
 354
 355         Depending on if the downloader has been configured to ignore
 356         download errors or not, this method may throw an exception or
 357         not when errors are found, after printing the message.
 358
 359         tb, if given, is additional traceback information.
 360         """
 361         if message is not None:
 362             self.to_stderr(message)
 363         if self.params.get('verbose'):
 364             if tb is None:
 365                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 366                     tb = ''
 367                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 368                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 369                     tb += compat_str(traceback.format_exc())
 370                 else:
 371                     tb_data = traceback.format_list(traceback.extract_stack())
 372                     tb = ''.join(tb_data)
 373             self.to_stderr(tb)
 374         if not self.params.get('ignoreerrors', False):
 375             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 376                 exc_info = sys.exc_info()[1].exc_info
 377             else:
 378                 exc_info = sys.exc_info()
 379             raise DownloadError(message, exc_info)
 380         self._download_retcode = 1
 381
 382     def report_warning(self, message):
 383         '''
 384         Print the message to stderr, it will be prefixed with 'WARNING:'
 385         If stderr is a tty file the 'WARNING:' will be colored
 386         '''
 387         if self.params.get('logger') is not None:
 388             self.params['logger'].warning(message)
 389         else:
 390             if self.params.get('no_warnings'):
 391                 return
 392             if self._err_file.isatty() and os.name != 'nt':
 393                 _msg_header = '\033[0;33mWARNING:\033[0m'
 394             else:
 395                 _msg_header = 'WARNING:'
 396             warning_message = '%s %s' % (_msg_header, message)
 397             self.to_stderr(warning_message)
 398
 399     def report_error(self, message, tb=None):
 400         '''
 401         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 402         in red if stderr is a tty file.
 403         '''
 404         if self._err_file.isatty() and os.name != 'nt':
 405             _msg_header = '\033[0;31mERROR:\033[0m'
 406         else:
 407             _msg_header = 'ERROR:'
 408         error_message = '%s %s' % (_msg_header, message)
 409         self.trouble(error_message, tb)
 410
 411     def report_file_already_downloaded(self, file_name):
 412         """Report file has already been fully downloaded."""
 413         try:
 414             self.to_screen('[download] %s has already been downloaded' % file_name)
 415         except UnicodeEncodeError:
 416             self.to_screen('[download] The file has already been downloaded')
 417
 418     def prepare_filename(self, info_dict):
 419         """Generate the output filename."""
 420         try:
 421             template_dict = dict(info_dict)
 422
 423             template_dict['epoch'] = int(time.time())
 424             autonumber_size = self.params.get('autonumber_size')
 425             if autonumber_size is None:
 426                 autonumber_size = 5
 427             autonumber_templ = '%0' + str(autonumber_size) + 'd'
 428             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 429             if template_dict.get('playlist_index') is not None:
 430                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
 431             if template_dict.get('resolution') is None:
 432                 if template_dict.get('width') and template_dict.get('height'):
 433                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 434                 elif template_dict.get('height'):
 435                     template_dict['resolution'] = '%sp' % template_dict['height']
 436                 elif template_dict.get('width'):
 437                     template_dict['resolution'] = '?x%d' % template_dict['width']
 438
 439             sanitize = lambda k, v: sanitize_filename(
 440                 compat_str(v),
 441                 restricted=self.params.get('restrictfilenames'),
 442                 is_id=(k == 'id'))
 443             template_dict = dict((k, sanitize(k, v))
 444                                  for k, v in template_dict.items()
 445                                  if v is not None)
 446             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 447
 448             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 449             tmpl = os.path.expanduser(outtmpl)
 450             filename = tmpl % template_dict
 451             return filename
 452         except ValueError as err:
 453             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 454             return None
 455
 456     def _match_entry(self, info_dict):
 457         """ Returns None iff the file should be downloaded """
 458
 459         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 460         if 'title' in info_dict:
 461             # This can happen when we're just evaluating the playlist
 462             title = info_dict['title']
 463             matchtitle = self.params.get('matchtitle', False)
 464             if matchtitle:
 465                 if not re.search(matchtitle, title, re.IGNORECASE):
 466                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 467             rejecttitle = self.params.get('rejecttitle', False)
 468             if rejecttitle:
 469                 if re.search(rejecttitle, title, re.IGNORECASE):
 470                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 471         date = info_dict.get('upload_date', None)
 472         if date is not None:
 473             dateRange = self.params.get('daterange', DateRange())
 474             if date not in dateRange:
 475                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 476         view_count = info_dict.get('view_count', None)
 477         if view_count is not None:
 478             min_views = self.params.get('min_views')
 479             if min_views is not None and view_count < min_views:
 480                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 481             max_views = self.params.get('max_views')
 482             if max_views is not None and view_count > max_views:
 483                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 484         age_limit = self.params.get('age_limit')
 485         if age_limit is not None:
 486             actual_age_limit = info_dict.get('age_limit')
 487             if actual_age_limit is None:
 488                 actual_age_limit = 0
 489             if age_limit < actual_age_limit:
 490                 return 'Skipping "' + title + '" because it is age restricted'
 491         if self.in_download_archive(info_dict):
 492             return '%s has already been recorded in archive' % video_title
 493         return None
 494
 495     @staticmethod
 496     def add_extra_info(info_dict, extra_info):
 497         '''Set the keys from extra_info in info dict if they are missing'''
 498         for key, value in extra_info.items():
 499             info_dict.setdefault(key, value)
 500
 501     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 502                      process=True):
 503         '''
 504         Returns a list with a dictionary for each video we find.
 505         If 'download', also downloads the videos.
 506         extra_info is a dict containing the extra values to add to each result
 507          '''
 508
 509         if ie_key:
 510             ies = [self.get_info_extractor(ie_key)]
 511         else:
 512             ies = self._ies
 513
 514         for ie in ies:
 515             if not ie.suitable(url):
 516                 continue
 517
 518             if not ie.working():
 519                 self.report_warning('The program functionality for this site has been marked as broken, '
 520                                     'and will probably not work.')
 521
 522             try:
 523                 ie_result = ie.extract(url)
 524                 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
 525                     break
 526                 if isinstance(ie_result, list):
 527                     # Backwards compatibility: old IE result format
 528                     ie_result = {
 529                         '_type': 'compat_list',
 530                         'entries': ie_result,
 531                     }
 532                 self.add_default_extra_info(ie_result, ie, url)
 533                 if process:
 534                     return self.process_ie_result(ie_result, download, extra_info)
 535                 else:
 536                     return ie_result
 537             except ExtractorError as de: # An error we somewhat expected
 538                 self.report_error(compat_str(de), de.format_traceback())
 539                 break
 540             except MaxDownloadsReached:
 541                 raise
 542             except Exception as e:
 543                 if self.params.get('ignoreerrors', False):
 544                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 545                     break
 546                 else:
 547                     raise
 548         else:
 549             self.report_error('no suitable InfoExtractor for URL %s' % url)
 550
 551     def add_default_extra_info(self, ie_result, ie, url):
 552         self.add_extra_info(ie_result, {
 553             'extractor': ie.IE_NAME,
 554             'webpage_url': url,
 555             'webpage_url_basename': url_basename(url),
 556             'extractor_key': ie.ie_key(),
 557         })
 558
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.

        The result is dispatched on ie_result['_type'] (default 'video'):
        'video', 'url', 'url_transparent', 'playlist' and 'compat_list' are
        handled; any other value raises Exception. With the 'extract_flat'
        option, 'url' and 'url_transparent' results are returned unresolved.
        extra_info holds values that are added to video results when they
        are missing there.
        """

        result_type = ie_result.get('_type', 'video')

        # extract_flat: hand back URL references without following them
        if self.params.get('extract_flat', False):
            if result_type in ('url', 'url_transparent'):
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            # process=False: fetch the raw extractor result so it can be
            # merged with ie_result before any processing/downloading.
            # NOTE(review): extract_info returns None after reporting an
            # error; make_result(None) would then fail on `f in embedded_info`
            # - confirm whether that case needs handling.
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # Copy of ie_result in which the listed fields come from
            # embedded_info; a listed field missing there is dropped.
            def make_result(embedded_info):
                new_result = ie_result.copy()
                for f in ('_type', 'url', 'ext', 'player_url', 'formats',
                          'entries', 'ie_key', 'duration',
                          'subtitles', 'annotations', 'format',
                          'thumbnail', 'thumbnails'):
                    if f in new_result:
                        del new_result[f]
                    if f in embedded_info:
                        new_result[f] = embedded_info[f]
                return new_result
            new_result = make_result(info)

            assert new_result.get('_type') != 'url_transparent'
            if new_result.get('_type') == 'compat_list':
                new_result['entries'] = [
                    make_result(e) for e in new_result['entries']]

            # Recurse: the merged result still has to be resolved
            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # The option is 1-based; convert it to a 0-based slice start
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

            if isinstance(ie_result['entries'], list):
                n_all_entries = len(ie_result['entries'])
                entries = ie_result['entries'][playliststart:playlistend]
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            else:
                # Anything that is not a list must be a PagedList
                assert isinstance(ie_result['entries'], PagedList)
                entries = ie_result['entries'].getslice(
                    playliststart, playlistend)
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
                # Playlist-level values handed down to the entry as extra_info
                extra = {
                    'n_entries': n_entries,
                    'playlist': playlist,
                    # i counts within the selected slice; add the slice start
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

                # Entries rejected here are skipped and do not appear in
                # the returned 'entries'
                reason = self._match_entry(entry)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                    continue

                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            # ie_result is modified in place: 'entries' now holds the results
            ie_result['entries'] = playlist_results
            return ie_result
        elif result_type == 'compat_list':
            # Give each entry the extractor/webpage fields of the enclosing
            # result where the entry does not set them itself
            def _fixup(r):
                self.add_extra_info(r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    })
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
 679
 680     def select_format(self, format_spec, available_formats):
 681         if format_spec == 'best' or format_spec is None:
 682             return available_formats[-1]
 683         elif format_spec == 'worst':
 684             return available_formats[0]
 685         elif format_spec == 'bestaudio':
 686             audio_formats = [
 687                 f for f in available_formats
 688                 if f.get('vcodec') == 'none']
 689             if audio_formats:
 690                 return audio_formats[-1]
 691         elif format_spec == 'worstaudio':
 692             audio_formats = [
 693                 f for f in available_formats
 694                 if f.get('vcodec') == 'none']
 695             if audio_formats:
 696                 return audio_formats[0]
 697         elif format_spec == 'bestvideo':
 698             video_formats = [
 699                 f for f in available_formats
 700                 if f.get('acodec') == 'none']
 701             if video_formats:
 702                 return video_formats[-1]
 703         elif format_spec == 'worstvideo':
 704             video_formats = [
 705                 f for f in available_formats
 706                 if f.get('acodec') == 'none']
 707             if video_formats:
 708                 return video_formats[0]
 709         else:
 710             extensions = ['mp4', 'flv', 'webm', '3gp']
 711             if format_spec in extensions:
 712                 filter_f = lambda f: f['ext'] == format_spec
 713             else:
 714                 filter_f = lambda f: f['format_id'] == format_spec
 715             matches = list(filter(filter_f, available_formats))
 716             if matches:
 717                 return matches[-1]
 718         return None
 719
 720     def process_video_result(self, info_dict, download=True):
 721         assert info_dict.get('_type', 'video') == 'video'
 722
 723         if 'id' not in info_dict:
 724             raise ExtractorError('Missing "id" field in extractor result')
 725         if 'title' not in info_dict:
 726             raise ExtractorError('Missing "title" field in extractor result')
 727
 728         if 'playlist' not in info_dict:
 729             # It isn't part of a playlist
 730             info_dict['playlist'] = None
 731             info_dict['playlist_index'] = None
 732
 733         thumbnails = info_dict.get('thumbnails')
 734         if thumbnails:
 735             thumbnails.sort(key=lambda t: (
 736                 t.get('width'), t.get('height'), t.get('url')))
 737             for t in thumbnails:
 738                 if 'width' in t and 'height' in t:
 739                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
 740
 741         if thumbnails and 'thumbnail' not in info_dict:
 742             info_dict['thumbnail'] = thumbnails[-1]['url']
 743
 744         if 'display_id' not in info_dict and 'id' in info_dict:
 745             info_dict['display_id'] = info_dict['id']
 746
 747         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
 748             upload_date = datetime.datetime.utcfromtimestamp(
 749                 info_dict['timestamp'])
 750             info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
 751
 752         # This extractors handle format selection themselves
 753         if info_dict['extractor'] in ['Youku']:
 754             if download:
 755                 self.process_info(info_dict)
 756             return info_dict
 757
 758         # We now pick which formats have to be downloaded
 759         if info_dict.get('formats') is None:
 760             # There's only one format available
 761             formats = [info_dict]
 762         else:
 763             formats = info_dict['formats']
 764
 765         if not formats:
 766             raise ExtractorError('No video formats found!')
 767
 768         # We check that all the formats have the format and format_id fields
 769         for i, format in enumerate(formats):
 770             if 'url' not in format:
 771                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
 772
 773             if format.get('format_id') is None:
 774                 format['format_id'] = compat_str(i)
 775             if format.get('format') is None:
 776                 format['format'] = '{id} - {res}{note}'.format(
 777                     id=format['format_id'],
 778                     res=self.format_resolution(format),
 779                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
 780                 )
 781             # Automatically determine file extension if missing
 782             if 'ext' not in format:
 783                 format['ext'] = determine_ext(format['url']).lower()
 784
 785         format_limit = self.params.get('format_limit', None)
 786         if format_limit:
 787             formats = list(takewhile_inclusive(
 788                 lambda f: f['format_id'] != format_limit, formats
 789             ))
 790
 791         # TODO Central sorting goes here
 792
 793         if formats[0] is not info_dict:
 794             # only set the 'formats' fields if the original info_dict list them
 795             # otherwise we end up with a circular reference, the first (and unique)
 796             # element in the 'formats' field in info_dict is info_dict itself,
 797             # wich can't be exported to json
 798             info_dict['formats'] = formats
 799         if self.params.get('listformats', None):
 800             self.list_formats(info_dict)
 801             return
 802
 803         req_format = self.params.get('format')
 804         if req_format is None:
 805             req_format = 'best'
 806         formats_to_download = []
 807         # The -1 is for supporting YoutubeIE
 808         if req_format in ('-1', 'all'):
 809             formats_to_download = formats
 810         else:
 811             # We can accept formats requested in the format: 34/5/best, we pick
 812             # the first that is available, starting from left
 813             req_formats = req_format.split('/')
 814             for rf in req_formats:
 815                 if re.match(r'.+?\+.+?', rf) is not None:
 816                     # Two formats have been requested like '137+139'
 817                     format_1, format_2 = rf.split('+')
 818                     formats_info = (self.select_format(format_1, formats),
 819                         self.select_format(format_2, formats))
 820                     if all(formats_info):
 821                         selected_format = {
 822                             'requested_formats': formats_info,
 823                             'format': rf,
 824                             'ext': formats_info[0]['ext'],
 825                         }
 826                     else:
 827                         selected_format = None
 828                 else:
 829                     selected_format = self.select_format(rf, formats)
 830                 if selected_format is not None:
 831                     formats_to_download = [selected_format]
 832                     break
 833         if not formats_to_download:
 834             raise ExtractorError('requested format not available',
 835                                  expected=True)
 836
 837         if download:
 838             if len(formats_to_download) > 1:
 839                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
 840             for format in formats_to_download:
 841                 new_info = dict(info_dict)
 842                 new_info.update(format)
 843                 self.process_info(new_info)
 844         # We update the info dict with the best quality format (backwards compatibility)
 845         info_dict.update(formats_to_download[-1])
 846         return info_dict
 847
 848     def process_info(self, info_dict):
 849         """Process a single resolved IE result."""
 850
 851         assert info_dict.get('_type', 'video') == 'video'
 852
 853         max_downloads = self.params.get('max_downloads')
 854         if max_downloads is not None:
 855             if self._num_downloads >= int(max_downloads):
 856                 raise MaxDownloadsReached()
 857
 858         info_dict['fulltitle'] = info_dict['title']
 859         if len(info_dict['title']) > 200:
 860             info_dict['title'] = info_dict['title'][:197] + '...'
 861
 862         # Keep for backwards compatibility
 863         info_dict['stitle'] = info_dict['title']
 864
 865         if 'format' not in info_dict:
 866             info_dict['format'] = info_dict['ext']
 867
 868         reason = self._match_entry(info_dict)
 869         if reason is not None:
 870             self.to_screen('[download] ' + reason)
 871             return
 872
 873         self._num_downloads += 1
 874
 875         filename = self.prepare_filename(info_dict)
 876
 877         # Forced printings
 878         if self.params.get('forcetitle', False):
 879             self.to_stdout(info_dict['fulltitle'])
 880         if self.params.get('forceid', False):
 881             self.to_stdout(info_dict['id'])
 882         if self.params.get('forceurl', False):
 883             # For RTMP URLs, also include the playpath
 884             self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
 885         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
 886             self.to_stdout(info_dict['thumbnail'])
 887         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
 888             self.to_stdout(info_dict['description'])
 889         if self.params.get('forcefilename', False) and filename is not None:
 890             self.to_stdout(filename)
 891         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
 892             self.to_stdout(formatSeconds(info_dict['duration']))
 893         if self.params.get('forceformat', False):
 894             self.to_stdout(info_dict['format'])
 895         if self.params.get('forcejson', False):
 896             info_dict['_filename'] = filename
 897             self.to_stdout(json.dumps(info_dict))
 898
 899         # Do nothing else if in simulate mode
 900         if self.params.get('simulate', False):
 901             return
 902
 903         if filename is None:
 904             return
 905
 906         try:
 907             dn = os.path.dirname(encodeFilename(filename))
 908             if dn and not os.path.exists(dn):
 909                 os.makedirs(dn)
 910         except (OSError, IOError) as err:
 911             self.report_error('unable to create directory ' + compat_str(err))
 912             return
 913
 914         if self.params.get('writedescription', False):
 915             descfn = filename + '.description'
 916             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
 917                 self.to_screen('[info] Video description is already present')
 918             else:
 919                 try:
 920                     self.to_screen('[info] Writing video description to: ' + descfn)
 921                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
 922                         descfile.write(info_dict['description'])
 923                 except (KeyError, TypeError):
 924                     self.report_warning('There\'s no description to write.')
 925                 except (OSError, IOError):
 926                     self.report_error('Cannot write description file ' + descfn)
 927                     return
 928
 929         if self.params.get('writeannotations', False):
 930             annofn = filename + '.annotations.xml'
 931             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
 932                 self.to_screen('[info] Video annotations are already present')
 933             else:
 934                 try:
 935                     self.to_screen('[info] Writing video annotations to: ' + annofn)
 936                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
 937                         annofile.write(info_dict['annotations'])
 938                 except (KeyError, TypeError):
 939                     self.report_warning('There are no annotations to write.')
 940                 except (OSError, IOError):
 941                     self.report_error('Cannot write annotations file: ' + annofn)
 942                     return
 943
 944         subtitles_are_requested = any([self.params.get('writesubtitles', False),
 945                                        self.params.get('writeautomaticsub')])
 946
 947         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
 948             # subtitles download errors are already managed as troubles in relevant IE
 949             # that way it will silently go on when used with unsupporting IE
 950             subtitles = info_dict['subtitles']
 951             sub_format = self.params.get('subtitlesformat', 'srt')
 952             for sub_lang in subtitles.keys():
 953                 sub = subtitles[sub_lang]
 954                 if sub is None:
 955                     continue
 956                 try:
 957                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
 958                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
 959                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
 960                     else:
 961                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
 962                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
 963                                 subfile.write(sub)
 964                 except (OSError, IOError):
 965                     self.report_error('Cannot write subtitles file ' + sub_filename)
 966                     return
 967
 968         if self.params.get('writeinfojson', False):
 969             infofn = os.path.splitext(filename)[0] + '.info.json'
 970             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
 971                 self.to_screen('[info] Video description metadata is already present')
 972             else:
 973                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
 974                 try:
 975                     write_json_file(info_dict, encodeFilename(infofn))
 976                 except (OSError, IOError):
 977                     self.report_error('Cannot write metadata to JSON file ' + infofn)
 978                     return
 979
 980         if self.params.get('writethumbnail', False):
 981             if info_dict.get('thumbnail') is not None:
 982                 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
 983                 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
 984                 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
 985                     self.to_screen('[%s] %s: Thumbnail is already present' %
 986                                    (info_dict['extractor'], info_dict['id']))
 987                 else:
 988                     self.to_screen('[%s] %s: Downloading thumbnail ...' %
 989                                    (info_dict['extractor'], info_dict['id']))
 990                     try:
 991                         uf = self.urlopen(info_dict['thumbnail'])
 992                         with open(thumb_filename, 'wb') as thumbf:
 993                             shutil.copyfileobj(uf, thumbf)
 994                         self.to_screen('[%s] %s: Writing thumbnail to: %s' %
 995                             (info_dict['extractor'], info_dict['id'], thumb_filename))
 996                     except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 997                         self.report_warning('Unable to download thumbnail "%s": %s' %
 998                             (info_dict['thumbnail'], compat_str(err)))
 999
1000         if not self.params.get('skip_download', False):
1001             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
1002                 success = True
1003             else:
1004                 try:
1005                     def dl(name, info):
1006                         fd = get_suitable_downloader(info)(self, self.params)
1007                         for ph in self._progress_hooks:
1008                             fd.add_progress_hook(ph)
1009                         if self.params.get('verbose'):
1010                             self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1011                         return fd.download(name, info)
1012                     if info_dict.get('requested_formats') is not None:
1013                         downloaded = []
1014                         success = True
1015                         merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1016                         if not merger._get_executable():
1017                             postprocessors = []
1018                             self.report_warning('You have requested multiple '
1019                                 'formats but ffmpeg or avconv are not installed.'
1020                                 ' The formats won\'t be merged')
1021                         else:
1022                             postprocessors = [merger]
1023                         for f in info_dict['requested_formats']:
1024                             new_info = dict(info_dict)
1025                             new_info.update(f)
1026                             fname = self.prepare_filename(new_info)
1027                             fname = prepend_extension(fname, 'f%s' % f['format_id'])
1028                             downloaded.append(fname)
1029                             partial_success = dl(fname, new_info)
1030                             success = success and partial_success
1031                         info_dict['__postprocessors'] = postprocessors
1032                         info_dict['__files_to_merge'] = downloaded
1033                     else:
1034                         # Just a single file
1035                         success = dl(filename, info_dict)
1036                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1037                     self.report_error('unable to download video data: %s' % str(err))
1038                     return
1039                 except (OSError, IOError) as err:
1040                     raise UnavailableVideoError(err)
1041                 except (ContentTooShortError, ) as err:
1042                     self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1043                     return
1044
1045             if success:
1046                 try:
1047                     self.post_process(filename, info_dict)
1048                 except (PostProcessingError) as err:
1049                     self.report_error('postprocessing: %s' % str(err))
1050                     return
1051
1052         self.record_download_archive(info_dict)
1053
1054     def download(self, url_list):
1055         """Download a given list of URLs."""
1056         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1057         if (len(url_list) > 1 and
1058                 '%' not in outtmpl
1059                 and self.params.get('max_downloads') != 1):
1060             raise SameFileError(outtmpl)
1061
1062         for url in url_list:
1063             try:
1064                 #It also downloads the videos
1065                 self.extract_info(url)
1066             except UnavailableVideoError:
1067                 self.report_error('unable to download video')
1068             except MaxDownloadsReached:
1069                 self.to_screen('[info] Maximum number of downloaded files reached.')
1070                 raise
1071
1072         return self._download_retcode
1073
1074     def download_with_info_file(self, info_filename):
1075         with io.open(info_filename, 'r', encoding='utf-8') as f:
1076             info = json.load(f)
1077         try:
1078             self.process_ie_result(info, download=True)
1079         except DownloadError:
1080             webpage_url = info.get('webpage_url')
1081             if webpage_url is not None:
1082                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1083                 return self.download([webpage_url])
1084             else:
1085                 raise
1086         return self._download_retcode
1087
1088     def post_process(self, filename, ie_info):
1089         """Run all the postprocessors on the given file."""
1090         info = dict(ie_info)
1091         info['filepath'] = filename
1092         keep_video = None
1093         pps_chain = []
1094         if ie_info.get('__postprocessors') is not None:
1095             pps_chain.extend(ie_info['__postprocessors'])
1096         pps_chain.extend(self._pps)
1097         for pp in pps_chain:
1098             try:
1099                 keep_video_wish, new_info = pp.run(info)
1100                 if keep_video_wish is not None:
1101                     if keep_video_wish:
1102                         keep_video = keep_video_wish
1103                     elif keep_video is None:
1104                         # No clear decision yet, let IE decide
1105                         keep_video = keep_video_wish
1106             except PostProcessingError as e:
1107                 self.report_error(e.msg)
1108         if keep_video is False and not self.params.get('keepvideo', False):
1109             try:
1110                 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1111                 os.remove(encodeFilename(filename))
1112             except (IOError, OSError):
1113                 self.report_warning('Unable to remove downloaded video file')
1114
1115     def _make_archive_id(self, info_dict):
1116         # Future-proof against any change in case
1117         # and backwards compatibility with prior versions
1118         extractor = info_dict.get('extractor_key')
1119         if extractor is None:
1120             if 'id' in info_dict:
1121                 extractor = info_dict.get('ie_key')  # key in a playlist
1122         if extractor is None:
1123             return None  # Incomplete video information
1124         return extractor.lower() + ' ' + info_dict['id']
1125
1126     def in_download_archive(self, info_dict):
1127         fn = self.params.get('download_archive')
1128         if fn is None:
1129             return False
1130
1131         vid_id = self._make_archive_id(info_dict)
1132         if vid_id is None:
1133             return False  # Incomplete video information
1134
1135         try:
1136             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1137                 for line in archive_file:
1138                     if line.strip() == vid_id:
1139                         return True
1140         except IOError as ioe:
1141             if ioe.errno != errno.ENOENT:
1142                 raise
1143         return False
1144
1145     def record_download_archive(self, info_dict):
1146         fn = self.params.get('download_archive')
1147         if fn is None:
1148             return
1149         vid_id = self._make_archive_id(info_dict)
1150         assert vid_id
1151         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1152             archive_file.write(vid_id + '\n')
1153
1154     @staticmethod
1155     def format_resolution(format, default='unknown'):
1156         if format.get('vcodec') == 'none':
1157             return 'audio only'
1158         if format.get('resolution') is not None:
1159             return format['resolution']
1160         if format.get('height') is not None:
1161             if format.get('width') is not None:
1162                 res = '%sx%s' % (format['width'], format['height'])
1163             else:
1164                 res = '%sp' % format['height']
1165         elif format.get('width') is not None:
1166             res = '?x%d' % format['width']
1167         else:
1168             res = default
1169         return res
1170
1171     def _format_note(self, fdict):
1172         res = ''
1173         if fdict.get('ext') in ['f4f', 'f4m']:
1174             res += '(unsupported) '
1175         if fdict.get('format_note') is not None:
1176             res += fdict['format_note'] + ' '
1177         if fdict.get('tbr') is not None:
1178             res += '%4dk ' % fdict['tbr']
1179         if fdict.get('container') is not None:
1180             if res:
1181                 res += ', '
1182             res += '%s container' % fdict['container']
1183         if (fdict.get('vcodec') is not None and
1184                 fdict.get('vcodec') != 'none'):
1185             if res:
1186                 res += ', '
1187             res += fdict['vcodec']
1188             if fdict.get('vbr') is not None:
1189                 res += '@'
1190         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1191             res += 'video@'
1192         if fdict.get('vbr') is not None:
1193             res += '%4dk' % fdict['vbr']
1194         if fdict.get('acodec') is not None:
1195             if res:
1196                 res += ', '
1197             if fdict['acodec'] == 'none':
1198                 res += 'video only'
1199             else:
1200                 res += '%-5s' % fdict['acodec']
1201         elif fdict.get('abr') is not None:
1202             if res:
1203                 res += ', '
1204             res += 'audio'
1205         if fdict.get('abr') is not None:
1206             res += '@%3dk' % fdict['abr']
1207         if fdict.get('asr') is not None:
1208             res += ' (%5dHz)' % fdict['asr']
1209         if fdict.get('filesize') is not None:
1210             if res:
1211                 res += ', '
1212             res += format_bytes(fdict['filesize'])
1213         elif fdict.get('filesize_approx') is not None:
1214             if res:
1215                 res += ', '
1216             res += '~' + format_bytes(fdict['filesize_approx'])
1217         return res
1218
1219     def list_formats(self, info_dict):
1220         def line(format, idlen=20):
1221             return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1222                 format['format_id'],
1223                 format['ext'],
1224                 self.format_resolution(format),
1225                 self._format_note(format),
1226             ))
1227
1228         formats = info_dict.get('formats', [info_dict])
1229         idlen = max(len('format code'),
1230                     max(len(f['format_id']) for f in formats))
1231         formats_s = [line(f, idlen) for f in formats]
1232         if len(formats) > 1:
1233             formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1234             formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1235
1236         header_line = line({
1237             'format_id': 'format code', 'ext': 'extension',
1238             'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1239         self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1240                        (info_dict['id'], header_line, '\n'.join(formats_s)))
1241
1242     def urlopen(self, req):
1243         """ Start an HTTP download """
1244         return self._opener.open(req, timeout=self._socket_timeout)
1245
1246     def print_debug_header(self):
1247         if not self.params.get('verbose'):
1248             return
1249
1250         if type('') is not compat_str:
1251             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1252             self.report_warning(
1253                 'Your Python is broken! Update to a newer and supported version')
1254
1255         encoding_str = (
1256             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1257                 locale.getpreferredencoding(),
1258                 sys.getfilesystemencoding(),
1259                 sys.stdout.encoding,
1260                 self.get_encoding()))
1261         write_string(encoding_str, encoding=None)
1262
1263         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1264         try:
1265             sp = subprocess.Popen(
1266                 ['git', 'rev-parse', '--short', 'HEAD'],
1267                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1268                 cwd=os.path.dirname(os.path.abspath(__file__)))
1269             out, err = sp.communicate()
1270             out = out.decode().strip()
1271             if re.match('[0-9a-f]+', out):
1272                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1273         except:
1274             try:
1275                 sys.exc_clear()
1276             except:
1277                 pass
1278         self._write_string('[debug] Python version %s - %s' %
1279                      (platform.python_version(), platform_name()) + '\n')
1280
1281         proxy_map = {}
1282         for handler in self._opener.handlers:
1283             if hasattr(handler, 'proxies'):
1284                 proxy_map.update(handler.proxies)
1285         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1286
1287     def _setup_opener(self):
1288         timeout_val = self.params.get('socket_timeout')
1289         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1290
1291         opts_cookiefile = self.params.get('cookiefile')
1292         opts_proxy = self.params.get('proxy')
1293
1294         if opts_cookiefile is None:
1295             self.cookiejar = compat_cookiejar.CookieJar()
1296         else:
1297             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1298                 opts_cookiefile)
1299             if os.access(opts_cookiefile, os.R_OK):
1300                 self.cookiejar.load()
1301
1302         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1303             self.cookiejar)
1304         if opts_proxy is not None:
1305             if opts_proxy == '':
1306                 proxies = {}
1307             else:
1308                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1309         else:
1310             proxies = compat_urllib_request.getproxies()
1311             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1312             if 'http' in proxies and 'https' not in proxies:
1313                 proxies['https'] = proxies['http']
1314         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1315
1316         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1317         https_handler = make_HTTPS_handler(
1318             self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1319         ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1320         opener = compat_urllib_request.build_opener(
1321             https_handler, proxy_handler, cookie_processor, ydlh)
1322         # Delete the default user-agent header, which would otherwise apply in
1323         # cases where our custom HTTP handler doesn't come into play
1324         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1325         opener.addheaders = []
1326         self._opener = opener
1327
1328     def encode(self, s):
1329         if isinstance(s, bytes):
1330             return s  # Already encoded
1331
1332         try:
1333             return s.encode(self.get_encoding())
1334         except UnicodeEncodeError as err:
1335             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1336             raise
1337
1338     def get_encoding(self):
1339         encoding = self.params.get('encoding')
1340         if encoding is None:
1341             encoding = preferredencoding()
1342         return encoding