_ Git - youtube-dl/blob - youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import datetime
   8 import errno
   9 import io
  10 import json
  11 import locale
  12 import os
  13 import platform
  14 import re
  15 import shutil
  16 import subprocess
  17 import socket
  18 import sys
  19 import time
  20 import traceback
  21
  22 if os.name == 'nt':
  23     import ctypes
  24
  25 from .utils import (
  26     compat_cookiejar,
  27     compat_http_client,
  28     compat_str,
  29     compat_urllib_error,
  30     compat_urllib_request,
  31     escape_url,
  32     ContentTooShortError,
  33     date_from_str,
  34     DateRange,
  35     DEFAULT_OUTTMPL,
  36     determine_ext,
  37     DownloadError,
  38     encodeFilename,
  39     ExtractorError,
  40     format_bytes,
  41     formatSeconds,
  42     get_term_width,
  43     locked_file,
  44     make_HTTPS_handler,
  45     MaxDownloadsReached,
  46     PagedList,
  47     PostProcessingError,
  48     platform_name,
  49     preferredencoding,
  50     SameFileError,
  51     sanitize_filename,
  52     subtitles_filename,
  53     takewhile_inclusive,
  54     UnavailableVideoError,
  55     url_basename,
  56     write_json_file,
  57     write_string,
  58     YoutubeDLHandler,
  59     prepend_extension,
  60 )
  61 from .cache import Cache
  62 from .extractor import get_info_extractor, gen_extractors
  63 from .downloader import get_suitable_downloader
  64 from .postprocessor import FFmpegMergerPP
  65 from .version import __version__
  66
  67
  68 class YoutubeDL(object):
  69     """YoutubeDL class.
  70
  71     YoutubeDL objects are the ones responsible of downloading the
  72     actual video file and writing it to disk if the user has requested
  73     it, among some other tasks. In most cases there should be one per
  74     program. As, given a video URL, the downloader doesn't know how to
  75     extract all the needed information, task that InfoExtractors do, it
  76     has to pass the URL to one of them.
  77
  78     For this, YoutubeDL objects have a method that allows
  79     InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
  81     finds that reports being able to handle it. The InfoExtractor extracts
  82     all the information about the video or videos the URL refers to, and
  83     YoutubeDL process the extracted information, possibly using a File
  84     Downloader to download the video.
  85
  86     YoutubeDL objects accept a lot of parameters. In order not to saturate
  87     the object constructor with arguments, it receives a dictionary of
  88     options instead. These options are available through the params
  89     attribute for the InfoExtractors to use. The YoutubeDL also
  90     registers itself as the downloader in charge for the InfoExtractors
  91     that are added to it, so this is a "mutual registration".
  92
  93     Available options:
  94
  95     username:          Username for authentication purposes.
  96     password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
  98     usenetrc:          Use netrc for authentication instead.
  99     verbose:           Print additional info to stdout.
 100     quiet:             Do not print messages to stdout.
 101     no_warnings:       Do not print out anything for warnings.
 102     forceurl:          Force printing final URL.
 103     forcetitle:        Force printing title.
 104     forceid:           Force printing ID.
 105     forcethumbnail:    Force printing thumbnail URL.
 106     forcedescription:  Force printing description.
 107     forcefilename:     Force printing final filename.
 108     forceduration:     Force printing duration.
 109     forcejson:         Force printing info_dict as JSON.
 110     simulate:          Do not download the video files.
 111     format:            Video format code.
 112     format_limit:      Highest quality format to try.
 113     outtmpl:           Template for output names.
 114     restrictfilenames: Do not allow "&" and spaces in file names
 115     ignoreerrors:      Do not stop on download errors.
 116     nooverwrites:      Prevent overwriting files.
 117     playliststart:     Playlist item to start at.
 118     playlistend:       Playlist item to end at.
 119     matchtitle:        Download only matching titles.
 120     rejecttitle:       Reject downloads for matching titles.
 121     logger:            Log messages to a logging.Logger instance.
 122     logtostderr:       Log messages to stderr instead of stdout.
 123     writedescription:  Write the video description to a .description file
 124     writeinfojson:     Write the video description to a .info.json file
 125     writeannotations:  Write the video annotations to a .annotations.xml file
 126     writethumbnail:    Write the thumbnail image to a file
 127     writesubtitles:    Write the video subtitles to a file
 128     writeautomaticsub: Write the automatic subtitles to a file
 129     allsubtitles:      Downloads all the subtitles of the video
 130                        (requires writesubtitles or writeautomaticsub)
 131     listsubtitles:     Lists all available subtitles for the video
 132     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
 133     subtitleslangs:    List of languages of the subtitles to download
 134     keepvideo:         Keep the video file after post-processing
 135     daterange:         A DateRange object, download only if the upload_date is in the range.
 136     skip_download:     Skip the actual download of the video file
 137     cachedir:          Location of the cache files in the filesystem.
 138                        False to disable filesystem cache.
 139     noplaylist:        Download single video instead of a playlist if in doubt.
 140     age_limit:         An integer representing the user's age in years.
 141                        Unsuitable videos for the given age are skipped.
 142     min_views:         An integer representing the minimum view count the video
 143                        must have in order to not be skipped.
 144                        Videos without view count information are always
 145                        downloaded. None for no limit.
 146     max_views:         An integer representing the maximum view count.
 147                        Videos that are more popular than that are not
 148                        downloaded.
 149                        Videos without view count information are always
 150                        downloaded. None for no limit.
 151     download_archive:  File name of a file where all downloads are recorded.
 152                        Videos already present in the file are not downloaded
 153                        again.
 154     cookiefile:        File name where cookies should be read from and dumped to.
 155     nocheckcertificate:Do not verify SSL certificates
 156     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 157                        At the moment, this is only supported by YouTube.
 158     proxy:             URL of the proxy server to use
 159     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 160     bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
 162     debug_printtraffic:Print out sent and received HTTP traffic
 163     include_ads:       Download ads as well
 164     default_search:    Prepend this string if an input url is not valid.
 165                        'auto' for elaborate guessing
 166     encoding:          Use this encoding instead of the system-specified.
 167     extract_flat:      Do not resolve URLs, return the immediate result.
 168
 169     The following parameters are not used by YoutubeDL itself, they are used by
 170     the FileDownloader:
 171     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 172     noresizebuffer, retries, continuedl, noprogress, consoletitle
 173
 174     The following options are used by the post processors:
 175     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
 176                        otherwise prefer avconv.
 177     exec_cmd:          Arbitrary command to run after downloading
 178     """
 179
    # Class-level defaults. __init__ assigns a fresh per-instance value to
    # every one of these names, so instances do not share the lists below.
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None
 186
 187     def __init__(self, params=None):
 188         """Create a FileDownloader object with the given options."""
 189         if params is None:
 190             params = {}
 191         self._ies = []
 192         self._ies_instances = {}
 193         self._pps = []
 194         self._progress_hooks = []
 195         self._download_retcode = 0
 196         self._num_downloads = 0
 197         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 198         self._err_file = sys.stderr
 199         self.params = params
 200         self.cache = Cache(self)
 201
 202         if params.get('bidi_workaround', False):
 203             try:
 204                 import pty
 205                 master, slave = pty.openpty()
 206                 width = get_term_width()
 207                 if width is None:
 208                     width_args = []
 209                 else:
 210                     width_args = ['-w', str(width)]
 211                 sp_kwargs = dict(
 212                     stdin=subprocess.PIPE,
 213                     stdout=slave,
 214                     stderr=self._err_file)
 215                 try:
 216                     self._output_process = subprocess.Popen(
 217                         ['bidiv'] + width_args, **sp_kwargs
 218                     )
 219                 except OSError:
 220                     self._output_process = subprocess.Popen(
 221                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 222                 self._output_channel = os.fdopen(master, 'rb')
 223             except OSError as ose:
 224                 if ose.errno == 2:
 225                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 226                 else:
 227                     raise
 228
 229         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 230                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 231                 and not params['restrictfilenames']):
 232             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 233             self.report_warning(
 234                 'Assuming --restrict-filenames since file system encoding '
 235                 'cannot encode all charactes. '
 236                 'Set the LC_ALL environment variable to fix this.')
 237             self.params['restrictfilenames'] = True
 238
 239         if '%(stitle)s' in self.params.get('outtmpl', ''):
 240             self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 241
 242         self._setup_opener()
 243
 244     def add_info_extractor(self, ie):
 245         """Add an InfoExtractor object to the end of the list."""
 246         self._ies.append(ie)
 247         self._ies_instances[ie.ie_key()] = ie
 248         ie.set_downloader(self)
 249
 250     def get_info_extractor(self, ie_key):
 251         """
 252         Get an instance of an IE with name ie_key, it will try to get one from
 253         the _ies list, if there's no instance it will create a new one and add
 254         it to the extractor list.
 255         """
 256         ie = self._ies_instances.get(ie_key)
 257         if ie is None:
 258             ie = get_info_extractor(ie_key)()
 259             self.add_info_extractor(ie)
 260         return ie
 261
 262     def add_default_info_extractors(self):
 263         """
 264         Add the InfoExtractors returned by gen_extractors to the end of the list
 265         """
 266         for ie in gen_extractors():
 267             self.add_info_extractor(ie)
 268
 269     def add_post_processor(self, pp):
 270         """Add a PostProcessor object to the end of the chain."""
 271         self._pps.append(pp)
 272         pp.set_downloader(self)
 273
 274     def add_progress_hook(self, ph):
 275         """Add the progress hook (currently only for the file downloader)"""
 276         self._progress_hooks.append(ph)
 277
 278     def _bidi_workaround(self, message):
 279         if not hasattr(self, '_output_channel'):
 280             return message
 281
 282         assert hasattr(self, '_output_process')
 283         assert isinstance(message, compat_str)
 284         line_count = message.count('\n') + 1
 285         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 286         self._output_process.stdin.flush()
 287         res = ''.join(self._output_channel.readline().decode('utf-8')
 288                        for _ in range(line_count))
 289         return res[:-len('\n')]
 290
 291     def to_screen(self, message, skip_eol=False):
 292         """Print message to stdout if not in quiet mode."""
 293         return self.to_stdout(message, skip_eol, check_quiet=True)
 294
 295     def _write_string(self, s, out=None):
 296         write_string(s, out=out, encoding=self.params.get('encoding'))
 297
 298     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 299         """Print message to stdout if not in quiet mode."""
 300         if self.params.get('logger'):
 301             self.params['logger'].debug(message)
 302         elif not check_quiet or not self.params.get('quiet', False):
 303             message = self._bidi_workaround(message)
 304             terminator = ['\n', ''][skip_eol]
 305             output = message + terminator
 306
 307             self._write_string(output, self._screen_file)
 308
 309     def to_stderr(self, message):
 310         """Print message to stderr."""
 311         assert isinstance(message, compat_str)
 312         if self.params.get('logger'):
 313             self.params['logger'].error(message)
 314         else:
 315             message = self._bidi_workaround(message)
 316             output = message + '\n'
 317             self._write_string(output, self._err_file)
 318
 319     def to_console_title(self, message):
 320         if not self.params.get('consoletitle', False):
 321             return
 322         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 323             # c_wchar_p() might not be necessary if `message` is
 324             # already of type unicode()
 325             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 326         elif 'TERM' in os.environ:
 327             self._write_string('\033]0;%s\007' % message, self._screen_file)
 328
 329     def save_console_title(self):
 330         if not self.params.get('consoletitle', False):
 331             return
 332         if 'TERM' in os.environ:
 333             # Save the title on stack
 334             self._write_string('\033[22;0t', self._screen_file)
 335
 336     def restore_console_title(self):
 337         if not self.params.get('consoletitle', False):
 338             return
 339         if 'TERM' in os.environ:
 340             # Restore the title from stack
 341             self._write_string('\033[23;0t', self._screen_file)
 342
 343     def __enter__(self):
 344         self.save_console_title()
 345         return self
 346
 347     def __exit__(self, *args):
 348         self.restore_console_title()
 349
 350         if self.params.get('cookiefile') is not None:
 351             self.cookiejar.save()
 352
 353     def trouble(self, message=None, tb=None):
 354         """Determine action to take when a download problem appears.
 355
 356         Depending on if the downloader has been configured to ignore
 357         download errors or not, this method may throw an exception or
 358         not when errors are found, after printing the message.
 359
 360         tb, if given, is additional traceback information.
 361         """
 362         if message is not None:
 363             self.to_stderr(message)
 364         if self.params.get('verbose'):
 365             if tb is None:
 366                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 367                     tb = ''
 368                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 369                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 370                     tb += compat_str(traceback.format_exc())
 371                 else:
 372                     tb_data = traceback.format_list(traceback.extract_stack())
 373                     tb = ''.join(tb_data)
 374             self.to_stderr(tb)
 375         if not self.params.get('ignoreerrors', False):
 376             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 377                 exc_info = sys.exc_info()[1].exc_info
 378             else:
 379                 exc_info = sys.exc_info()
 380             raise DownloadError(message, exc_info)
 381         self._download_retcode = 1
 382
 383     def report_warning(self, message):
 384         '''
 385         Print the message to stderr, it will be prefixed with 'WARNING:'
 386         If stderr is a tty file the 'WARNING:' will be colored
 387         '''
 388         if self.params.get('logger') is not None:
 389             self.params['logger'].warning(message)
 390         else:
 391             if self.params.get('no_warnings'):
 392                 return
 393             if self._err_file.isatty() and os.name != 'nt':
 394                 _msg_header = '\033[0;33mWARNING:\033[0m'
 395             else:
 396                 _msg_header = 'WARNING:'
 397             warning_message = '%s %s' % (_msg_header, message)
 398             self.to_stderr(warning_message)
 399
 400     def report_error(self, message, tb=None):
 401         '''
 402         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 403         in red if stderr is a tty file.
 404         '''
 405         if self._err_file.isatty() and os.name != 'nt':
 406             _msg_header = '\033[0;31mERROR:\033[0m'
 407         else:
 408             _msg_header = 'ERROR:'
 409         error_message = '%s %s' % (_msg_header, message)
 410         self.trouble(error_message, tb)
 411
 412     def report_file_already_downloaded(self, file_name):
 413         """Report file has already been fully downloaded."""
 414         try:
 415             self.to_screen('[download] %s has already been downloaded' % file_name)
 416         except UnicodeEncodeError:
 417             self.to_screen('[download] The file has already been downloaded')
 418
 419     def prepare_filename(self, info_dict):
 420         """Generate the output filename."""
 421         try:
 422             template_dict = dict(info_dict)
 423
 424             template_dict['epoch'] = int(time.time())
 425             autonumber_size = self.params.get('autonumber_size')
 426             if autonumber_size is None:
 427                 autonumber_size = 5
 428             autonumber_templ = '%0' + str(autonumber_size) + 'd'
 429             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 430             if template_dict.get('playlist_index') is not None:
 431                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
 432             if template_dict.get('resolution') is None:
 433                 if template_dict.get('width') and template_dict.get('height'):
 434                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 435                 elif template_dict.get('height'):
 436                     template_dict['resolution'] = '%sp' % template_dict['height']
 437                 elif template_dict.get('width'):
 438                     template_dict['resolution'] = '?x%d' % template_dict['width']
 439
 440             sanitize = lambda k, v: sanitize_filename(
 441                 compat_str(v),
 442                 restricted=self.params.get('restrictfilenames'),
 443                 is_id=(k == 'id'))
 444             template_dict = dict((k, sanitize(k, v))
 445                                  for k, v in template_dict.items()
 446                                  if v is not None)
 447             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 448
 449             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 450             tmpl = os.path.expanduser(outtmpl)
 451             filename = tmpl % template_dict
 452             return filename
 453         except ValueError as err:
 454             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 455             return None
 456
 457     def _match_entry(self, info_dict):
 458         """ Returns None iff the file should be downloaded """
 459
 460         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 461         if 'title' in info_dict:
 462             # This can happen when we're just evaluating the playlist
 463             title = info_dict['title']
 464             matchtitle = self.params.get('matchtitle', False)
 465             if matchtitle:
 466                 if not re.search(matchtitle, title, re.IGNORECASE):
 467                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 468             rejecttitle = self.params.get('rejecttitle', False)
 469             if rejecttitle:
 470                 if re.search(rejecttitle, title, re.IGNORECASE):
 471                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 472         date = info_dict.get('upload_date', None)
 473         if date is not None:
 474             dateRange = self.params.get('daterange', DateRange())
 475             if date not in dateRange:
 476                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 477         view_count = info_dict.get('view_count', None)
 478         if view_count is not None:
 479             min_views = self.params.get('min_views')
 480             if min_views is not None and view_count < min_views:
 481                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 482             max_views = self.params.get('max_views')
 483             if max_views is not None and view_count > max_views:
 484                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 485         age_limit = self.params.get('age_limit')
 486         if age_limit is not None:
 487             actual_age_limit = info_dict.get('age_limit')
 488             if actual_age_limit is None:
 489                 actual_age_limit = 0
 490             if age_limit < actual_age_limit:
 491                 return 'Skipping "' + title + '" because it is age restricted'
 492         if self.in_download_archive(info_dict):
 493             return '%s has already been recorded in archive' % video_title
 494         return None
 495
 496     @staticmethod
 497     def add_extra_info(info_dict, extra_info):
 498         '''Set the keys from extra_info in info dict if they are missing'''
 499         for key, value in extra_info.items():
 500             info_dict.setdefault(key, value)
 501
 502     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 503                      process=True):
 504         '''
 505         Returns a list with a dictionary for each video we find.
 506         If 'download', also downloads the videos.
 507         extra_info is a dict containing the extra values to add to each result
 508          '''
 509
 510         if ie_key:
 511             ies = [self.get_info_extractor(ie_key)]
 512         else:
 513             ies = self._ies
 514
 515         for ie in ies:
 516             if not ie.suitable(url):
 517                 continue
 518
 519             if not ie.working():
 520                 self.report_warning('The program functionality for this site has been marked as broken, '
 521                                     'and will probably not work.')
 522
 523             try:
 524                 ie_result = ie.extract(url)
 525                 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
 526                     break
 527                 if isinstance(ie_result, list):
 528                     # Backwards compatibility: old IE result format
 529                     ie_result = {
 530                         '_type': 'compat_list',
 531                         'entries': ie_result,
 532                     }
 533                 self.add_default_extra_info(ie_result, ie, url)
 534                 if process:
 535                     return self.process_ie_result(ie_result, download, extra_info)
 536                 else:
 537                     return ie_result
 538             except ExtractorError as de: # An error we somewhat expected
 539                 self.report_error(compat_str(de), de.format_traceback())
 540                 break
 541             except MaxDownloadsReached:
 542                 raise
 543             except Exception as e:
 544                 if self.params.get('ignoreerrors', False):
 545                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 546                     break
 547                 else:
 548                     raise
 549         else:
 550             self.report_error('no suitable InfoExtractor for URL %s' % url)
 551
 552     def add_default_extra_info(self, ie_result, ie, url):
 553         self.add_extra_info(ie_result, {
 554             'extractor': ie.IE_NAME,
 555             'webpage_url': url,
 556             'webpage_url_basename': url_basename(url),
 557             'extractor_key': ie.ie_key(),
 558         })
 559
 560     def process_ie_result(self, ie_result, download=True, extra_info={}):
 561         """
 562         Take the result of the ie(may be modified) and resolve all unresolved
 563         references (URLs, playlist items).
 564
 565         It will also download the videos if 'download'.
 566         Returns the resolved ie_result.
 567         """
 568
 569         result_type = ie_result.get('_type', 'video')
 570
 571         if self.params.get('extract_flat', False):
 572             if result_type in ('url', 'url_transparent'):
 573                 return ie_result
 574
 575         if result_type == 'video':
 576             self.add_extra_info(ie_result, extra_info)
 577             return self.process_video_result(ie_result, download=download)
 578         elif result_type == 'url':
 579             # We have to add extra_info to the results because it may be
 580             # contained in a playlist
 581             return self.extract_info(ie_result['url'],
 582                                      download,
 583                                      ie_key=ie_result.get('ie_key'),
 584                                      extra_info=extra_info)
 585         elif result_type == 'url_transparent':
 586             # Use the information from the embedding page
 587             info = self.extract_info(
 588                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 589                 extra_info=extra_info, download=False, process=False)
 590
 591             def make_result(embedded_info):
 592                 new_result = ie_result.copy()
 593                 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
 594                           'entries', 'ie_key', 'duration',
 595                           'subtitles', 'annotations', 'format',
 596                           'thumbnail', 'thumbnails'):
 597                     if f in new_result:
 598                         del new_result[f]
 599                     if f in embedded_info:
 600                         new_result[f] = embedded_info[f]
 601                 return new_result
 602             new_result = make_result(info)
 603
 604             assert new_result.get('_type') != 'url_transparent'
 605             if new_result.get('_type') == 'compat_list':
 606                 new_result['entries'] = [
 607                     make_result(e) for e in new_result['entries']]
 608
 609             return self.process_ie_result(
 610                 new_result, download=download, extra_info=extra_info)
 611         elif result_type == 'playlist':
 612             # We process each entry in the playlist
 613             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 614             self.to_screen('[download] Downloading playlist: %s' % playlist)
 615
 616             playlist_results = []
 617
 618             playliststart = self.params.get('playliststart', 1) - 1
 619             playlistend = self.params.get('playlistend', None)
 620             # For backwards compatibility, interpret -1 as whole list
 621             if playlistend == -1:
 622                 playlistend = None
 623
 624             if isinstance(ie_result['entries'], list):
 625                 n_all_entries = len(ie_result['entries'])
 626                 entries = ie_result['entries'][playliststart:playlistend]
 627                 n_entries = len(entries)
 628                 self.to_screen(
 629                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
 630                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
 631             else:
 632                 assert isinstance(ie_result['entries'], PagedList)
 633                 entries = ie_result['entries'].getslice(
 634                     playliststart, playlistend)
 635                 n_entries = len(entries)
 636                 self.to_screen(
 637                     "[%s] playlist %s: Downloading %d videos" %
 638                     (ie_result['extractor'], playlist, n_entries))
 639
 640             for i, entry in enumerate(entries, 1):
 641                 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
 642                 extra = {
 643                     'n_entries': n_entries,
 644                     'playlist': playlist,
 645                     'playlist_index': i + playliststart,
 646                     'extractor': ie_result['extractor'],
 647                     'webpage_url': ie_result['webpage_url'],
 648                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
 649                     'extractor_key': ie_result['extractor_key'],
 650                 }
 651
 652                 reason = self._match_entry(entry)
 653                 if reason is not None:
 654                     self.to_screen('[download] ' + reason)
 655                     continue
 656
 657                 entry_result = self.process_ie_result(entry,
 658                                                       download=download,
 659                                                       extra_info=extra)
 660                 playlist_results.append(entry_result)
 661             ie_result['entries'] = playlist_results
 662             return ie_result
 663         elif result_type == 'compat_list':
 664             def _fixup(r):
 665                 self.add_extra_info(r,
 666                     {
 667                         'extractor': ie_result['extractor'],
 668                         'webpage_url': ie_result['webpage_url'],
 669                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
 670                         'extractor_key': ie_result['extractor_key'],
 671                     })
 672                 return r
 673             ie_result['entries'] = [
 674                 self.process_ie_result(_fixup(r), download, extra_info)
 675                 for r in ie_result['entries']
 676             ]
 677             return ie_result
 678         else:
 679             raise Exception('Invalid result type: %s' % result_type)
 680
 681     def select_format(self, format_spec, available_formats):
 682         if format_spec == 'best' or format_spec is None:
 683             return available_formats[-1]
 684         elif format_spec == 'worst':
 685             return available_formats[0]
 686         elif format_spec == 'bestaudio':
 687             audio_formats = [
 688                 f for f in available_formats
 689                 if f.get('vcodec') == 'none']
 690             if audio_formats:
 691                 return audio_formats[-1]
 692         elif format_spec == 'worstaudio':
 693             audio_formats = [
 694                 f for f in available_formats
 695                 if f.get('vcodec') == 'none']
 696             if audio_formats:
 697                 return audio_formats[0]
 698         elif format_spec == 'bestvideo':
 699             video_formats = [
 700                 f for f in available_formats
 701                 if f.get('acodec') == 'none']
 702             if video_formats:
 703                 return video_formats[-1]
 704         elif format_spec == 'worstvideo':
 705             video_formats = [
 706                 f for f in available_formats
 707                 if f.get('acodec') == 'none']
 708             if video_formats:
 709                 return video_formats[0]
 710         else:
 711             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
 712             if format_spec in extensions:
 713                 filter_f = lambda f: f['ext'] == format_spec
 714             else:
 715                 filter_f = lambda f: f['format_id'] == format_spec
 716             matches = list(filter(filter_f, available_formats))
 717             if matches:
 718                 return matches[-1]
 719         return None
 720
 721     def process_video_result(self, info_dict, download=True):
 722         assert info_dict.get('_type', 'video') == 'video'
 723
 724         if 'id' not in info_dict:
 725             raise ExtractorError('Missing "id" field in extractor result')
 726         if 'title' not in info_dict:
 727             raise ExtractorError('Missing "title" field in extractor result')
 728
 729         if 'playlist' not in info_dict:
 730             # It isn't part of a playlist
 731             info_dict['playlist'] = None
 732             info_dict['playlist_index'] = None
 733
 734         thumbnails = info_dict.get('thumbnails')
 735         if thumbnails:
 736             thumbnails.sort(key=lambda t: (
 737                 t.get('width'), t.get('height'), t.get('url')))
 738             for t in thumbnails:
 739                 if 'width' in t and 'height' in t:
 740                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
 741
 742         if thumbnails and 'thumbnail' not in info_dict:
 743             info_dict['thumbnail'] = thumbnails[-1]['url']
 744
 745         if 'display_id' not in info_dict and 'id' in info_dict:
 746             info_dict['display_id'] = info_dict['id']
 747
 748         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
 749             upload_date = datetime.datetime.utcfromtimestamp(
 750                 info_dict['timestamp'])
 751             info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
 752
 753         # This extractors handle format selection themselves
 754         if info_dict['extractor'] in ['Youku']:
 755             if download:
 756                 self.process_info(info_dict)
 757             return info_dict
 758
 759         # We now pick which formats have to be downloaded
 760         if info_dict.get('formats') is None:
 761             # There's only one format available
 762             formats = [info_dict]
 763         else:
 764             formats = info_dict['formats']
 765
 766         if not formats:
 767             raise ExtractorError('No video formats found!')
 768
 769         # We check that all the formats have the format and format_id fields
 770         for i, format in enumerate(formats):
 771             if 'url' not in format:
 772                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
 773
 774             if format.get('format_id') is None:
 775                 format['format_id'] = compat_str(i)
 776             if format.get('format') is None:
 777                 format['format'] = '{id} - {res}{note}'.format(
 778                     id=format['format_id'],
 779                     res=self.format_resolution(format),
 780                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
 781                 )
 782             # Automatically determine file extension if missing
 783             if 'ext' not in format:
 784                 format['ext'] = determine_ext(format['url']).lower()
 785
 786         format_limit = self.params.get('format_limit', None)
 787         if format_limit:
 788             formats = list(takewhile_inclusive(
 789                 lambda f: f['format_id'] != format_limit, formats
 790             ))
 791
 792         # TODO Central sorting goes here
 793
 794         if formats[0] is not info_dict:
 795             # only set the 'formats' fields if the original info_dict list them
 796             # otherwise we end up with a circular reference, the first (and unique)
 797             # element in the 'formats' field in info_dict is info_dict itself,
 798             # wich can't be exported to json
 799             info_dict['formats'] = formats
 800         if self.params.get('listformats', None):
 801             self.list_formats(info_dict)
 802             return
 803
 804         req_format = self.params.get('format')
 805         if req_format is None:
 806             req_format = 'best'
 807         formats_to_download = []
 808         # The -1 is for supporting YoutubeIE
 809         if req_format in ('-1', 'all'):
 810             formats_to_download = formats
 811         else:
 812             for rfstr in req_format.split(','):
 813                 # We can accept formats requested in the format: 34/5/best, we pick
 814                 # the first that is available, starting from left
 815                 req_formats = rfstr.split('/')
 816                 for rf in req_formats:
 817                     if re.match(r'.+?\+.+?', rf) is not None:
 818                         # Two formats have been requested like '137+139'
 819                         format_1, format_2 = rf.split('+')
 820                         formats_info = (self.select_format(format_1, formats),
 821                             self.select_format(format_2, formats))
 822                         if all(formats_info):
 823                             selected_format = {
 824                                 'requested_formats': formats_info,
 825                                 'format': rf,
 826                                 'ext': formats_info[0]['ext'],
 827                             }
 828                         else:
 829                             selected_format = None
 830                     else:
 831                         selected_format = self.select_format(rf, formats)
 832                     if selected_format is not None:
 833                         formats_to_download.append(selected_format)
 834                         break
 835         if not formats_to_download:
 836             raise ExtractorError('requested format not available',
 837                                  expected=True)
 838
 839         if download:
 840             if len(formats_to_download) > 1:
 841                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
 842             for format in formats_to_download:
 843                 new_info = dict(info_dict)
 844                 new_info.update(format)
 845                 self.process_info(new_info)
 846         # We update the info dict with the best quality format (backwards compatibility)
 847         info_dict.update(formats_to_download[-1])
 848         return info_dict
 849
 850     def process_info(self, info_dict):
 851         """Process a single resolved IE result."""
 852
 853         assert info_dict.get('_type', 'video') == 'video'
 854
 855         max_downloads = self.params.get('max_downloads')
 856         if max_downloads is not None:
 857             if self._num_downloads >= int(max_downloads):
 858                 raise MaxDownloadsReached()
 859
 860         info_dict['fulltitle'] = info_dict['title']
 861         if len(info_dict['title']) > 200:
 862             info_dict['title'] = info_dict['title'][:197] + '...'
 863
 864         # Keep for backwards compatibility
 865         info_dict['stitle'] = info_dict['title']
 866
 867         if 'format' not in info_dict:
 868             info_dict['format'] = info_dict['ext']
 869
 870         reason = self._match_entry(info_dict)
 871         if reason is not None:
 872             self.to_screen('[download] ' + reason)
 873             return
 874
 875         self._num_downloads += 1
 876
 877         filename = self.prepare_filename(info_dict)
 878
 879         # Forced printings
 880         if self.params.get('forcetitle', False):
 881             self.to_stdout(info_dict['fulltitle'])
 882         if self.params.get('forceid', False):
 883             self.to_stdout(info_dict['id'])
 884         if self.params.get('forceurl', False):
 885             # For RTMP URLs, also include the playpath
 886             self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
 887         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
 888             self.to_stdout(info_dict['thumbnail'])
 889         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
 890             self.to_stdout(info_dict['description'])
 891         if self.params.get('forcefilename', False) and filename is not None:
 892             self.to_stdout(filename)
 893         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
 894             self.to_stdout(formatSeconds(info_dict['duration']))
 895         if self.params.get('forceformat', False):
 896             self.to_stdout(info_dict['format'])
 897         if self.params.get('forcejson', False):
 898             info_dict['_filename'] = filename
 899             self.to_stdout(json.dumps(info_dict))
 900
 901         # Do nothing else if in simulate mode
 902         if self.params.get('simulate', False):
 903             return
 904
 905         if filename is None:
 906             return
 907
 908         try:
 909             dn = os.path.dirname(encodeFilename(filename))
 910             if dn and not os.path.exists(dn):
 911                 os.makedirs(dn)
 912         except (OSError, IOError) as err:
 913             self.report_error('unable to create directory ' + compat_str(err))
 914             return
 915
 916         if self.params.get('writedescription', False):
 917             descfn = filename + '.description'
 918             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
 919                 self.to_screen('[info] Video description is already present')
 920             else:
 921                 try:
 922                     self.to_screen('[info] Writing video description to: ' + descfn)
 923                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
 924                         descfile.write(info_dict['description'])
 925                 except (KeyError, TypeError):
 926                     self.report_warning('There\'s no description to write.')
 927                 except (OSError, IOError):
 928                     self.report_error('Cannot write description file ' + descfn)
 929                     return
 930
 931         if self.params.get('writeannotations', False):
 932             annofn = filename + '.annotations.xml'
 933             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
 934                 self.to_screen('[info] Video annotations are already present')
 935             else:
 936                 try:
 937                     self.to_screen('[info] Writing video annotations to: ' + annofn)
 938                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
 939                         annofile.write(info_dict['annotations'])
 940                 except (KeyError, TypeError):
 941                     self.report_warning('There are no annotations to write.')
 942                 except (OSError, IOError):
 943                     self.report_error('Cannot write annotations file: ' + annofn)
 944                     return
 945
 946         subtitles_are_requested = any([self.params.get('writesubtitles', False),
 947                                        self.params.get('writeautomaticsub')])
 948
 949         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
 950             # subtitles download errors are already managed as troubles in relevant IE
 951             # that way it will silently go on when used with unsupporting IE
 952             subtitles = info_dict['subtitles']
 953             sub_format = self.params.get('subtitlesformat', 'srt')
 954             for sub_lang in subtitles.keys():
 955                 sub = subtitles[sub_lang]
 956                 if sub is None:
 957                     continue
 958                 try:
 959                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
 960                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
 961                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
 962                     else:
 963                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
 964                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
 965                                 subfile.write(sub)
 966                 except (OSError, IOError):
 967                     self.report_error('Cannot write subtitles file ' + sub_filename)
 968                     return
 969
 970         if self.params.get('writeinfojson', False):
 971             infofn = os.path.splitext(filename)[0] + '.info.json'
 972             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
 973                 self.to_screen('[info] Video description metadata is already present')
 974             else:
 975                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
 976                 try:
 977                     write_json_file(info_dict, encodeFilename(infofn))
 978                 except (OSError, IOError):
 979                     self.report_error('Cannot write metadata to JSON file ' + infofn)
 980                     return
 981
 982         if self.params.get('writethumbnail', False):
 983             if info_dict.get('thumbnail') is not None:
 984                 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
 985                 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
 986                 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
 987                     self.to_screen('[%s] %s: Thumbnail is already present' %
 988                                    (info_dict['extractor'], info_dict['id']))
 989                 else:
 990                     self.to_screen('[%s] %s: Downloading thumbnail ...' %
 991                                    (info_dict['extractor'], info_dict['id']))
 992                     try:
 993                         uf = self.urlopen(info_dict['thumbnail'])
 994                         with open(thumb_filename, 'wb') as thumbf:
 995                             shutil.copyfileobj(uf, thumbf)
 996                         self.to_screen('[%s] %s: Writing thumbnail to: %s' %
 997                             (info_dict['extractor'], info_dict['id'], thumb_filename))
 998                     except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 999                         self.report_warning('Unable to download thumbnail "%s": %s' %
1000                             (info_dict['thumbnail'], compat_str(err)))
1001
1002         if not self.params.get('skip_download', False):
1003             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
1004                 success = True
1005             else:
1006                 try:
1007                     def dl(name, info):
1008                         fd = get_suitable_downloader(info)(self, self.params)
1009                         for ph in self._progress_hooks:
1010                             fd.add_progress_hook(ph)
1011                         if self.params.get('verbose'):
1012                             self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1013                         return fd.download(name, info)
1014                     if info_dict.get('requested_formats') is not None:
1015                         downloaded = []
1016                         success = True
1017                         merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1018                         if not merger._get_executable():
1019                             postprocessors = []
1020                             self.report_warning('You have requested multiple '
1021                                 'formats but ffmpeg or avconv are not installed.'
1022                                 ' The formats won\'t be merged')
1023                         else:
1024                             postprocessors = [merger]
1025                         for f in info_dict['requested_formats']:
1026                             new_info = dict(info_dict)
1027                             new_info.update(f)
1028                             fname = self.prepare_filename(new_info)
1029                             fname = prepend_extension(fname, 'f%s' % f['format_id'])
1030                             downloaded.append(fname)
1031                             partial_success = dl(fname, new_info)
1032                             success = success and partial_success
1033                         info_dict['__postprocessors'] = postprocessors
1034                         info_dict['__files_to_merge'] = downloaded
1035                     else:
1036                         # Just a single file
1037                         success = dl(filename, info_dict)
1038                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1039                     self.report_error('unable to download video data: %s' % str(err))
1040                     return
1041                 except (OSError, IOError) as err:
1042                     raise UnavailableVideoError(err)
1043                 except (ContentTooShortError, ) as err:
1044                     self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1045                     return
1046
1047             if success:
1048                 try:
1049                     self.post_process(filename, info_dict)
1050                 except (PostProcessingError) as err:
1051                     self.report_error('postprocessing: %s' % str(err))
1052                     return
1053
1054         self.record_download_archive(info_dict)
1055
1056     def download(self, url_list):
1057         """Download a given list of URLs."""
1058         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1059         if (len(url_list) > 1 and
1060                 '%' not in outtmpl
1061                 and self.params.get('max_downloads') != 1):
1062             raise SameFileError(outtmpl)
1063
1064         for url in url_list:
1065             try:
1066                 #It also downloads the videos
1067                 self.extract_info(url)
1068             except UnavailableVideoError:
1069                 self.report_error('unable to download video')
1070             except MaxDownloadsReached:
1071                 self.to_screen('[info] Maximum number of downloaded files reached.')
1072                 raise
1073
1074         return self._download_retcode
1075
1076     def download_with_info_file(self, info_filename):
1077         with io.open(info_filename, 'r', encoding='utf-8') as f:
1078             info = json.load(f)
1079         try:
1080             self.process_ie_result(info, download=True)
1081         except DownloadError:
1082             webpage_url = info.get('webpage_url')
1083             if webpage_url is not None:
1084                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1085                 return self.download([webpage_url])
1086             else:
1087                 raise
1088         return self._download_retcode
1089
1090     def post_process(self, filename, ie_info):
1091         """Run all the postprocessors on the given file."""
1092         info = dict(ie_info)
1093         info['filepath'] = filename
1094         keep_video = None
1095         pps_chain = []
1096         if ie_info.get('__postprocessors') is not None:
1097             pps_chain.extend(ie_info['__postprocessors'])
1098         pps_chain.extend(self._pps)
1099         for pp in pps_chain:
1100             try:
1101                 keep_video_wish, new_info = pp.run(info)
1102                 if keep_video_wish is not None:
1103                     if keep_video_wish:
1104                         keep_video = keep_video_wish
1105                     elif keep_video is None:
1106                         # No clear decision yet, let IE decide
1107                         keep_video = keep_video_wish
1108             except PostProcessingError as e:
1109                 self.report_error(e.msg)
1110         if keep_video is False and not self.params.get('keepvideo', False):
1111             try:
1112                 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1113                 os.remove(encodeFilename(filename))
1114             except (IOError, OSError):
1115                 self.report_warning('Unable to remove downloaded video file')
1116
1117     def _make_archive_id(self, info_dict):
1118         # Future-proof against any change in case
1119         # and backwards compatibility with prior versions
1120         extractor = info_dict.get('extractor_key')
1121         if extractor is None:
1122             if 'id' in info_dict:
1123                 extractor = info_dict.get('ie_key')  # key in a playlist
1124         if extractor is None:
1125             return None  # Incomplete video information
1126         return extractor.lower() + ' ' + info_dict['id']
1127
1128     def in_download_archive(self, info_dict):
1129         fn = self.params.get('download_archive')
1130         if fn is None:
1131             return False
1132
1133         vid_id = self._make_archive_id(info_dict)
1134         if vid_id is None:
1135             return False  # Incomplete video information
1136
1137         try:
1138             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1139                 for line in archive_file:
1140                     if line.strip() == vid_id:
1141                         return True
1142         except IOError as ioe:
1143             if ioe.errno != errno.ENOENT:
1144                 raise
1145         return False
1146
1147     def record_download_archive(self, info_dict):
1148         fn = self.params.get('download_archive')
1149         if fn is None:
1150             return
1151         vid_id = self._make_archive_id(info_dict)
1152         assert vid_id
1153         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1154             archive_file.write(vid_id + '\n')
1155
1156     @staticmethod
1157     def format_resolution(format, default='unknown'):
1158         if format.get('vcodec') == 'none':
1159             return 'audio only'
1160         if format.get('resolution') is not None:
1161             return format['resolution']
1162         if format.get('height') is not None:
1163             if format.get('width') is not None:
1164                 res = '%sx%s' % (format['width'], format['height'])
1165             else:
1166                 res = '%sp' % format['height']
1167         elif format.get('width') is not None:
1168             res = '?x%d' % format['width']
1169         else:
1170             res = default
1171         return res
1172
1173     def _format_note(self, fdict):
1174         res = ''
1175         if fdict.get('ext') in ['f4f', 'f4m']:
1176             res += '(unsupported) '
1177         if fdict.get('format_note') is not None:
1178             res += fdict['format_note'] + ' '
1179         if fdict.get('tbr') is not None:
1180             res += '%4dk ' % fdict['tbr']
1181         if fdict.get('container') is not None:
1182             if res:
1183                 res += ', '
1184             res += '%s container' % fdict['container']
1185         if (fdict.get('vcodec') is not None and
1186                 fdict.get('vcodec') != 'none'):
1187             if res:
1188                 res += ', '
1189             res += fdict['vcodec']
1190             if fdict.get('vbr') is not None:
1191                 res += '@'
1192         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1193             res += 'video@'
1194         if fdict.get('vbr') is not None:
1195             res += '%4dk' % fdict['vbr']
1196         if fdict.get('acodec') is not None:
1197             if res:
1198                 res += ', '
1199             if fdict['acodec'] == 'none':
1200                 res += 'video only'
1201             else:
1202                 res += '%-5s' % fdict['acodec']
1203         elif fdict.get('abr') is not None:
1204             if res:
1205                 res += ', '
1206             res += 'audio'
1207         if fdict.get('abr') is not None:
1208             res += '@%3dk' % fdict['abr']
1209         if fdict.get('asr') is not None:
1210             res += ' (%5dHz)' % fdict['asr']
1211         if fdict.get('filesize') is not None:
1212             if res:
1213                 res += ', '
1214             res += format_bytes(fdict['filesize'])
1215         elif fdict.get('filesize_approx') is not None:
1216             if res:
1217                 res += ', '
1218             res += '~' + format_bytes(fdict['filesize_approx'])
1219         return res
1220
1221     def list_formats(self, info_dict):
1222         def line(format, idlen=20):
1223             return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1224                 format['format_id'],
1225                 format['ext'],
1226                 self.format_resolution(format),
1227                 self._format_note(format),
1228             ))
1229
1230         formats = info_dict.get('formats', [info_dict])
1231         idlen = max(len('format code'),
1232                     max(len(f['format_id']) for f in formats))
1233         formats_s = [line(f, idlen) for f in formats]
1234         if len(formats) > 1:
1235             formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1236             formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1237
1238         header_line = line({
1239             'format_id': 'format code', 'ext': 'extension',
1240             'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1241         self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1242                        (info_dict['id'], header_line, '\n'.join(formats_s)))
1243
1244     def urlopen(self, req):
1245         """ Start an HTTP download """
1246
1247         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1248         # always respected by websites, some tend to give out URLs with non percent-encoded
1249         # non-ASCII characters (see telemb.py, ard.py [#3412])
1250         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1251         # To work around aforementioned issue we will replace request's original URL with
1252         # percent-encoded one
1253         req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
1254         url = req if req_is_string else req.get_full_url()
1255         url_escaped = escape_url(url)
1256
1257         # Substitute URL if any change after escaping
1258         if url != url_escaped:
1259             if req_is_string:
1260                 req = url_escaped
1261             else:
1262                 req = compat_urllib_request.Request(
1263                     url_escaped, data=req.data, headers=req.headers,
1264                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1265
1266         return self._opener.open(req, timeout=self._socket_timeout)
1267
1268     def print_debug_header(self):
1269         if not self.params.get('verbose'):
1270             return
1271
1272         if type('') is not compat_str:
1273             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1274             self.report_warning(
1275                 'Your Python is broken! Update to a newer and supported version')
1276
1277         encoding_str = (
1278             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1279                 locale.getpreferredencoding(),
1280                 sys.getfilesystemencoding(),
1281                 sys.stdout.encoding,
1282                 self.get_encoding()))
1283         write_string(encoding_str, encoding=None)
1284
1285         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1286         try:
1287             sp = subprocess.Popen(
1288                 ['git', 'rev-parse', '--short', 'HEAD'],
1289                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1290                 cwd=os.path.dirname(os.path.abspath(__file__)))
1291             out, err = sp.communicate()
1292             out = out.decode().strip()
1293             if re.match('[0-9a-f]+', out):
1294                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1295         except:
1296             try:
1297                 sys.exc_clear()
1298             except:
1299                 pass
1300         self._write_string('[debug] Python version %s - %s' %
1301                      (platform.python_version(), platform_name()) + '\n')
1302
1303         proxy_map = {}
1304         for handler in self._opener.handlers:
1305             if hasattr(handler, 'proxies'):
1306                 proxy_map.update(handler.proxies)
1307         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1308
1309     def _setup_opener(self):
1310         timeout_val = self.params.get('socket_timeout')
1311         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1312
1313         opts_cookiefile = self.params.get('cookiefile')
1314         opts_proxy = self.params.get('proxy')
1315
1316         if opts_cookiefile is None:
1317             self.cookiejar = compat_cookiejar.CookieJar()
1318         else:
1319             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1320                 opts_cookiefile)
1321             if os.access(opts_cookiefile, os.R_OK):
1322                 self.cookiejar.load()
1323
1324         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1325             self.cookiejar)
1326         if opts_proxy is not None:
1327             if opts_proxy == '':
1328                 proxies = {}
1329             else:
1330                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1331         else:
1332             proxies = compat_urllib_request.getproxies()
1333             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1334             if 'http' in proxies and 'https' not in proxies:
1335                 proxies['https'] = proxies['http']
1336         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1337
1338         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1339         https_handler = make_HTTPS_handler(
1340             self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1341         ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1342         opener = compat_urllib_request.build_opener(
1343             https_handler, proxy_handler, cookie_processor, ydlh)
1344         # Delete the default user-agent header, which would otherwise apply in
1345         # cases where our custom HTTP handler doesn't come into play
1346         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1347         opener.addheaders = []
1348         self._opener = opener
1349
1350     def encode(self, s):
1351         if isinstance(s, bytes):
1352             return s  # Already encoded
1353
1354         try:
1355             return s.encode(self.get_encoding())
1356         except UnicodeEncodeError as err:
1357             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1358             raise
1359
1360     def get_encoding(self):
1361         encoding = self.params.get('encoding')
1362         if encoding is None:
1363             encoding = preferredencoding()
1364         return encoding