_ Git - youtube-dl/blob - youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import datetime
   8 import errno
   9 import io
  10 import json
  11 import locale
  12 import os
  13 import platform
  14 import re
  15 import shutil
  16 import subprocess
  17 import socket
  18 import sys
  19 import time
  20 import traceback
  21
  22 if os.name == 'nt':
  23     import ctypes
  24
  25 from .utils import (
  26     compat_cookiejar,
  27     compat_http_client,
  28     compat_str,
  29     compat_urllib_error,
  30     compat_urllib_request,
  31     escape_url,
  32     ContentTooShortError,
  33     date_from_str,
  34     DateRange,
  35     DEFAULT_OUTTMPL,
  36     determine_ext,
  37     DownloadError,
  38     encodeFilename,
  39     ExtractorError,
  40     format_bytes,
  41     formatSeconds,
  42     get_term_width,
  43     locked_file,
  44     make_HTTPS_handler,
  45     MaxDownloadsReached,
  46     PagedList,
  47     PostProcessingError,
  48     platform_name,
  49     preferredencoding,
  50     SameFileError,
  51     sanitize_filename,
  52     subtitles_filename,
  53     takewhile_inclusive,
  54     UnavailableVideoError,
  55     url_basename,
  56     write_json_file,
  57     write_string,
  58     YoutubeDLHandler,
  59     prepend_extension,
  60 )
  61 from .cache import Cache
  62 from .extractor import get_info_extractor, gen_extractors
  63 from .downloader import get_suitable_downloader
  64 from .postprocessor import FFmpegMergerPP
  65 from .version import __version__
  66
  67
  68 class YoutubeDL(object):
  69     """YoutubeDL class.
  70
   71     YoutubeDL objects are the ones responsible for downloading the
  72     actual video file and writing it to disk if the user has requested
  73     it, among some other tasks. In most cases there should be one per
  74     program. As, given a video URL, the downloader doesn't know how to
  75     extract all the needed information, task that InfoExtractors do, it
  76     has to pass the URL to one of them.
  77
  78     For this, YoutubeDL objects have a method that allows
  79     InfoExtractors to be registered in a given order. When it is passed
  80     a URL, the YoutubeDL object handles it to the first InfoExtractor it
  81     finds that reports being able to handle it. The InfoExtractor extracts
  82     all the information about the video or videos the URL refers to, and
  83     YoutubeDL process the extracted information, possibly using a File
  84     Downloader to download the video.
  85
  86     YoutubeDL objects accept a lot of parameters. In order not to saturate
  87     the object constructor with arguments, it receives a dictionary of
  88     options instead. These options are available through the params
  89     attribute for the InfoExtractors to use. The YoutubeDL also
  90     registers itself as the downloader in charge for the InfoExtractors
  91     that are added to it, so this is a "mutual registration".
  92
  93     Available options:
  94
  95     username:          Username for authentication purposes.
  96     password:          Password for authentication purposes.
   97     videopassword:     Password for accessing a video.
  98     usenetrc:          Use netrc for authentication instead.
  99     verbose:           Print additional info to stdout.
 100     quiet:             Do not print messages to stdout.
 101     no_warnings:       Do not print out anything for warnings.
 102     forceurl:          Force printing final URL.
 103     forcetitle:        Force printing title.
 104     forceid:           Force printing ID.
 105     forcethumbnail:    Force printing thumbnail URL.
 106     forcedescription:  Force printing description.
 107     forcefilename:     Force printing final filename.
 108     forceduration:     Force printing duration.
 109     forcejson:         Force printing info_dict as JSON.
 110     simulate:          Do not download the video files.
 111     format:            Video format code.
 112     format_limit:      Highest quality format to try.
 113     outtmpl:           Template for output names.
 114     restrictfilenames: Do not allow "&" and spaces in file names
 115     ignoreerrors:      Do not stop on download errors.
 116     nooverwrites:      Prevent overwriting files.
 117     playliststart:     Playlist item to start at.
 118     playlistend:       Playlist item to end at.
 119     matchtitle:        Download only matching titles.
 120     rejecttitle:       Reject downloads for matching titles.
 121     logger:            Log messages to a logging.Logger instance.
 122     logtostderr:       Log messages to stderr instead of stdout.
 123     writedescription:  Write the video description to a .description file
 124     writeinfojson:     Write the video description to a .info.json file
 125     writeannotations:  Write the video annotations to a .annotations.xml file
 126     writethumbnail:    Write the thumbnail image to a file
 127     writesubtitles:    Write the video subtitles to a file
 128     writeautomaticsub: Write the automatic subtitles to a file
 129     allsubtitles:      Downloads all the subtitles of the video
 130                        (requires writesubtitles or writeautomaticsub)
 131     listsubtitles:     Lists all available subtitles for the video
 132     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
 133     subtitleslangs:    List of languages of the subtitles to download
 134     keepvideo:         Keep the video file after post-processing
 135     daterange:         A DateRange object, download only if the upload_date is in the range.
 136     skip_download:     Skip the actual download of the video file
 137     cachedir:          Location of the cache files in the filesystem.
 138                        False to disable filesystem cache.
 139     noplaylist:        Download single video instead of a playlist if in doubt.
 140     age_limit:         An integer representing the user's age in years.
 141                        Unsuitable videos for the given age are skipped.
 142     min_views:         An integer representing the minimum view count the video
 143                        must have in order to not be skipped.
 144                        Videos without view count information are always
 145                        downloaded. None for no limit.
 146     max_views:         An integer representing the maximum view count.
 147                        Videos that are more popular than that are not
 148                        downloaded.
 149                        Videos without view count information are always
 150                        downloaded. None for no limit.
 151     download_archive:  File name of a file where all downloads are recorded.
 152                        Videos already present in the file are not downloaded
 153                        again.
 154     cookiefile:        File name where cookies should be read from and dumped to.
 155     nocheckcertificate:Do not verify SSL certificates
 156     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 157                        At the moment, this is only supported by YouTube.
 158     proxy:             URL of the proxy server to use
 159     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 160     bidi_workaround:   Work around buggy terminals without bidirectional text
  161                        support, using fribidi
 162     debug_printtraffic:Print out sent and received HTTP traffic
 163     include_ads:       Download ads as well
 164     default_search:    Prepend this string if an input url is not valid.
 165                        'auto' for elaborate guessing
 166     encoding:          Use this encoding instead of the system-specified.
 167     extract_flat:      Do not resolve URLs, return the immediate result.
 168
 169     The following parameters are not used by YoutubeDL itself, they are used by
 170     the FileDownloader:
 171     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 172     noresizebuffer, retries, continuedl, noprogress, consoletitle
 173
 174     The following options are used by the post processors:
 175     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
 176                        otherwise prefer avconv.
 177     exec_cmd:          Arbitrary command to run after downloading
 178     """
 179
 180     params = None
 181     _ies = []
 182     _pps = []
 183     _download_retcode = None
 184     _num_downloads = None
 185     _screen_file = None
 186
 187     def __init__(self, params=None):
 188         """Create a FileDownloader object with the given options."""
 189         if params is None:
 190             params = {}
 191         self._ies = []
 192         self._ies_instances = {}
 193         self._pps = []
 194         self._progress_hooks = []
 195         self._download_retcode = 0
 196         self._num_downloads = 0
 197         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 198         self._err_file = sys.stderr
 199         self.params = params
 200         self.cache = Cache(self)
 201
 202         if params.get('bidi_workaround', False):
 203             try:
 204                 import pty
 205                 master, slave = pty.openpty()
 206                 width = get_term_width()
 207                 if width is None:
 208                     width_args = []
 209                 else:
 210                     width_args = ['-w', str(width)]
 211                 sp_kwargs = dict(
 212                     stdin=subprocess.PIPE,
 213                     stdout=slave,
 214                     stderr=self._err_file)
 215                 try:
 216                     self._output_process = subprocess.Popen(
 217                         ['bidiv'] + width_args, **sp_kwargs
 218                     )
 219                 except OSError:
 220                     self._output_process = subprocess.Popen(
 221                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 222                 self._output_channel = os.fdopen(master, 'rb')
 223             except OSError as ose:
 224                 if ose.errno == 2:
 225                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 226                 else:
 227                     raise
 228
 229         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 230                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 231                 and not params['restrictfilenames']):
 232             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 233             self.report_warning(
 234                 'Assuming --restrict-filenames since file system encoding '
 235                 'cannot encode all charactes. '
 236                 'Set the LC_ALL environment variable to fix this.')
 237             self.params['restrictfilenames'] = True
 238
 239         if '%(stitle)s' in self.params.get('outtmpl', ''):
 240             self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 241
 242         self._setup_opener()
 243
 244     def add_info_extractor(self, ie):
 245         """Add an InfoExtractor object to the end of the list."""
 246         self._ies.append(ie)
 247         self._ies_instances[ie.ie_key()] = ie
 248         ie.set_downloader(self)
 249
 250     def get_info_extractor(self, ie_key):
 251         """
 252         Get an instance of an IE with name ie_key, it will try to get one from
 253         the _ies list, if there's no instance it will create a new one and add
 254         it to the extractor list.
 255         """
 256         ie = self._ies_instances.get(ie_key)
 257         if ie is None:
 258             ie = get_info_extractor(ie_key)()
 259             self.add_info_extractor(ie)
 260         return ie
 261
 262     def add_default_info_extractors(self):
 263         """
 264         Add the InfoExtractors returned by gen_extractors to the end of the list
 265         """
 266         for ie in gen_extractors():
 267             self.add_info_extractor(ie)
 268
 269     def add_post_processor(self, pp):
 270         """Add a PostProcessor object to the end of the chain."""
 271         self._pps.append(pp)
 272         pp.set_downloader(self)
 273
 274     def add_progress_hook(self, ph):
 275         """Add the progress hook (currently only for the file downloader)"""
 276         self._progress_hooks.append(ph)
 277
 278     def _bidi_workaround(self, message):
 279         if not hasattr(self, '_output_channel'):
 280             return message
 281
 282         assert hasattr(self, '_output_process')
 283         assert isinstance(message, compat_str)
 284         line_count = message.count('\n') + 1
 285         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 286         self._output_process.stdin.flush()
 287         res = ''.join(self._output_channel.readline().decode('utf-8')
 288                        for _ in range(line_count))
 289         return res[:-len('\n')]
 290
 291     def to_screen(self, message, skip_eol=False):
 292         """Print message to stdout if not in quiet mode."""
 293         return self.to_stdout(message, skip_eol, check_quiet=True)
 294
 295     def _write_string(self, s, out=None):
 296         write_string(s, out=out, encoding=self.params.get('encoding'))
 297
 298     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 299         """Print message to stdout if not in quiet mode."""
 300         if self.params.get('logger'):
 301             self.params['logger'].debug(message)
 302         elif not check_quiet or not self.params.get('quiet', False):
 303             message = self._bidi_workaround(message)
 304             terminator = ['\n', ''][skip_eol]
 305             output = message + terminator
 306
 307             self._write_string(output, self._screen_file)
 308
 309     def to_stderr(self, message):
 310         """Print message to stderr."""
 311         assert isinstance(message, compat_str)
 312         if self.params.get('logger'):
 313             self.params['logger'].error(message)
 314         else:
 315             message = self._bidi_workaround(message)
 316             output = message + '\n'
 317             self._write_string(output, self._err_file)
 318
 319     def to_console_title(self, message):
 320         if not self.params.get('consoletitle', False):
 321             return
 322         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 323             # c_wchar_p() might not be necessary if `message` is
 324             # already of type unicode()
 325             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 326         elif 'TERM' in os.environ:
 327             self._write_string('\033]0;%s\007' % message, self._screen_file)
 328
 329     def save_console_title(self):
 330         if not self.params.get('consoletitle', False):
 331             return
 332         if 'TERM' in os.environ:
 333             # Save the title on stack
 334             self._write_string('\033[22;0t', self._screen_file)
 335
 336     def restore_console_title(self):
 337         if not self.params.get('consoletitle', False):
 338             return
 339         if 'TERM' in os.environ:
 340             # Restore the title from stack
 341             self._write_string('\033[23;0t', self._screen_file)
 342
 343     def __enter__(self):
 344         self.save_console_title()
 345         return self
 346
 347     def __exit__(self, *args):
 348         self.restore_console_title()
 349
 350         if self.params.get('cookiefile') is not None:
 351             self.cookiejar.save()
 352
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        message, if not None, is printed through to_stderr().
        tb, if given, is additional traceback information. It is only
        printed when the 'verbose' param is set; when it is None in that
        case, a traceback is built from the exception currently being
        handled, or from the current call stack if there is none.

        Raises DownloadError unless the 'ignoreerrors' param is set; in
        that case the download return code is set to 1 instead.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # An exception may carry the exc_info of an underlying
                    # error; print that traceback first.
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: show where we were called from.
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Prefer the wrapped exc_info of the active exception, if any,
            # as the cause attached to the DownloadError.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1
 382
 383     def report_warning(self, message):
 384         '''
 385         Print the message to stderr, it will be prefixed with 'WARNING:'
 386         If stderr is a tty file the 'WARNING:' will be colored
 387         '''
 388         if self.params.get('logger') is not None:
 389             self.params['logger'].warning(message)
 390         else:
 391             if self.params.get('no_warnings'):
 392                 return
 393             if self._err_file.isatty() and os.name != 'nt':
 394                 _msg_header = '\033[0;33mWARNING:\033[0m'
 395             else:
 396                 _msg_header = 'WARNING:'
 397             warning_message = '%s %s' % (_msg_header, message)
 398             self.to_stderr(warning_message)
 399
 400     def report_error(self, message, tb=None):
 401         '''
 402         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 403         in red if stderr is a tty file.
 404         '''
 405         if self._err_file.isatty() and os.name != 'nt':
 406             _msg_header = '\033[0;31mERROR:\033[0m'
 407         else:
 408             _msg_header = 'ERROR:'
 409         error_message = '%s %s' % (_msg_header, message)
 410         self.trouble(error_message, tb)
 411
 412     def report_file_already_downloaded(self, file_name):
 413         """Report file has already been fully downloaded."""
 414         try:
 415             self.to_screen('[download] %s has already been downloaded' % file_name)
 416         except UnicodeEncodeError:
 417             self.to_screen('[download] The file has already been downloaded')
 418
 419     def prepare_filename(self, info_dict):
 420         """Generate the output filename."""
 421         try:
 422             template_dict = dict(info_dict)
 423
 424             template_dict['epoch'] = int(time.time())
 425             autonumber_size = self.params.get('autonumber_size')
 426             if autonumber_size is None:
 427                 autonumber_size = 5
 428             autonumber_templ = '%0' + str(autonumber_size) + 'd'
 429             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 430             if template_dict.get('playlist_index') is not None:
 431                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
 432             if template_dict.get('resolution') is None:
 433                 if template_dict.get('width') and template_dict.get('height'):
 434                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 435                 elif template_dict.get('height'):
 436                     template_dict['resolution'] = '%sp' % template_dict['height']
 437                 elif template_dict.get('width'):
 438                     template_dict['resolution'] = '?x%d' % template_dict['width']
 439
 440             sanitize = lambda k, v: sanitize_filename(
 441                 compat_str(v),
 442                 restricted=self.params.get('restrictfilenames'),
 443                 is_id=(k == 'id'))
 444             template_dict = dict((k, sanitize(k, v))
 445                                  for k, v in template_dict.items()
 446                                  if v is not None)
 447             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 448
 449             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 450             tmpl = os.path.expanduser(outtmpl)
 451             filename = tmpl % template_dict
 452             return filename
 453         except ValueError as err:
 454             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 455             return None
 456
 457     def _match_entry(self, info_dict):
 458         """ Returns None iff the file should be downloaded """
 459
 460         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 461         if 'title' in info_dict:
 462             # This can happen when we're just evaluating the playlist
 463             title = info_dict['title']
 464             matchtitle = self.params.get('matchtitle', False)
 465             if matchtitle:
 466                 if not re.search(matchtitle, title, re.IGNORECASE):
 467                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 468             rejecttitle = self.params.get('rejecttitle', False)
 469             if rejecttitle:
 470                 if re.search(rejecttitle, title, re.IGNORECASE):
 471                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 472         date = info_dict.get('upload_date', None)
 473         if date is not None:
 474             dateRange = self.params.get('daterange', DateRange())
 475             if date not in dateRange:
 476                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 477         view_count = info_dict.get('view_count', None)
 478         if view_count is not None:
 479             min_views = self.params.get('min_views')
 480             if min_views is not None and view_count < min_views:
 481                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 482             max_views = self.params.get('max_views')
 483             if max_views is not None and view_count > max_views:
 484                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 485         age_limit = self.params.get('age_limit')
 486         if age_limit is not None:
 487             actual_age_limit = info_dict.get('age_limit')
 488             if actual_age_limit is None:
 489                 actual_age_limit = 0
 490             if age_limit < actual_age_limit:
 491                 return 'Skipping "' + title + '" because it is age restricted'
 492         if self.in_download_archive(info_dict):
 493             return '%s has already been recorded in archive' % video_title
 494         return None
 495
 496     @staticmethod
 497     def add_extra_info(info_dict, extra_info):
 498         '''Set the keys from extra_info in info dict if they are missing'''
 499         for key, value in extra_info.items():
 500             info_dict.setdefault(key, value)
 501
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
                     process=True):
        '''
        Extract the information for url with the first suitable extractor.

        url        -- the URL to extract
        download   -- passed on to process_ie_result()
        ie_key     -- if given, only the extractor with this key is tried;
                      otherwise all registered extractors are tried in order
        extra_info -- dict with extra values, passed on to
                      process_ie_result() (it is not modified here)
        process    -- if true, the extractor result is run through
                      process_ie_result() and that result is returned;
                      otherwise the extractor result is returned as-is

        Returns None when the extractor returns None, when an error was
        reported without raising, or when no extractor is suitable.
        '''

        if ie_key:
            ies = [self.get_info_extractor(ie_key)]
        else:
            ies = self._ies

        for ie in ies:
            if not ie.suitable(url):
                continue

            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            try:
                ie_result = ie.extract(url)
                if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
                    break
                if isinstance(ie_result, list):
                    # Backwards compatibility: old IE result format
                    ie_result = {
                        '_type': 'compat_list',
                        'entries': ie_result,
                    }
                self.add_default_extra_info(ie_result, ie, url)
                if process:
                    return self.process_ie_result(ie_result, download, extra_info)
                else:
                    return ie_result
            except ExtractorError as de: # An error we somewhat expected
                self.report_error(compat_str(de), de.format_traceback())
                break
            except MaxDownloadsReached:
                # Must always propagate to the caller.
                raise
            except Exception as e:
                # Unexpected errors are only swallowed (after being reported)
                # when the user asked to ignore errors.
                if self.params.get('ignoreerrors', False):
                    self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                    break
                else:
                    raise
        else:
            # The loop finished without break: no extractor accepted the URL.
            self.report_error('no suitable InfoExtractor for URL %s' % url)
 551
 552     def add_default_extra_info(self, ie_result, ie, url):
 553         self.add_extra_info(ie_result, {
 554             'extractor': ie.IE_NAME,
 555             'webpage_url': url,
 556             'webpage_url_basename': url_basename(url),
 557             'extractor_key': ie.ie_key(),
 558         })
 559
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        The handling depends on ie_result['_type'] ('video' when absent):
        'video', 'url', 'url_transparent', 'playlist' or 'compat_list'.
        extra_info holds values that are added to video results when they
        are missing there.

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        Raises Exception for an unknown result type.
        """

        result_type = ie_result.get('_type', 'video')

        # With 'extract_flat', URL references are returned unresolved.
        if self.params.get('extract_flat', False):
            if result_type in ('url', 'url_transparent'):
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            def make_result(embedded_info):
                # Start from the embedding page's result and replace the
                # listed fields with those of the embedded result (dropping
                # them when the embedded result has none).
                new_result = ie_result.copy()
                for f in ('_type', 'url', 'ext', 'player_url', 'formats',
                          'entries', 'ie_key', 'duration',
                          'subtitles', 'annotations', 'format',
                          'thumbnail', 'thumbnails'):
                    if f in new_result:
                        del new_result[f]
                    if f in embedded_info:
                        new_result[f] = embedded_info[f]
                return new_result
            new_result = make_result(info)

            assert new_result.get('_type') != 'url_transparent'
            if new_result.get('_type') == 'compat_list':
                new_result['entries'] = [
                    make_result(e) for e in new_result['entries']]

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # 'playliststart' is 1-based; convert it to a 0-based index.
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

            if isinstance(ie_result['entries'], list):
                n_all_entries = len(ie_result['entries'])
                entries = ie_result['entries'][playliststart:playlistend]
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            else:
                assert isinstance(ie_result['entries'], PagedList)
                entries = ie_result['entries'].getslice(
                    playliststart, playlistend)
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
                # Playlist-level values handed down to each entry.
                extra = {
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

                # Skip entries rejected by the filters; the reason is shown.
                reason = self._match_entry(entry)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                    continue

                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            return ie_result
        elif result_type == 'compat_list':
            def _fixup(r):
                # Give each entry the extractor/webpage fields of the list
                # result, unless the entry already has them.
                self.add_extra_info(r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    })
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
 680
 681     def select_format(self, format_spec, available_formats):
 682         if format_spec == 'best' or format_spec is None:
 683             return available_formats[-1]
 684         elif format_spec == 'worst':
 685             return available_formats[0]
 686         elif format_spec == 'bestaudio':
 687             audio_formats = [
 688                 f for f in available_formats
 689                 if f.get('vcodec') == 'none']
 690             if audio_formats:
 691                 return audio_formats[-1]
 692         elif format_spec == 'worstaudio':
 693             audio_formats = [
 694                 f for f in available_formats
 695                 if f.get('vcodec') == 'none']
 696             if audio_formats:
 697                 return audio_formats[0]
 698         elif format_spec == 'bestvideo':
 699             video_formats = [
 700                 f for f in available_formats
 701                 if f.get('acodec') == 'none']
 702             if video_formats:
 703                 return video_formats[-1]
 704         elif format_spec == 'worstvideo':
 705             video_formats = [
 706                 f for f in available_formats
 707                 if f.get('acodec') == 'none']
 708             if video_formats:
 709                 return video_formats[0]
 710         else:
 711             extensions = ['mp4', 'flv', 'webm', '3gp']
 712             if format_spec in extensions:
 713                 filter_f = lambda f: f['ext'] == format_spec
 714             else:
 715                 filter_f = lambda f: f['format_id'] == format_spec
 716             matches = list(filter(filter_f, available_formats))
 717             if matches:
 718                 return matches[-1]
 719         return None
 720
 721     def process_video_result(self, info_dict, download=True):
 722         assert info_dict.get('_type', 'video') == 'video'
 723
 724         if 'id' not in info_dict:
 725             raise ExtractorError('Missing "id" field in extractor result')
 726         if 'title' not in info_dict:
 727             raise ExtractorError('Missing "title" field in extractor result')
 728
 729         if 'playlist' not in info_dict:
 730             # It isn't part of a playlist
 731             info_dict['playlist'] = None
 732             info_dict['playlist_index'] = None
 733
 734         thumbnails = info_dict.get('thumbnails')
 735         if thumbnails:
 736             thumbnails.sort(key=lambda t: (
 737                 t.get('width'), t.get('height'), t.get('url')))
 738             for t in thumbnails:
 739                 if 'width' in t and 'height' in t:
 740                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
 741
 742         if thumbnails and 'thumbnail' not in info_dict:
 743             info_dict['thumbnail'] = thumbnails[-1]['url']
 744
 745         if 'display_id' not in info_dict and 'id' in info_dict:
 746             info_dict['display_id'] = info_dict['id']
 747
 748         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
 749             upload_date = datetime.datetime.utcfromtimestamp(
 750                 info_dict['timestamp'])
 751             info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
 752
 753         # This extractors handle format selection themselves
 754         if info_dict['extractor'] in ['Youku']:
 755             if download:
 756                 self.process_info(info_dict)
 757             return info_dict
 758
 759         # We now pick which formats have to be downloaded
 760         if info_dict.get('formats') is None:
 761             # There's only one format available
 762             formats = [info_dict]
 763         else:
 764             formats = info_dict['formats']
 765
 766         if not formats:
 767             raise ExtractorError('No video formats found!')
 768
 769         # We check that all the formats have the format and format_id fields
 770         for i, format in enumerate(formats):
 771             if 'url' not in format:
 772                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
 773
 774             if format.get('format_id') is None:
 775                 format['format_id'] = compat_str(i)
 776             if format.get('format') is None:
 777                 format['format'] = '{id} - {res}{note}'.format(
 778                     id=format['format_id'],
 779                     res=self.format_resolution(format),
 780                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
 781                 )
 782             # Automatically determine file extension if missing
 783             if 'ext' not in format:
 784                 format['ext'] = determine_ext(format['url']).lower()
 785
 786         format_limit = self.params.get('format_limit', None)
 787         if format_limit:
 788             formats = list(takewhile_inclusive(
 789                 lambda f: f['format_id'] != format_limit, formats
 790             ))
 791
 792         # TODO Central sorting goes here
 793
 794         if formats[0] is not info_dict:
 795             # only set the 'formats' fields if the original info_dict list them
 796             # otherwise we end up with a circular reference, the first (and unique)
 797             # element in the 'formats' field in info_dict is info_dict itself,
 798             # wich can't be exported to json
 799             info_dict['formats'] = formats
 800         if self.params.get('listformats', None):
 801             self.list_formats(info_dict)
 802             return
 803
 804         req_format = self.params.get('format')
 805         if req_format is None:
 806             req_format = 'best'
 807         formats_to_download = []
 808         # The -1 is for supporting YoutubeIE
 809         if req_format in ('-1', 'all'):
 810             formats_to_download = formats
 811         else:
 812             # We can accept formats requested in the format: 34/5/best, we pick
 813             # the first that is available, starting from left
 814             req_formats = req_format.split('/')
 815             for rf in req_formats:
 816                 if re.match(r'.+?\+.+?', rf) is not None:
 817                     # Two formats have been requested like '137+139'
 818                     format_1, format_2 = rf.split('+')
 819                     formats_info = (self.select_format(format_1, formats),
 820                         self.select_format(format_2, formats))
 821                     if all(formats_info):
 822                         selected_format = {
 823                             'requested_formats': formats_info,
 824                             'format': rf,
 825                             'ext': formats_info[0]['ext'],
 826                         }
 827                     else:
 828                         selected_format = None
 829                 else:
 830                     selected_format = self.select_format(rf, formats)
 831                 if selected_format is not None:
 832                     formats_to_download = [selected_format]
 833                     break
 834         if not formats_to_download:
 835             raise ExtractorError('requested format not available',
 836                                  expected=True)
 837
 838         if download:
 839             if len(formats_to_download) > 1:
 840                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
 841             for format in formats_to_download:
 842                 new_info = dict(info_dict)
 843                 new_info.update(format)
 844                 self.process_info(new_info)
 845         # We update the info dict with the best quality format (backwards compatibility)
 846         info_dict.update(formats_to_download[-1])
 847         return info_dict
 848
 849     def process_info(self, info_dict):
 850         """Process a single resolved IE result."""
 851
 852         assert info_dict.get('_type', 'video') == 'video'
 853
 854         max_downloads = self.params.get('max_downloads')
 855         if max_downloads is not None:
 856             if self._num_downloads >= int(max_downloads):
 857                 raise MaxDownloadsReached()
 858
 859         info_dict['fulltitle'] = info_dict['title']
 860         if len(info_dict['title']) > 200:
 861             info_dict['title'] = info_dict['title'][:197] + '...'
 862
 863         # Keep for backwards compatibility
 864         info_dict['stitle'] = info_dict['title']
 865
 866         if 'format' not in info_dict:
 867             info_dict['format'] = info_dict['ext']
 868
 869         reason = self._match_entry(info_dict)
 870         if reason is not None:
 871             self.to_screen('[download] ' + reason)
 872             return
 873
 874         self._num_downloads += 1
 875
 876         filename = self.prepare_filename(info_dict)
 877
 878         # Forced printings
 879         if self.params.get('forcetitle', False):
 880             self.to_stdout(info_dict['fulltitle'])
 881         if self.params.get('forceid', False):
 882             self.to_stdout(info_dict['id'])
 883         if self.params.get('forceurl', False):
 884             # For RTMP URLs, also include the playpath
 885             self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
 886         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
 887             self.to_stdout(info_dict['thumbnail'])
 888         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
 889             self.to_stdout(info_dict['description'])
 890         if self.params.get('forcefilename', False) and filename is not None:
 891             self.to_stdout(filename)
 892         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
 893             self.to_stdout(formatSeconds(info_dict['duration']))
 894         if self.params.get('forceformat', False):
 895             self.to_stdout(info_dict['format'])
 896         if self.params.get('forcejson', False):
 897             info_dict['_filename'] = filename
 898             self.to_stdout(json.dumps(info_dict))
 899
 900         # Do nothing else if in simulate mode
 901         if self.params.get('simulate', False):
 902             return
 903
 904         if filename is None:
 905             return
 906
 907         try:
 908             dn = os.path.dirname(encodeFilename(filename))
 909             if dn and not os.path.exists(dn):
 910                 os.makedirs(dn)
 911         except (OSError, IOError) as err:
 912             self.report_error('unable to create directory ' + compat_str(err))
 913             return
 914
 915         if self.params.get('writedescription', False):
 916             descfn = filename + '.description'
 917             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
 918                 self.to_screen('[info] Video description is already present')
 919             else:
 920                 try:
 921                     self.to_screen('[info] Writing video description to: ' + descfn)
 922                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
 923                         descfile.write(info_dict['description'])
 924                 except (KeyError, TypeError):
 925                     self.report_warning('There\'s no description to write.')
 926                 except (OSError, IOError):
 927                     self.report_error('Cannot write description file ' + descfn)
 928                     return
 929
 930         if self.params.get('writeannotations', False):
 931             annofn = filename + '.annotations.xml'
 932             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
 933                 self.to_screen('[info] Video annotations are already present')
 934             else:
 935                 try:
 936                     self.to_screen('[info] Writing video annotations to: ' + annofn)
 937                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
 938                         annofile.write(info_dict['annotations'])
 939                 except (KeyError, TypeError):
 940                     self.report_warning('There are no annotations to write.')
 941                 except (OSError, IOError):
 942                     self.report_error('Cannot write annotations file: ' + annofn)
 943                     return
 944
 945         subtitles_are_requested = any([self.params.get('writesubtitles', False),
 946                                        self.params.get('writeautomaticsub')])
 947
 948         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
 949             # subtitles download errors are already managed as troubles in relevant IE
 950             # that way it will silently go on when used with unsupporting IE
 951             subtitles = info_dict['subtitles']
 952             sub_format = self.params.get('subtitlesformat', 'srt')
 953             for sub_lang in subtitles.keys():
 954                 sub = subtitles[sub_lang]
 955                 if sub is None:
 956                     continue
 957                 try:
 958                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
 959                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
 960                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
 961                     else:
 962                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
 963                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
 964                                 subfile.write(sub)
 965                 except (OSError, IOError):
 966                     self.report_error('Cannot write subtitles file ' + sub_filename)
 967                     return
 968
 969         if self.params.get('writeinfojson', False):
 970             infofn = os.path.splitext(filename)[0] + '.info.json'
 971             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
 972                 self.to_screen('[info] Video description metadata is already present')
 973             else:
 974                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
 975                 try:
 976                     write_json_file(info_dict, encodeFilename(infofn))
 977                 except (OSError, IOError):
 978                     self.report_error('Cannot write metadata to JSON file ' + infofn)
 979                     return
 980
 981         if self.params.get('writethumbnail', False):
 982             if info_dict.get('thumbnail') is not None:
 983                 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
 984                 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
 985                 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
 986                     self.to_screen('[%s] %s: Thumbnail is already present' %
 987                                    (info_dict['extractor'], info_dict['id']))
 988                 else:
 989                     self.to_screen('[%s] %s: Downloading thumbnail ...' %
 990                                    (info_dict['extractor'], info_dict['id']))
 991                     try:
 992                         uf = self.urlopen(info_dict['thumbnail'])
 993                         with open(thumb_filename, 'wb') as thumbf:
 994                             shutil.copyfileobj(uf, thumbf)
 995                         self.to_screen('[%s] %s: Writing thumbnail to: %s' %
 996                             (info_dict['extractor'], info_dict['id'], thumb_filename))
 997                     except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 998                         self.report_warning('Unable to download thumbnail "%s": %s' %
 999                             (info_dict['thumbnail'], compat_str(err)))
1000
1001         if not self.params.get('skip_download', False):
1002             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
1003                 success = True
1004             else:
1005                 try:
1006                     def dl(name, info):
1007                         fd = get_suitable_downloader(info)(self, self.params)
1008                         for ph in self._progress_hooks:
1009                             fd.add_progress_hook(ph)
1010                         if self.params.get('verbose'):
1011                             self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1012                         return fd.download(name, info)
1013                     if info_dict.get('requested_formats') is not None:
1014                         downloaded = []
1015                         success = True
1016                         merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1017                         if not merger._get_executable():
1018                             postprocessors = []
1019                             self.report_warning('You have requested multiple '
1020                                 'formats but ffmpeg or avconv are not installed.'
1021                                 ' The formats won\'t be merged')
1022                         else:
1023                             postprocessors = [merger]
1024                         for f in info_dict['requested_formats']:
1025                             new_info = dict(info_dict)
1026                             new_info.update(f)
1027                             fname = self.prepare_filename(new_info)
1028                             fname = prepend_extension(fname, 'f%s' % f['format_id'])
1029                             downloaded.append(fname)
1030                             partial_success = dl(fname, new_info)
1031                             success = success and partial_success
1032                         info_dict['__postprocessors'] = postprocessors
1033                         info_dict['__files_to_merge'] = downloaded
1034                     else:
1035                         # Just a single file
1036                         success = dl(filename, info_dict)
1037                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1038                     self.report_error('unable to download video data: %s' % str(err))
1039                     return
1040                 except (OSError, IOError) as err:
1041                     raise UnavailableVideoError(err)
1042                 except (ContentTooShortError, ) as err:
1043                     self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1044                     return
1045
1046             if success:
1047                 try:
1048                     self.post_process(filename, info_dict)
1049                 except (PostProcessingError) as err:
1050                     self.report_error('postprocessing: %s' % str(err))
1051                     return
1052
1053         self.record_download_archive(info_dict)
1054
1055     def download(self, url_list):
1056         """Download a given list of URLs."""
1057         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1058         if (len(url_list) > 1 and
1059                 '%' not in outtmpl
1060                 and self.params.get('max_downloads') != 1):
1061             raise SameFileError(outtmpl)
1062
1063         for url in url_list:
1064             try:
1065                 #It also downloads the videos
1066                 self.extract_info(url)
1067             except UnavailableVideoError:
1068                 self.report_error('unable to download video')
1069             except MaxDownloadsReached:
1070                 self.to_screen('[info] Maximum number of downloaded files reached.')
1071                 raise
1072
1073         return self._download_retcode
1074
1075     def download_with_info_file(self, info_filename):
1076         with io.open(info_filename, 'r', encoding='utf-8') as f:
1077             info = json.load(f)
1078         try:
1079             self.process_ie_result(info, download=True)
1080         except DownloadError:
1081             webpage_url = info.get('webpage_url')
1082             if webpage_url is not None:
1083                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1084                 return self.download([webpage_url])
1085             else:
1086                 raise
1087         return self._download_retcode
1088
1089     def post_process(self, filename, ie_info):
1090         """Run all the postprocessors on the given file."""
1091         info = dict(ie_info)
1092         info['filepath'] = filename
1093         keep_video = None
1094         pps_chain = []
1095         if ie_info.get('__postprocessors') is not None:
1096             pps_chain.extend(ie_info['__postprocessors'])
1097         pps_chain.extend(self._pps)
1098         for pp in pps_chain:
1099             try:
1100                 keep_video_wish, new_info = pp.run(info)
1101                 if keep_video_wish is not None:
1102                     if keep_video_wish:
1103                         keep_video = keep_video_wish
1104                     elif keep_video is None:
1105                         # No clear decision yet, let IE decide
1106                         keep_video = keep_video_wish
1107             except PostProcessingError as e:
1108                 self.report_error(e.msg)
1109         if keep_video is False and not self.params.get('keepvideo', False):
1110             try:
1111                 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1112                 os.remove(encodeFilename(filename))
1113             except (IOError, OSError):
1114                 self.report_warning('Unable to remove downloaded video file')
1115
1116     def _make_archive_id(self, info_dict):
1117         # Future-proof against any change in case
1118         # and backwards compatibility with prior versions
1119         extractor = info_dict.get('extractor_key')
1120         if extractor is None:
1121             if 'id' in info_dict:
1122                 extractor = info_dict.get('ie_key')  # key in a playlist
1123         if extractor is None:
1124             return None  # Incomplete video information
1125         return extractor.lower() + ' ' + info_dict['id']
1126
1127     def in_download_archive(self, info_dict):
1128         fn = self.params.get('download_archive')
1129         if fn is None:
1130             return False
1131
1132         vid_id = self._make_archive_id(info_dict)
1133         if vid_id is None:
1134             return False  # Incomplete video information
1135
1136         try:
1137             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1138                 for line in archive_file:
1139                     if line.strip() == vid_id:
1140                         return True
1141         except IOError as ioe:
1142             if ioe.errno != errno.ENOENT:
1143                 raise
1144         return False
1145
1146     def record_download_archive(self, info_dict):
1147         fn = self.params.get('download_archive')
1148         if fn is None:
1149             return
1150         vid_id = self._make_archive_id(info_dict)
1151         assert vid_id
1152         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1153             archive_file.write(vid_id + '\n')
1154
1155     @staticmethod
1156     def format_resolution(format, default='unknown'):
1157         if format.get('vcodec') == 'none':
1158             return 'audio only'
1159         if format.get('resolution') is not None:
1160             return format['resolution']
1161         if format.get('height') is not None:
1162             if format.get('width') is not None:
1163                 res = '%sx%s' % (format['width'], format['height'])
1164             else:
1165                 res = '%sp' % format['height']
1166         elif format.get('width') is not None:
1167             res = '?x%d' % format['width']
1168         else:
1169             res = default
1170         return res
1171
1172     def _format_note(self, fdict):
1173         res = ''
1174         if fdict.get('ext') in ['f4f', 'f4m']:
1175             res += '(unsupported) '
1176         if fdict.get('format_note') is not None:
1177             res += fdict['format_note'] + ' '
1178         if fdict.get('tbr') is not None:
1179             res += '%4dk ' % fdict['tbr']
1180         if fdict.get('container') is not None:
1181             if res:
1182                 res += ', '
1183             res += '%s container' % fdict['container']
1184         if (fdict.get('vcodec') is not None and
1185                 fdict.get('vcodec') != 'none'):
1186             if res:
1187                 res += ', '
1188             res += fdict['vcodec']
1189             if fdict.get('vbr') is not None:
1190                 res += '@'
1191         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1192             res += 'video@'
1193         if fdict.get('vbr') is not None:
1194             res += '%4dk' % fdict['vbr']
1195         if fdict.get('acodec') is not None:
1196             if res:
1197                 res += ', '
1198             if fdict['acodec'] == 'none':
1199                 res += 'video only'
1200             else:
1201                 res += '%-5s' % fdict['acodec']
1202         elif fdict.get('abr') is not None:
1203             if res:
1204                 res += ', '
1205             res += 'audio'
1206         if fdict.get('abr') is not None:
1207             res += '@%3dk' % fdict['abr']
1208         if fdict.get('asr') is not None:
1209             res += ' (%5dHz)' % fdict['asr']
1210         if fdict.get('filesize') is not None:
1211             if res:
1212                 res += ', '
1213             res += format_bytes(fdict['filesize'])
1214         elif fdict.get('filesize_approx') is not None:
1215             if res:
1216                 res += ', '
1217             res += '~' + format_bytes(fdict['filesize_approx'])
1218         return res
1219
1220     def list_formats(self, info_dict):
1221         def line(format, idlen=20):
1222             return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1223                 format['format_id'],
1224                 format['ext'],
1225                 self.format_resolution(format),
1226                 self._format_note(format),
1227             ))
1228
1229         formats = info_dict.get('formats', [info_dict])
1230         idlen = max(len('format code'),
1231                     max(len(f['format_id']) for f in formats))
1232         formats_s = [line(f, idlen) for f in formats]
1233         if len(formats) > 1:
1234             formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1235             formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1236
1237         header_line = line({
1238             'format_id': 'format code', 'ext': 'extension',
1239             'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1240         self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1241                        (info_dict['id'], header_line, '\n'.join(formats_s)))
1242
1243     def urlopen(self, req):
1244         """ Start an HTTP download """
1245
1246         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1247         # always respected by websites, some tend to give out URLs with non percent-encoded
1248         # non-ASCII characters (see telemb.py, ard.py [#3412])
1249         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1250         # To work around aforementioned issue we will replace request's original URL with
1251         # percent-encoded one
1252         url = req if isinstance(req, compat_str) else req.get_full_url()
1253         url_escaped = escape_url(url)
1254
1255         # Substitute URL if any change after escaping
1256         if url != url_escaped:
1257             if isinstance(req, compat_str):
1258                 req = url_escaped
1259             else:
1260                 req = compat_urllib_request.Request(
1261                     url_escaped, data=req.data, headers=req.headers,
1262                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1263
1264         return self._opener.open(req, timeout=self._socket_timeout)
1265
1266     def print_debug_header(self):
1267         if not self.params.get('verbose'):
1268             return
1269
1270         if type('') is not compat_str:
1271             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1272             self.report_warning(
1273                 'Your Python is broken! Update to a newer and supported version')
1274
1275         encoding_str = (
1276             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1277                 locale.getpreferredencoding(),
1278                 sys.getfilesystemencoding(),
1279                 sys.stdout.encoding,
1280                 self.get_encoding()))
1281         write_string(encoding_str, encoding=None)
1282
1283         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1284         try:
1285             sp = subprocess.Popen(
1286                 ['git', 'rev-parse', '--short', 'HEAD'],
1287                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1288                 cwd=os.path.dirname(os.path.abspath(__file__)))
1289             out, err = sp.communicate()
1290             out = out.decode().strip()
1291             if re.match('[0-9a-f]+', out):
1292                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1293         except:
1294             try:
1295                 sys.exc_clear()
1296             except:
1297                 pass
1298         self._write_string('[debug] Python version %s - %s' %
1299                      (platform.python_version(), platform_name()) + '\n')
1300
1301         proxy_map = {}
1302         for handler in self._opener.handlers:
1303             if hasattr(handler, 'proxies'):
1304                 proxy_map.update(handler.proxies)
1305         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1306
1307     def _setup_opener(self):
1308         timeout_val = self.params.get('socket_timeout')
1309         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1310
1311         opts_cookiefile = self.params.get('cookiefile')
1312         opts_proxy = self.params.get('proxy')
1313
1314         if opts_cookiefile is None:
1315             self.cookiejar = compat_cookiejar.CookieJar()
1316         else:
1317             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1318                 opts_cookiefile)
1319             if os.access(opts_cookiefile, os.R_OK):
1320                 self.cookiejar.load()
1321
1322         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1323             self.cookiejar)
1324         if opts_proxy is not None:
1325             if opts_proxy == '':
1326                 proxies = {}
1327             else:
1328                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1329         else:
1330             proxies = compat_urllib_request.getproxies()
1331             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1332             if 'http' in proxies and 'https' not in proxies:
1333                 proxies['https'] = proxies['http']
1334         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1335
1336         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1337         https_handler = make_HTTPS_handler(
1338             self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1339         ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1340         opener = compat_urllib_request.build_opener(
1341             https_handler, proxy_handler, cookie_processor, ydlh)
1342         # Delete the default user-agent header, which would otherwise apply in
1343         # cases where our custom HTTP handler doesn't come into play
1344         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1345         opener.addheaders = []
1346         self._opener = opener
1347
1348     def encode(self, s):
1349         if isinstance(s, bytes):
1350             return s  # Already encoded
1351
1352         try:
1353             return s.encode(self.get_encoding())
1354         except UnicodeEncodeError as err:
1355             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1356             raise
1357
1358     def get_encoding(self):
1359         encoding = self.params.get('encoding')
1360         if encoding is None:
1361             encoding = preferredencoding()
1362         return encoding