_ Git - youtube-dl/blob - youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import datetime
   8 import errno
   9 import io
  10 import json
  11 import locale
  12 import os
  13 import platform
  14 import re
  15 import shutil
  16 import subprocess
  17 import socket
  18 import sys
  19 import time
  20 import traceback
  21
  22 if os.name == 'nt':
  23     import ctypes
  24
  25 from .compat import (
  26     compat_cookiejar,
  27     compat_expanduser,
  28     compat_http_client,
  29     compat_str,
  30     compat_urllib_error,
  31     compat_urllib_request,
  32 )
  33 from .utils import (
  34     escape_url,
  35     ContentTooShortError,
  36     date_from_str,
  37     DateRange,
  38     DEFAULT_OUTTMPL,
  39     determine_ext,
  40     DownloadError,
  41     encodeFilename,
  42     ExtractorError,
  43     format_bytes,
  44     formatSeconds,
  45     get_term_width,
  46     locked_file,
  47     make_HTTPS_handler,
  48     MaxDownloadsReached,
  49     PagedList,
  50     PostProcessingError,
  51     platform_name,
  52     preferredencoding,
  53     SameFileError,
  54     sanitize_filename,
  55     subtitles_filename,
  56     takewhile_inclusive,
  57     UnavailableVideoError,
  58     url_basename,
  59     write_json_file,
  60     write_string,
  61     YoutubeDLHandler,
  62     prepend_extension,
  63     args_to_str,
  64 )
  65 from .cache import Cache
  66 from .extractor import get_info_extractor, gen_extractors
  67 from .downloader import get_suitable_downloader
  68 from .downloader.rtmp import rtmpdump_version
  69 from .postprocessor import FFmpegMergerPP, FFmpegPostProcessor
  70 from .version import __version__
  71
  72
  73 class YoutubeDL(object):
  74     """YoutubeDL class.
  75
   76     YoutubeDL objects are the ones responsible for downloading the
  77     actual video file and writing it to disk if the user has requested
  78     it, among some other tasks. In most cases there should be one per
  79     program. As, given a video URL, the downloader doesn't know how to
  80     extract all the needed information, task that InfoExtractors do, it
  81     has to pass the URL to one of them.
  82
  83     For this, YoutubeDL objects have a method that allows
  84     InfoExtractors to be registered in a given order. When it is passed
   85     a URL, the YoutubeDL object hands it to the first InfoExtractor it
  86     finds that reports being able to handle it. The InfoExtractor extracts
  87     all the information about the video or videos the URL refers to, and
   88     YoutubeDL processes the extracted information, possibly using a File
  89     Downloader to download the video.
  90
  91     YoutubeDL objects accept a lot of parameters. In order not to saturate
  92     the object constructor with arguments, it receives a dictionary of
  93     options instead. These options are available through the params
  94     attribute for the InfoExtractors to use. The YoutubeDL also
  95     registers itself as the downloader in charge for the InfoExtractors
  96     that are added to it, so this is a "mutual registration".
  97
  98     Available options:
  99
 100     username:          Username for authentication purposes.
 101     password:          Password for authentication purposes.
  102     videopassword:     Password for accessing a video.
 103     usenetrc:          Use netrc for authentication instead.
 104     verbose:           Print additional info to stdout.
 105     quiet:             Do not print messages to stdout.
 106     no_warnings:       Do not print out anything for warnings.
 107     forceurl:          Force printing final URL.
 108     forcetitle:        Force printing title.
 109     forceid:           Force printing ID.
 110     forcethumbnail:    Force printing thumbnail URL.
 111     forcedescription:  Force printing description.
 112     forcefilename:     Force printing final filename.
 113     forceduration:     Force printing duration.
 114     forcejson:         Force printing info_dict as JSON.
 115     dump_single_json:  Force printing the info_dict of the whole playlist
 116                        (or video) as a single JSON line.
 117     simulate:          Do not download the video files.
 118     format:            Video format code.
 119     format_limit:      Highest quality format to try.
 120     outtmpl:           Template for output names.
 121     restrictfilenames: Do not allow "&" and spaces in file names
 122     ignoreerrors:      Do not stop on download errors.
 123     nooverwrites:      Prevent overwriting files.
 124     playliststart:     Playlist item to start at.
 125     playlistend:       Playlist item to end at.
 126     matchtitle:        Download only matching titles.
 127     rejecttitle:       Reject downloads for matching titles.
 128     logger:            Log messages to a logging.Logger instance.
 129     logtostderr:       Log messages to stderr instead of stdout.
 130     writedescription:  Write the video description to a .description file
 131     writeinfojson:     Write the video description to a .info.json file
 132     writeannotations:  Write the video annotations to a .annotations.xml file
 133     writethumbnail:    Write the thumbnail image to a file
 134     writesubtitles:    Write the video subtitles to a file
 135     writeautomaticsub: Write the automatic subtitles to a file
 136     allsubtitles:      Downloads all the subtitles of the video
 137                        (requires writesubtitles or writeautomaticsub)
 138     listsubtitles:     Lists all available subtitles for the video
 139     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
 140     subtitleslangs:    List of languages of the subtitles to download
 141     keepvideo:         Keep the video file after post-processing
 142     daterange:         A DateRange object, download only if the upload_date is in the range.
 143     skip_download:     Skip the actual download of the video file
 144     cachedir:          Location of the cache files in the filesystem.
 145                        False to disable filesystem cache.
 146     noplaylist:        Download single video instead of a playlist if in doubt.
 147     age_limit:         An integer representing the user's age in years.
 148                        Unsuitable videos for the given age are skipped.
 149     min_views:         An integer representing the minimum view count the video
 150                        must have in order to not be skipped.
 151                        Videos without view count information are always
 152                        downloaded. None for no limit.
 153     max_views:         An integer representing the maximum view count.
 154                        Videos that are more popular than that are not
 155                        downloaded.
 156                        Videos without view count information are always
 157                        downloaded. None for no limit.
 158     download_archive:  File name of a file where all downloads are recorded.
 159                        Videos already present in the file are not downloaded
 160                        again.
 161     cookiefile:        File name where cookies should be read from and dumped to.
 162     nocheckcertificate:Do not verify SSL certificates
 163     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 164                        At the moment, this is only supported by YouTube.
 165     proxy:             URL of the proxy server to use
 166     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 167     bidi_workaround:   Work around buggy terminals without bidirectional text
  168                        support, using fribidi
 169     debug_printtraffic:Print out sent and received HTTP traffic
 170     include_ads:       Download ads as well
 171     default_search:    Prepend this string if an input url is not valid.
 172                        'auto' for elaborate guessing
 173     encoding:          Use this encoding instead of the system-specified.
 174     extract_flat:      Do not resolve URLs, return the immediate result.
 175                        Pass in 'in_playlist' to only show this behavior for
 176                        playlist items.
 177     no_playlist:       If the URL contains both a playlist and a video ID,
 178                        download the video, not the playlist.
 179
 180     The following parameters are not used by YoutubeDL itself, they are used by
 181     the FileDownloader:
 182     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 183     noresizebuffer, retries, continuedl, noprogress, consoletitle
 184
 185     The following options are used by the post processors:
 186     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
 187                        otherwise prefer avconv.
 188     exec_cmd:          Arbitrary command to run after downloading
 189     """
 190
 191     params = None
 192     _ies = []
 193     _pps = []
 194     _download_retcode = None
 195     _num_downloads = None
 196     _screen_file = None
 197
 198     def __init__(self, params=None, auto_init=True):
 199         """Create a FileDownloader object with the given options."""
 200         if params is None:
 201             params = {}
 202         self._ies = []
 203         self._ies_instances = {}
 204         self._pps = []
 205         self._progress_hooks = []
 206         self._download_retcode = 0
 207         self._num_downloads = 0
 208         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 209         self._err_file = sys.stderr
 210         self.params = params
 211         self.cache = Cache(self)
 212
 213         if params.get('bidi_workaround', False):
 214             try:
 215                 import pty
 216                 master, slave = pty.openpty()
 217                 width = get_term_width()
 218                 if width is None:
 219                     width_args = []
 220                 else:
 221                     width_args = ['-w', str(width)]
 222                 sp_kwargs = dict(
 223                     stdin=subprocess.PIPE,
 224                     stdout=slave,
 225                     stderr=self._err_file)
 226                 try:
 227                     self._output_process = subprocess.Popen(
 228                         ['bidiv'] + width_args, **sp_kwargs
 229                     )
 230                 except OSError:
 231                     self._output_process = subprocess.Popen(
 232                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 233                 self._output_channel = os.fdopen(master, 'rb')
 234             except OSError as ose:
 235                 if ose.errno == 2:
 236                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 237                 else:
 238                     raise
 239
 240         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 241                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 242                 and not params.get('restrictfilenames', False)):
 243             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 244             self.report_warning(
 245                 'Assuming --restrict-filenames since file system encoding '
 246                 'cannot encode all characters. '
 247                 'Set the LC_ALL environment variable to fix this.')
 248             self.params['restrictfilenames'] = True
 249
 250         if '%(stitle)s' in self.params.get('outtmpl', ''):
 251             self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 252
 253         self._setup_opener()
 254
 255         if auto_init:
 256             self.print_debug_header()
 257             self.add_default_info_extractors()
 258
 259     def warn_if_short_id(self, argv):
 260         # short YouTube ID starting with dash?
 261         idxs = [
 262             i for i, a in enumerate(argv)
 263             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 264         if idxs:
 265             correct_argv = (
 266                 ['youtube-dl'] +
 267                 [a for i, a in enumerate(argv) if i not in idxs] +
 268                 ['--'] + [argv[i] for i in idxs]
 269             )
 270             self.report_warning(
 271                 'Long argument string detected. '
 272                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 273                 args_to_str(correct_argv))
 274
 275     def add_info_extractor(self, ie):
 276         """Add an InfoExtractor object to the end of the list."""
 277         self._ies.append(ie)
 278         self._ies_instances[ie.ie_key()] = ie
 279         ie.set_downloader(self)
 280
 281     def get_info_extractor(self, ie_key):
 282         """
 283         Get an instance of an IE with name ie_key, it will try to get one from
 284         the _ies list, if there's no instance it will create a new one and add
 285         it to the extractor list.
 286         """
 287         ie = self._ies_instances.get(ie_key)
 288         if ie is None:
 289             ie = get_info_extractor(ie_key)()
 290             self.add_info_extractor(ie)
 291         return ie
 292
 293     def add_default_info_extractors(self):
 294         """
 295         Add the InfoExtractors returned by gen_extractors to the end of the list
 296         """
 297         for ie in gen_extractors():
 298             self.add_info_extractor(ie)
 299
 300     def add_post_processor(self, pp):
 301         """Add a PostProcessor object to the end of the chain."""
 302         self._pps.append(pp)
 303         pp.set_downloader(self)
 304
 305     def add_progress_hook(self, ph):
 306         """Add the progress hook (currently only for the file downloader)"""
 307         self._progress_hooks.append(ph)
 308
 309     def _bidi_workaround(self, message):
 310         if not hasattr(self, '_output_channel'):
 311             return message
 312
 313         assert hasattr(self, '_output_process')
 314         assert isinstance(message, compat_str)
 315         line_count = message.count('\n') + 1
 316         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 317         self._output_process.stdin.flush()
 318         res = ''.join(self._output_channel.readline().decode('utf-8')
 319                       for _ in range(line_count))
 320         return res[:-len('\n')]
 321
 322     def to_screen(self, message, skip_eol=False):
 323         """Print message to stdout if not in quiet mode."""
 324         return self.to_stdout(message, skip_eol, check_quiet=True)
 325
 326     def _write_string(self, s, out=None):
 327         write_string(s, out=out, encoding=self.params.get('encoding'))
 328
 329     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 330         """Print message to stdout if not in quiet mode."""
 331         if self.params.get('logger'):
 332             self.params['logger'].debug(message)
 333         elif not check_quiet or not self.params.get('quiet', False):
 334             message = self._bidi_workaround(message)
 335             terminator = ['\n', ''][skip_eol]
 336             output = message + terminator
 337
 338             self._write_string(output, self._screen_file)
 339
 340     def to_stderr(self, message):
 341         """Print message to stderr."""
 342         assert isinstance(message, compat_str)
 343         if self.params.get('logger'):
 344             self.params['logger'].error(message)
 345         else:
 346             message = self._bidi_workaround(message)
 347             output = message + '\n'
 348             self._write_string(output, self._err_file)
 349
 350     def to_console_title(self, message):
 351         if not self.params.get('consoletitle', False):
 352             return
 353         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 354             # c_wchar_p() might not be necessary if `message` is
 355             # already of type unicode()
 356             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 357         elif 'TERM' in os.environ:
 358             self._write_string('\033]0;%s\007' % message, self._screen_file)
 359
 360     def save_console_title(self):
 361         if not self.params.get('consoletitle', False):
 362             return
 363         if 'TERM' in os.environ:
 364             # Save the title on stack
 365             self._write_string('\033[22;0t', self._screen_file)
 366
 367     def restore_console_title(self):
 368         if not self.params.get('consoletitle', False):
 369             return
 370         if 'TERM' in os.environ:
 371             # Restore the title from stack
 372             self._write_string('\033[23;0t', self._screen_file)
 373
 374     def __enter__(self):
 375         self.save_console_title()
 376         return self
 377
 378     def __exit__(self, *args):
 379         self.restore_console_title()
 380
 381         if self.params.get('cookiefile') is not None:
 382             self.cookiejar.save()
 383
 384     def trouble(self, message=None, tb=None):
 385         """Determine action to take when a download problem appears.
 386
 387         Depending on if the downloader has been configured to ignore
 388         download errors or not, this method may throw an exception or
 389         not when errors are found, after printing the message.
 390
 391         tb, if given, is additional traceback information.
 392         """
 393         if message is not None:
 394             self.to_stderr(message)
 395         if self.params.get('verbose'):
 396             if tb is None:
 397                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 398                     tb = ''
 399                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 400                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 401                     tb += compat_str(traceback.format_exc())
 402                 else:
 403                     tb_data = traceback.format_list(traceback.extract_stack())
 404                     tb = ''.join(tb_data)
 405             self.to_stderr(tb)
 406         if not self.params.get('ignoreerrors', False):
 407             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 408                 exc_info = sys.exc_info()[1].exc_info
 409             else:
 410                 exc_info = sys.exc_info()
 411             raise DownloadError(message, exc_info)
 412         self._download_retcode = 1
 413
 414     def report_warning(self, message):
 415         '''
 416         Print the message to stderr, it will be prefixed with 'WARNING:'
 417         If stderr is a tty file the 'WARNING:' will be colored
 418         '''
 419         if self.params.get('logger') is not None:
 420             self.params['logger'].warning(message)
 421         else:
 422             if self.params.get('no_warnings'):
 423                 return
 424             if self._err_file.isatty() and os.name != 'nt':
 425                 _msg_header = '\033[0;33mWARNING:\033[0m'
 426             else:
 427                 _msg_header = 'WARNING:'
 428             warning_message = '%s %s' % (_msg_header, message)
 429             self.to_stderr(warning_message)
 430
 431     def report_error(self, message, tb=None):
 432         '''
 433         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 434         in red if stderr is a tty file.
 435         '''
 436         if self._err_file.isatty() and os.name != 'nt':
 437             _msg_header = '\033[0;31mERROR:\033[0m'
 438         else:
 439             _msg_header = 'ERROR:'
 440         error_message = '%s %s' % (_msg_header, message)
 441         self.trouble(error_message, tb)
 442
 443     def report_file_already_downloaded(self, file_name):
 444         """Report file has already been fully downloaded."""
 445         try:
 446             self.to_screen('[download] %s has already been downloaded' % file_name)
 447         except UnicodeEncodeError:
 448             self.to_screen('[download] The file has already been downloaded')
 449
 450     def prepare_filename(self, info_dict):
 451         """Generate the output filename."""
 452         try:
 453             template_dict = dict(info_dict)
 454
 455             template_dict['epoch'] = int(time.time())
 456             autonumber_size = self.params.get('autonumber_size')
 457             if autonumber_size is None:
 458                 autonumber_size = 5
 459             autonumber_templ = '%0' + str(autonumber_size) + 'd'
 460             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 461             if template_dict.get('playlist_index') is not None:
 462                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
 463             if template_dict.get('resolution') is None:
 464                 if template_dict.get('width') and template_dict.get('height'):
 465                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 466                 elif template_dict.get('height'):
 467                     template_dict['resolution'] = '%sp' % template_dict['height']
 468                 elif template_dict.get('width'):
 469                     template_dict['resolution'] = '?x%d' % template_dict['width']
 470
 471             sanitize = lambda k, v: sanitize_filename(
 472                 compat_str(v),
 473                 restricted=self.params.get('restrictfilenames'),
 474                 is_id=(k == 'id'))
 475             template_dict = dict((k, sanitize(k, v))
 476                                  for k, v in template_dict.items()
 477                                  if v is not None)
 478             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 479
 480             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 481             tmpl = compat_expanduser(outtmpl)
 482             filename = tmpl % template_dict
 483             return filename
 484         except ValueError as err:
 485             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 486             return None
 487
 488     def _match_entry(self, info_dict):
 489         """ Returns None iff the file should be downloaded """
 490
 491         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 492         if 'title' in info_dict:
 493             # This can happen when we're just evaluating the playlist
 494             title = info_dict['title']
 495             matchtitle = self.params.get('matchtitle', False)
 496             if matchtitle:
 497                 if not re.search(matchtitle, title, re.IGNORECASE):
 498                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 499             rejecttitle = self.params.get('rejecttitle', False)
 500             if rejecttitle:
 501                 if re.search(rejecttitle, title, re.IGNORECASE):
 502                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 503         date = info_dict.get('upload_date', None)
 504         if date is not None:
 505             dateRange = self.params.get('daterange', DateRange())
 506             if date not in dateRange:
 507                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 508         view_count = info_dict.get('view_count', None)
 509         if view_count is not None:
 510             min_views = self.params.get('min_views')
 511             if min_views is not None and view_count < min_views:
 512                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 513             max_views = self.params.get('max_views')
 514             if max_views is not None and view_count > max_views:
 515                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 516         age_limit = self.params.get('age_limit')
 517         if age_limit is not None:
 518             actual_age_limit = info_dict.get('age_limit')
 519             if actual_age_limit is None:
 520                 actual_age_limit = 0
 521             if age_limit < actual_age_limit:
 522                 return 'Skipping "' + title + '" because it is age restricted'
 523         if self.in_download_archive(info_dict):
 524             return '%s has already been recorded in archive' % video_title
 525         return None
 526
 527     @staticmethod
 528     def add_extra_info(info_dict, extra_info):
 529         '''Set the keys from extra_info in info dict if they are missing'''
 530         for key, value in extra_info.items():
 531             info_dict.setdefault(key, value)
 532
 533     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 534                      process=True):
 535         '''
 536         Returns a list with a dictionary for each video we find.
 537         If 'download', also downloads the videos.
 538         extra_info is a dict containing the extra values to add to each result
 539          '''
 540
 541         if ie_key:
 542             ies = [self.get_info_extractor(ie_key)]
 543         else:
 544             ies = self._ies
 545
 546         for ie in ies:
 547             if not ie.suitable(url):
 548                 continue
 549
 550             if not ie.working():
 551                 self.report_warning('The program functionality for this site has been marked as broken, '
 552                                     'and will probably not work.')
 553
 554             try:
 555                 ie_result = ie.extract(url)
 556                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
 557                     break
 558                 if isinstance(ie_result, list):
 559                     # Backwards compatibility: old IE result format
 560                     ie_result = {
 561                         '_type': 'compat_list',
 562                         'entries': ie_result,
 563                     }
 564                 self.add_default_extra_info(ie_result, ie, url)
 565                 if process:
 566                     return self.process_ie_result(ie_result, download, extra_info)
 567                 else:
 568                     return ie_result
 569             except ExtractorError as de:  # An error we somewhat expected
 570                 self.report_error(compat_str(de), de.format_traceback())
 571                 break
 572             except MaxDownloadsReached:
 573                 raise
 574             except Exception as e:
 575                 if self.params.get('ignoreerrors', False):
 576                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 577                     break
 578                 else:
 579                     raise
 580         else:
 581             self.report_error('no suitable InfoExtractor for URL %s' % url)
 582
 583     def add_default_extra_info(self, ie_result, ie, url):
 584         self.add_extra_info(ie_result, {
 585             'extractor': ie.IE_NAME,
 586             'webpage_url': url,
 587             'webpage_url_basename': url_basename(url),
 588             'extractor_key': ie.ie_key(),
 589         })
 590
 591     def process_ie_result(self, ie_result, download=True, extra_info={}):
 592         """
 593         Take the result of the ie(may be modified) and resolve all unresolved
 594         references (URLs, playlist items).
 595
 596         It will also download the videos if 'download'.
 597         Returns the resolved ie_result.
 598         """
 599
 600         result_type = ie_result.get('_type', 'video')
 601
 602         if result_type in ('url', 'url_transparent'):
 603             extract_flat = self.params.get('extract_flat', False)
 604             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
 605                     extract_flat is True):
 606                 if self.params.get('forcejson', False):
 607                     self.to_stdout(json.dumps(ie_result))
 608                 return ie_result
 609
 610         if result_type == 'video':
 611             self.add_extra_info(ie_result, extra_info)
 612             return self.process_video_result(ie_result, download=download)
 613         elif result_type == 'url':
 614             # We have to add extra_info to the results because it may be
 615             # contained in a playlist
 616             return self.extract_info(ie_result['url'],
 617                                      download,
 618                                      ie_key=ie_result.get('ie_key'),
 619                                      extra_info=extra_info)
 620         elif result_type == 'url_transparent':
 621             # Use the information from the embedding page
 622             info = self.extract_info(
 623                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 624                 extra_info=extra_info, download=False, process=False)
 625
 626             def make_result(embedded_info):
 627                 new_result = ie_result.copy()
 628                 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
 629                           'entries', 'ie_key', 'duration',
 630                           'subtitles', 'annotations', 'format',
 631                           'thumbnail', 'thumbnails'):
 632                     if f in new_result:
 633                         del new_result[f]
 634                     if f in embedded_info:
 635                         new_result[f] = embedded_info[f]
 636                 return new_result
 637             new_result = make_result(info)
 638
 639             assert new_result.get('_type') != 'url_transparent'
 640             if new_result.get('_type') == 'compat_list':
 641                 new_result['entries'] = [
 642                     make_result(e) for e in new_result['entries']]
 643
 644             return self.process_ie_result(
 645                 new_result, download=download, extra_info=extra_info)
 646         elif result_type == 'playlist' or result_type == 'multi_video':
 647             # We process each entry in the playlist
 648             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 649             self.to_screen('[download] Downloading playlist: %s' % playlist)
 650
 651             playlist_results = []
 652
 653             playliststart = self.params.get('playliststart', 1) - 1
 654             playlistend = self.params.get('playlistend', None)
 655             # For backwards compatibility, interpret -1 as whole list
 656             if playlistend == -1:
 657                 playlistend = None
 658
 659             if isinstance(ie_result['entries'], list):
 660                 n_all_entries = len(ie_result['entries'])
 661                 entries = ie_result['entries'][playliststart:playlistend]
 662                 n_entries = len(entries)
 663                 self.to_screen(
 664                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
 665                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
 666             else:
 667                 assert isinstance(ie_result['entries'], PagedList)
 668                 entries = ie_result['entries'].getslice(
 669                     playliststart, playlistend)
 670                 n_entries = len(entries)
 671                 self.to_screen(
 672                     "[%s] playlist %s: Downloading %d videos" %
 673                     (ie_result['extractor'], playlist, n_entries))
 674
 675             for i, entry in enumerate(entries, 1):
 676                 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
 677                 extra = {
 678                     'n_entries': n_entries,
 679                     'playlist': playlist,
 680                     'playlist_id': ie_result.get('id'),
 681                     'playlist_title': ie_result.get('title'),
 682                     'playlist_index': i + playliststart,
 683                     'extractor': ie_result['extractor'],
 684                     'webpage_url': ie_result['webpage_url'],
 685                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
 686                     'extractor_key': ie_result['extractor_key'],
 687                 }
 688
 689                 reason = self._match_entry(entry)
 690                 if reason is not None:
 691                     self.to_screen('[download] ' + reason)
 692                     continue
 693
 694                 entry_result = self.process_ie_result(entry,
 695                                                       download=download,
 696                                                       extra_info=extra)
 697                 playlist_results.append(entry_result)
 698             ie_result['entries'] = playlist_results
 699             return ie_result
 700         elif result_type == 'compat_list':
 701             self.report_warning(
 702                 'Extractor %s returned a compat_list result. '
 703                 'It needs to be updated.' % ie_result.get('extractor'))
 704
 705             def _fixup(r):
 706                 self.add_extra_info(
 707                     r,
 708                     {
 709                         'extractor': ie_result['extractor'],
 710                         'webpage_url': ie_result['webpage_url'],
 711                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
 712                         'extractor_key': ie_result['extractor_key'],
 713                     }
 714                 )
 715                 return r
 716             ie_result['entries'] = [
 717                 self.process_ie_result(_fixup(r), download, extra_info)
 718                 for r in ie_result['entries']
 719             ]
 720             return ie_result
 721         else:
 722             raise Exception('Invalid result type: %s' % result_type)
 723
 724     def select_format(self, format_spec, available_formats):
 725         if format_spec == 'best' or format_spec is None:
 726             return available_formats[-1]
 727         elif format_spec == 'worst':
 728             return available_formats[0]
 729         elif format_spec == 'bestaudio':
 730             audio_formats = [
 731                 f for f in available_formats
 732                 if f.get('vcodec') == 'none']
 733             if audio_formats:
 734                 return audio_formats[-1]
 735         elif format_spec == 'worstaudio':
 736             audio_formats = [
 737                 f for f in available_formats
 738                 if f.get('vcodec') == 'none']
 739             if audio_formats:
 740                 return audio_formats[0]
 741         elif format_spec == 'bestvideo':
 742             video_formats = [
 743                 f for f in available_formats
 744                 if f.get('acodec') == 'none']
 745             if video_formats:
 746                 return video_formats[-1]
 747         elif format_spec == 'worstvideo':
 748             video_formats = [
 749                 f for f in available_formats
 750                 if f.get('acodec') == 'none']
 751             if video_formats:
 752                 return video_formats[0]
 753         else:
 754             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
 755             if format_spec in extensions:
 756                 filter_f = lambda f: f['ext'] == format_spec
 757             else:
 758                 filter_f = lambda f: f['format_id'] == format_spec
 759             matches = list(filter(filter_f, available_formats))
 760             if matches:
 761                 return matches[-1]
 762         return None
 763
 764     def process_video_result(self, info_dict, download=True):
 765         assert info_dict.get('_type', 'video') == 'video'
 766
 767         if 'id' not in info_dict:
 768             raise ExtractorError('Missing "id" field in extractor result')
 769         if 'title' not in info_dict:
 770             raise ExtractorError('Missing "title" field in extractor result')
 771
 772         if 'playlist' not in info_dict:
 773             # It isn't part of a playlist
 774             info_dict['playlist'] = None
 775             info_dict['playlist_index'] = None
 776
 777         thumbnails = info_dict.get('thumbnails')
 778         if thumbnails:
 779             thumbnails.sort(key=lambda t: (
 780                 t.get('width'), t.get('height'), t.get('url')))
 781             for t in thumbnails:
 782                 if 'width' in t and 'height' in t:
 783                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
 784
 785         if thumbnails and 'thumbnail' not in info_dict:
 786             info_dict['thumbnail'] = thumbnails[-1]['url']
 787
 788         if 'display_id' not in info_dict and 'id' in info_dict:
 789             info_dict['display_id'] = info_dict['id']
 790
 791         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
 792             upload_date = datetime.datetime.utcfromtimestamp(
 793                 info_dict['timestamp'])
 794             info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
 795
 796         # This extractors handle format selection themselves
 797         if info_dict['extractor'] in ['Youku']:
 798             if download:
 799                 self.process_info(info_dict)
 800             return info_dict
 801
 802         # We now pick which formats have to be downloaded
 803         if info_dict.get('formats') is None:
 804             # There's only one format available
 805             formats = [info_dict]
 806         else:
 807             formats = info_dict['formats']
 808
 809         if not formats:
 810             raise ExtractorError('No video formats found!')
 811
 812         # We check that all the formats have the format and format_id fields
 813         for i, format in enumerate(formats):
 814             if 'url' not in format:
 815                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
 816
 817             if format.get('format_id') is None:
 818                 format['format_id'] = compat_str(i)
 819             if format.get('format') is None:
 820                 format['format'] = '{id} - {res}{note}'.format(
 821                     id=format['format_id'],
 822                     res=self.format_resolution(format),
 823                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
 824                 )
 825             # Automatically determine file extension if missing
 826             if 'ext' not in format:
 827                 format['ext'] = determine_ext(format['url']).lower()
 828
 829         format_limit = self.params.get('format_limit', None)
 830         if format_limit:
 831             formats = list(takewhile_inclusive(
 832                 lambda f: f['format_id'] != format_limit, formats
 833             ))
 834
 835         # TODO Central sorting goes here
 836
 837         if formats[0] is not info_dict:
 838             # only set the 'formats' fields if the original info_dict list them
 839             # otherwise we end up with a circular reference, the first (and unique)
 840             # element in the 'formats' field in info_dict is info_dict itself,
 841             # wich can't be exported to json
 842             info_dict['formats'] = formats
 843         if self.params.get('listformats', None):
 844             self.list_formats(info_dict)
 845             return
 846
 847         req_format = self.params.get('format')
 848         if req_format is None:
 849             req_format = 'best'
 850         formats_to_download = []
 851         # The -1 is for supporting YoutubeIE
 852         if req_format in ('-1', 'all'):
 853             formats_to_download = formats
 854         else:
 855             for rfstr in req_format.split(','):
 856                 # We can accept formats requested in the format: 34/5/best, we pick
 857                 # the first that is available, starting from left
 858                 req_formats = rfstr.split('/')
 859                 for rf in req_formats:
 860                     if re.match(r'.+?\+.+?', rf) is not None:
 861                         # Two formats have been requested like '137+139'
 862                         format_1, format_2 = rf.split('+')
 863                         formats_info = (self.select_format(format_1, formats),
 864                                         self.select_format(format_2, formats))
 865                         if all(formats_info):
 866                             # The first format must contain the video and the
 867                             # second the audio
 868                             if formats_info[0].get('vcodec') == 'none':
 869                                 self.report_error('The first format must '
 870                                                   'contain the video, try using '
 871                                                   '"-f %s+%s"' % (format_2, format_1))
 872                                 return
 873                             selected_format = {
 874                                 'requested_formats': formats_info,
 875                                 'format': rf,
 876                                 'ext': formats_info[0]['ext'],
 877                             }
 878                         else:
 879                             selected_format = None
 880                     else:
 881                         selected_format = self.select_format(rf, formats)
 882                     if selected_format is not None:
 883                         formats_to_download.append(selected_format)
 884                         break
 885         if not formats_to_download:
 886             raise ExtractorError('requested format not available',
 887                                  expected=True)
 888
 889         if download:
 890             if len(formats_to_download) > 1:
 891                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
 892             for format in formats_to_download:
 893                 new_info = dict(info_dict)
 894                 new_info.update(format)
 895                 self.process_info(new_info)
 896         # We update the info dict with the best quality format (backwards compatibility)
 897         info_dict.update(formats_to_download[-1])
 898         return info_dict
 899
 900     def process_info(self, info_dict):
 901         """Process a single resolved IE result."""
 902
 903         assert info_dict.get('_type', 'video') == 'video'
 904
 905         max_downloads = self.params.get('max_downloads')
 906         if max_downloads is not None:
 907             if self._num_downloads >= int(max_downloads):
 908                 raise MaxDownloadsReached()
 909
 910         info_dict['fulltitle'] = info_dict['title']
 911         if len(info_dict['title']) > 200:
 912             info_dict['title'] = info_dict['title'][:197] + '...'
 913
 914         # Keep for backwards compatibility
 915         info_dict['stitle'] = info_dict['title']
 916
 917         if 'format' not in info_dict:
 918             info_dict['format'] = info_dict['ext']
 919
 920         reason = self._match_entry(info_dict)
 921         if reason is not None:
 922             self.to_screen('[download] ' + reason)
 923             return
 924
 925         self._num_downloads += 1
 926
 927         filename = self.prepare_filename(info_dict)
 928
 929         # Forced printings
 930         if self.params.get('forcetitle', False):
 931             self.to_stdout(info_dict['fulltitle'])
 932         if self.params.get('forceid', False):
 933             self.to_stdout(info_dict['id'])
 934         if self.params.get('forceurl', False):
 935             # For RTMP URLs, also include the playpath
 936             self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
 937         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
 938             self.to_stdout(info_dict['thumbnail'])
 939         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
 940             self.to_stdout(info_dict['description'])
 941         if self.params.get('forcefilename', False) and filename is not None:
 942             self.to_stdout(filename)
 943         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
 944             self.to_stdout(formatSeconds(info_dict['duration']))
 945         if self.params.get('forceformat', False):
 946             self.to_stdout(info_dict['format'])
 947         if self.params.get('forcejson', False):
 948             info_dict['_filename'] = filename
 949             self.to_stdout(json.dumps(info_dict))
 950         if self.params.get('dump_single_json', False):
 951             info_dict['_filename'] = filename
 952
 953         # Do nothing else if in simulate mode
 954         if self.params.get('simulate', False):
 955             return
 956
 957         if filename is None:
 958             return
 959
 960         try:
 961             dn = os.path.dirname(encodeFilename(filename))
 962             if dn and not os.path.exists(dn):
 963                 os.makedirs(dn)
 964         except (OSError, IOError) as err:
 965             self.report_error('unable to create directory ' + compat_str(err))
 966             return
 967
 968         if self.params.get('writedescription', False):
 969             descfn = filename + '.description'
 970             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
 971                 self.to_screen('[info] Video description is already present')
 972             else:
 973                 try:
 974                     self.to_screen('[info] Writing video description to: ' + descfn)
 975                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
 976                         descfile.write(info_dict['description'])
 977                 except (KeyError, TypeError):
 978                     self.report_warning('There\'s no description to write.')
 979                 except (OSError, IOError):
 980                     self.report_error('Cannot write description file ' + descfn)
 981                     return
 982
 983         if self.params.get('writeannotations', False):
 984             annofn = filename + '.annotations.xml'
 985             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
 986                 self.to_screen('[info] Video annotations are already present')
 987             else:
 988                 try:
 989                     self.to_screen('[info] Writing video annotations to: ' + annofn)
 990                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
 991                         annofile.write(info_dict['annotations'])
 992                 except (KeyError, TypeError):
 993                     self.report_warning('There are no annotations to write.')
 994                 except (OSError, IOError):
 995                     self.report_error('Cannot write annotations file: ' + annofn)
 996                     return
 997
 998         subtitles_are_requested = any([self.params.get('writesubtitles', False),
 999                                        self.params.get('writeautomaticsub')])
1000
1001         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
1002             # subtitles download errors are already managed as troubles in relevant IE
1003             # that way it will silently go on when used with unsupporting IE
1004             subtitles = info_dict['subtitles']
1005             sub_format = self.params.get('subtitlesformat', 'srt')
1006             for sub_lang in subtitles.keys():
1007                 sub = subtitles[sub_lang]
1008                 if sub is None:
1009                     continue
1010                 try:
1011                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1012                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1013                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1014                     else:
1015                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1016                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1017                             subfile.write(sub)
1018                 except (OSError, IOError):
1019                     self.report_error('Cannot write subtitles file ' + sub_filename)
1020                     return
1021
1022         if self.params.get('writeinfojson', False):
1023             infofn = os.path.splitext(filename)[0] + '.info.json'
1024             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1025                 self.to_screen('[info] Video description metadata is already present')
1026             else:
1027                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1028                 try:
1029                     write_json_file(info_dict, infofn)
1030                 except (OSError, IOError):
1031                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1032                     return
1033
1034         if self.params.get('writethumbnail', False):
1035             if info_dict.get('thumbnail') is not None:
1036                 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
1037                 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
1038                 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1039                     self.to_screen('[%s] %s: Thumbnail is already present' %
1040                                    (info_dict['extractor'], info_dict['id']))
1041                 else:
1042                     self.to_screen('[%s] %s: Downloading thumbnail ...' %
1043                                    (info_dict['extractor'], info_dict['id']))
1044                     try:
1045                         uf = self.urlopen(info_dict['thumbnail'])
1046                         with open(thumb_filename, 'wb') as thumbf:
1047                             shutil.copyfileobj(uf, thumbf)
1048                         self.to_screen('[%s] %s: Writing thumbnail to: %s' %
1049                                        (info_dict['extractor'], info_dict['id'], thumb_filename))
1050                     except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1051                         self.report_warning('Unable to download thumbnail "%s": %s' %
1052                                             (info_dict['thumbnail'], compat_str(err)))
1053
1054         if not self.params.get('skip_download', False):
1055             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
1056                 success = True
1057             else:
1058                 try:
1059                     def dl(name, info):
1060                         fd = get_suitable_downloader(info)(self, self.params)
1061                         for ph in self._progress_hooks:
1062                             fd.add_progress_hook(ph)
1063                         if self.params.get('verbose'):
1064                             self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1065                         return fd.download(name, info)
1066                     if info_dict.get('requested_formats') is not None:
1067                         downloaded = []
1068                         success = True
1069                         merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1070                         if not merger._executable:
1071                             postprocessors = []
1072                             self.report_warning('You have requested multiple '
1073                                                 'formats but ffmpeg or avconv are not installed.'
1074                                                 ' The formats won\'t be merged')
1075                         else:
1076                             postprocessors = [merger]
1077                         for f in info_dict['requested_formats']:
1078                             new_info = dict(info_dict)
1079                             new_info.update(f)
1080                             fname = self.prepare_filename(new_info)
1081                             fname = prepend_extension(fname, 'f%s' % f['format_id'])
1082                             downloaded.append(fname)
1083                             partial_success = dl(fname, new_info)
1084                             success = success and partial_success
1085                         info_dict['__postprocessors'] = postprocessors
1086                         info_dict['__files_to_merge'] = downloaded
1087                     else:
1088                         # Just a single file
1089                         success = dl(filename, info_dict)
1090                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1091                     self.report_error('unable to download video data: %s' % str(err))
1092                     return
1093                 except (OSError, IOError) as err:
1094                     raise UnavailableVideoError(err)
1095                 except (ContentTooShortError, ) as err:
1096                     self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1097                     return
1098
1099             if success:
1100                 try:
1101                     self.post_process(filename, info_dict)
1102                 except (PostProcessingError) as err:
1103                     self.report_error('postprocessing: %s' % str(err))
1104                     return
1105
1106         self.record_download_archive(info_dict)
1107
1108     def download(self, url_list):
1109         """Download a given list of URLs."""
1110         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1111         if (len(url_list) > 1 and
1112                 '%' not in outtmpl
1113                 and self.params.get('max_downloads') != 1):
1114             raise SameFileError(outtmpl)
1115
1116         for url in url_list:
1117             try:
1118                 # It also downloads the videos
1119                 res = self.extract_info(url)
1120             except UnavailableVideoError:
1121                 self.report_error('unable to download video')
1122             except MaxDownloadsReached:
1123                 self.to_screen('[info] Maximum number of downloaded files reached.')
1124                 raise
1125             else:
1126                 if self.params.get('dump_single_json', False):
1127                     self.to_stdout(json.dumps(res))
1128
1129         return self._download_retcode
1130
1131     def download_with_info_file(self, info_filename):
1132         with io.open(info_filename, 'r', encoding='utf-8') as f:
1133             info = json.load(f)
1134         try:
1135             self.process_ie_result(info, download=True)
1136         except DownloadError:
1137             webpage_url = info.get('webpage_url')
1138             if webpage_url is not None:
1139                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1140                 return self.download([webpage_url])
1141             else:
1142                 raise
1143         return self._download_retcode
1144
1145     def post_process(self, filename, ie_info):
1146         """Run all the postprocessors on the given file."""
1147         info = dict(ie_info)
1148         info['filepath'] = filename
1149         keep_video = None
1150         pps_chain = []
1151         if ie_info.get('__postprocessors') is not None:
1152             pps_chain.extend(ie_info['__postprocessors'])
1153         pps_chain.extend(self._pps)
1154         for pp in pps_chain:
1155             try:
1156                 keep_video_wish, new_info = pp.run(info)
1157                 if keep_video_wish is not None:
1158                     if keep_video_wish:
1159                         keep_video = keep_video_wish
1160                     elif keep_video is None:
1161                         # No clear decision yet, let IE decide
1162                         keep_video = keep_video_wish
1163             except PostProcessingError as e:
1164                 self.report_error(e.msg)
1165         if keep_video is False and not self.params.get('keepvideo', False):
1166             try:
1167                 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1168                 os.remove(encodeFilename(filename))
1169             except (IOError, OSError):
1170                 self.report_warning('Unable to remove downloaded video file')
1171
1172     def _make_archive_id(self, info_dict):
1173         # Future-proof against any change in case
1174         # and backwards compatibility with prior versions
1175         extractor = info_dict.get('extractor_key')
1176         if extractor is None:
1177             if 'id' in info_dict:
1178                 extractor = info_dict.get('ie_key')  # key in a playlist
1179         if extractor is None:
1180             return None  # Incomplete video information
1181         return extractor.lower() + ' ' + info_dict['id']
1182
1183     def in_download_archive(self, info_dict):
1184         fn = self.params.get('download_archive')
1185         if fn is None:
1186             return False
1187
1188         vid_id = self._make_archive_id(info_dict)
1189         if vid_id is None:
1190             return False  # Incomplete video information
1191
1192         try:
1193             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1194                 for line in archive_file:
1195                     if line.strip() == vid_id:
1196                         return True
1197         except IOError as ioe:
1198             if ioe.errno != errno.ENOENT:
1199                 raise
1200         return False
1201
1202     def record_download_archive(self, info_dict):
1203         fn = self.params.get('download_archive')
1204         if fn is None:
1205             return
1206         vid_id = self._make_archive_id(info_dict)
1207         assert vid_id
1208         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1209             archive_file.write(vid_id + '\n')
1210
1211     @staticmethod
1212     def format_resolution(format, default='unknown'):
1213         if format.get('vcodec') == 'none':
1214             return 'audio only'
1215         if format.get('resolution') is not None:
1216             return format['resolution']
1217         if format.get('height') is not None:
1218             if format.get('width') is not None:
1219                 res = '%sx%s' % (format['width'], format['height'])
1220             else:
1221                 res = '%sp' % format['height']
1222         elif format.get('width') is not None:
1223             res = '?x%d' % format['width']
1224         else:
1225             res = default
1226         return res
1227
1228     def _format_note(self, fdict):
1229         res = ''
1230         if fdict.get('ext') in ['f4f', 'f4m']:
1231             res += '(unsupported) '
1232         if fdict.get('format_note') is not None:
1233             res += fdict['format_note'] + ' '
1234         if fdict.get('tbr') is not None:
1235             res += '%4dk ' % fdict['tbr']
1236         if fdict.get('container') is not None:
1237             if res:
1238                 res += ', '
1239             res += '%s container' % fdict['container']
1240         if (fdict.get('vcodec') is not None and
1241                 fdict.get('vcodec') != 'none'):
1242             if res:
1243                 res += ', '
1244             res += fdict['vcodec']
1245             if fdict.get('vbr') is not None:
1246                 res += '@'
1247         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1248             res += 'video@'
1249         if fdict.get('vbr') is not None:
1250             res += '%4dk' % fdict['vbr']
1251         if fdict.get('fps') is not None:
1252             res += ', %sfps' % fdict['fps']
1253         if fdict.get('acodec') is not None:
1254             if res:
1255                 res += ', '
1256             if fdict['acodec'] == 'none':
1257                 res += 'video only'
1258             else:
1259                 res += '%-5s' % fdict['acodec']
1260         elif fdict.get('abr') is not None:
1261             if res:
1262                 res += ', '
1263             res += 'audio'
1264         if fdict.get('abr') is not None:
1265             res += '@%3dk' % fdict['abr']
1266         if fdict.get('asr') is not None:
1267             res += ' (%5dHz)' % fdict['asr']
1268         if fdict.get('filesize') is not None:
1269             if res:
1270                 res += ', '
1271             res += format_bytes(fdict['filesize'])
1272         elif fdict.get('filesize_approx') is not None:
1273             if res:
1274                 res += ', '
1275             res += '~' + format_bytes(fdict['filesize_approx'])
1276         return res
1277
1278     def list_formats(self, info_dict):
1279         def line(format, idlen=20):
1280             return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1281                 format['format_id'],
1282                 format['ext'],
1283                 self.format_resolution(format),
1284                 self._format_note(format),
1285             ))
1286
1287         formats = info_dict.get('formats', [info_dict])
1288         idlen = max(len('format code'),
1289                     max(len(f['format_id']) for f in formats))
1290         formats_s = [line(f, idlen) for f in formats]
1291         if len(formats) > 1:
1292             formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1293             formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1294
1295         header_line = line({
1296             'format_id': 'format code', 'ext': 'extension',
1297             'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1298         self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1299                        (info_dict['id'], header_line, '\n'.join(formats_s)))
1300
1301     def urlopen(self, req):
1302         """ Start an HTTP download """
1303
1304         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1305         # always respected by websites, some tend to give out URLs with non percent-encoded
1306         # non-ASCII characters (see telemb.py, ard.py [#3412])
1307         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1308         # To work around aforementioned issue we will replace request's original URL with
1309         # percent-encoded one
1310         req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
1311         url = req if req_is_string else req.get_full_url()
1312         url_escaped = escape_url(url)
1313
1314         # Substitute URL if any change after escaping
1315         if url != url_escaped:
1316             if req_is_string:
1317                 req = url_escaped
1318             else:
1319                 req = compat_urllib_request.Request(
1320                     url_escaped, data=req.data, headers=req.headers,
1321                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1322
1323         return self._opener.open(req, timeout=self._socket_timeout)
1324
1325     def print_debug_header(self):
1326         if not self.params.get('verbose'):
1327             return
1328
1329         if type('') is not compat_str:
1330             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1331             self.report_warning(
1332                 'Your Python is broken! Update to a newer and supported version')
1333
1334         stdout_encoding = getattr(
1335             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1336         encoding_str = (
1337             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1338                 locale.getpreferredencoding(),
1339                 sys.getfilesystemencoding(),
1340                 stdout_encoding,
1341                 self.get_encoding()))
1342         write_string(encoding_str, encoding=None)
1343
1344         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1345         try:
1346             sp = subprocess.Popen(
1347                 ['git', 'rev-parse', '--short', 'HEAD'],
1348                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1349                 cwd=os.path.dirname(os.path.abspath(__file__)))
1350             out, err = sp.communicate()
1351             out = out.decode().strip()
1352             if re.match('[0-9a-f]+', out):
1353                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1354         except:
1355             try:
1356                 sys.exc_clear()
1357             except:
1358                 pass
1359         self._write_string('[debug] Python version %s - %s\n' % (
1360             platform.python_version(), platform_name()))
1361
1362         exe_versions = FFmpegPostProcessor.get_versions()
1363         exe_versions['rtmpdump'] = rtmpdump_version()
1364         exe_str = ', '.join(
1365             '%s %s' % (exe, v)
1366             for exe, v in sorted(exe_versions.items())
1367             if v
1368         )
1369         if not exe_str:
1370             exe_str = 'none'
1371         self._write_string('[debug] exe versions: %s\n' % exe_str)
1372
1373         proxy_map = {}
1374         for handler in self._opener.handlers:
1375             if hasattr(handler, 'proxies'):
1376                 proxy_map.update(handler.proxies)
1377         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1378
1379     def _setup_opener(self):
1380         timeout_val = self.params.get('socket_timeout')
1381         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1382
1383         opts_cookiefile = self.params.get('cookiefile')
1384         opts_proxy = self.params.get('proxy')
1385
1386         if opts_cookiefile is None:
1387             self.cookiejar = compat_cookiejar.CookieJar()
1388         else:
1389             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1390                 opts_cookiefile)
1391             if os.access(opts_cookiefile, os.R_OK):
1392                 self.cookiejar.load()
1393
1394         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1395             self.cookiejar)
1396         if opts_proxy is not None:
1397             if opts_proxy == '':
1398                 proxies = {}
1399             else:
1400                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1401         else:
1402             proxies = compat_urllib_request.getproxies()
1403             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1404             if 'http' in proxies and 'https' not in proxies:
1405                 proxies['https'] = proxies['http']
1406         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1407
1408         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1409         https_handler = make_HTTPS_handler(
1410             self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1411         ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1412         opener = compat_urllib_request.build_opener(
1413             https_handler, proxy_handler, cookie_processor, ydlh)
1414         # Delete the default user-agent header, which would otherwise apply in
1415         # cases where our custom HTTP handler doesn't come into play
1416         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1417         opener.addheaders = []
1418         self._opener = opener
1419
1420     def encode(self, s):
1421         if isinstance(s, bytes):
1422             return s  # Already encoded
1423
1424         try:
1425             return s.encode(self.get_encoding())
1426         except UnicodeEncodeError as err:
1427             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1428             raise
1429
1430     def get_encoding(self):
1431         encoding = self.params.get('encoding')
1432         if encoding is None:
1433             encoding = preferredencoding()
1434         return encoding