_ Git - youtube-dl/blob - youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import datetime
   8 import errno
   9 import io
  10 import json
  11 import locale
  12 import os
  13 import platform
  14 import re
  15 import shutil
  16 import subprocess
  17 import socket
  18 import sys
  19 import time
  20 import traceback
  21
  22 if os.name == 'nt':
  23     import ctypes
  24
  25 from .compat import (
  26     compat_cookiejar,
  27     compat_expanduser,
  28     compat_http_client,
  29     compat_str,
  30     compat_urllib_error,
  31     compat_urllib_request,
  32 )
  33 from .utils import (
  34     escape_url,
  35     ContentTooShortError,
  36     date_from_str,
  37     DateRange,
  38     DEFAULT_OUTTMPL,
  39     determine_ext,
  40     DownloadError,
  41     encodeFilename,
  42     ExtractorError,
  43     format_bytes,
  44     formatSeconds,
  45     get_term_width,
  46     locked_file,
  47     make_HTTPS_handler,
  48     MaxDownloadsReached,
  49     PagedList,
  50     PostProcessingError,
  51     platform_name,
  52     preferredencoding,
  53     SameFileError,
  54     sanitize_filename,
  55     subtitles_filename,
  56     takewhile_inclusive,
  57     UnavailableVideoError,
  58     url_basename,
  59     write_json_file,
  60     write_string,
  61     YoutubeDLHandler,
  62     prepend_extension,
  63     args_to_str,
  64 )
  65 from .cache import Cache
  66 from .extractor import get_info_extractor, gen_extractors
  67 from .downloader import get_suitable_downloader
  68 from .downloader.rtmp import rtmpdump_version
  69 from .postprocessor import FFmpegMergerPP, FFmpegPostProcessor
  70 from .version import __version__
  71
  72
  73 class YoutubeDL(object):
  74     """YoutubeDL class.
  75
   76     YoutubeDL objects are the ones responsible for downloading the
  77     actual video file and writing it to disk if the user has requested
  78     it, among some other tasks. In most cases there should be one per
  79     program. As, given a video URL, the downloader doesn't know how to
  80     extract all the needed information, task that InfoExtractors do, it
  81     has to pass the URL to one of them.
  82
  83     For this, YoutubeDL objects have a method that allows
  84     InfoExtractors to be registered in a given order. When it is passed
  85     a URL, the YoutubeDL object handles it to the first InfoExtractor it
  86     finds that reports being able to handle it. The InfoExtractor extracts
  87     all the information about the video or videos the URL refers to, and
  88     YoutubeDL process the extracted information, possibly using a File
  89     Downloader to download the video.
  90
  91     YoutubeDL objects accept a lot of parameters. In order not to saturate
  92     the object constructor with arguments, it receives a dictionary of
  93     options instead. These options are available through the params
  94     attribute for the InfoExtractors to use. The YoutubeDL also
  95     registers itself as the downloader in charge for the InfoExtractors
  96     that are added to it, so this is a "mutual registration".
  97
  98     Available options:
  99
 100     username:          Username for authentication purposes.
 101     password:          Password for authentication purposes.
  102     videopassword:     Password for accessing a video.
 103     usenetrc:          Use netrc for authentication instead.
 104     verbose:           Print additional info to stdout.
 105     quiet:             Do not print messages to stdout.
 106     no_warnings:       Do not print out anything for warnings.
 107     forceurl:          Force printing final URL.
 108     forcetitle:        Force printing title.
 109     forceid:           Force printing ID.
 110     forcethumbnail:    Force printing thumbnail URL.
 111     forcedescription:  Force printing description.
 112     forcefilename:     Force printing final filename.
 113     forceduration:     Force printing duration.
 114     forcejson:         Force printing info_dict as JSON.
 115     dump_single_json:  Force printing the info_dict of the whole playlist
 116                        (or video) as a single JSON line.
 117     simulate:          Do not download the video files.
 118     format:            Video format code.
 119     format_limit:      Highest quality format to try.
 120     outtmpl:           Template for output names.
 121     restrictfilenames: Do not allow "&" and spaces in file names
 122     ignoreerrors:      Do not stop on download errors.
 123     nooverwrites:      Prevent overwriting files.
 124     playliststart:     Playlist item to start at.
 125     playlistend:       Playlist item to end at.
 126     matchtitle:        Download only matching titles.
 127     rejecttitle:       Reject downloads for matching titles.
 128     logger:            Log messages to a logging.Logger instance.
 129     logtostderr:       Log messages to stderr instead of stdout.
 130     writedescription:  Write the video description to a .description file
 131     writeinfojson:     Write the video description to a .info.json file
 132     writeannotations:  Write the video annotations to a .annotations.xml file
 133     writethumbnail:    Write the thumbnail image to a file
 134     writesubtitles:    Write the video subtitles to a file
 135     writeautomaticsub: Write the automatic subtitles to a file
 136     allsubtitles:      Downloads all the subtitles of the video
 137                        (requires writesubtitles or writeautomaticsub)
 138     listsubtitles:     Lists all available subtitles for the video
 139     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
 140     subtitleslangs:    List of languages of the subtitles to download
 141     keepvideo:         Keep the video file after post-processing
 142     daterange:         A DateRange object, download only if the upload_date is in the range.
 143     skip_download:     Skip the actual download of the video file
 144     cachedir:          Location of the cache files in the filesystem.
 145                        False to disable filesystem cache.
 146     noplaylist:        Download single video instead of a playlist if in doubt.
 147     age_limit:         An integer representing the user's age in years.
 148                        Unsuitable videos for the given age are skipped.
 149     min_views:         An integer representing the minimum view count the video
 150                        must have in order to not be skipped.
 151                        Videos without view count information are always
 152                        downloaded. None for no limit.
 153     max_views:         An integer representing the maximum view count.
 154                        Videos that are more popular than that are not
 155                        downloaded.
 156                        Videos without view count information are always
 157                        downloaded. None for no limit.
 158     download_archive:  File name of a file where all downloads are recorded.
 159                        Videos already present in the file are not downloaded
 160                        again.
 161     cookiefile:        File name where cookies should be read from and dumped to.
 162     nocheckcertificate:Do not verify SSL certificates
 163     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 164                        At the moment, this is only supported by YouTube.
 165     proxy:             URL of the proxy server to use
 166     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 167     bidi_workaround:   Work around buggy terminals without bidirectional text
  168                        support, using fribidi
 169     debug_printtraffic:Print out sent and received HTTP traffic
 170     include_ads:       Download ads as well
 171     default_search:    Prepend this string if an input url is not valid.
 172                        'auto' for elaborate guessing
 173     encoding:          Use this encoding instead of the system-specified.
 174     extract_flat:      Do not resolve URLs, return the immediate result.
 175                        Pass in 'in_playlist' to only show this behavior for
 176                        playlist items.
 177
 178     The following parameters are not used by YoutubeDL itself, they are used by
 179     the FileDownloader:
 180     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 181     noresizebuffer, retries, continuedl, noprogress, consoletitle
 182
 183     The following options are used by the post processors:
 184     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
 185                        otherwise prefer avconv.
 186     exec_cmd:          Arbitrary command to run after downloading
 187     """
 188
 189     params = None
 190     _ies = []
 191     _pps = []
 192     _download_retcode = None
 193     _num_downloads = None
 194     _screen_file = None
 195
 196     def __init__(self, params=None, auto_init=True):
 197         """Create a FileDownloader object with the given options."""
 198         if params is None:
 199             params = {}
 200         self._ies = []
 201         self._ies_instances = {}
 202         self._pps = []
 203         self._progress_hooks = []
 204         self._download_retcode = 0
 205         self._num_downloads = 0
 206         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 207         self._err_file = sys.stderr
 208         self.params = params
 209         self.cache = Cache(self)
 210
 211         if params.get('bidi_workaround', False):
 212             try:
 213                 import pty
 214                 master, slave = pty.openpty()
 215                 width = get_term_width()
 216                 if width is None:
 217                     width_args = []
 218                 else:
 219                     width_args = ['-w', str(width)]
 220                 sp_kwargs = dict(
 221                     stdin=subprocess.PIPE,
 222                     stdout=slave,
 223                     stderr=self._err_file)
 224                 try:
 225                     self._output_process = subprocess.Popen(
 226                         ['bidiv'] + width_args, **sp_kwargs
 227                     )
 228                 except OSError:
 229                     self._output_process = subprocess.Popen(
 230                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 231                 self._output_channel = os.fdopen(master, 'rb')
 232             except OSError as ose:
 233                 if ose.errno == 2:
 234                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 235                 else:
 236                     raise
 237
 238         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 239                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 240                 and not params.get('restrictfilenames', False)):
 241             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 242             self.report_warning(
 243                 'Assuming --restrict-filenames since file system encoding '
 244                 'cannot encode all characters. '
 245                 'Set the LC_ALL environment variable to fix this.')
 246             self.params['restrictfilenames'] = True
 247
 248         if '%(stitle)s' in self.params.get('outtmpl', ''):
 249             self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 250
 251         self._setup_opener()
 252
 253         if auto_init:
 254             self.print_debug_header()
 255             self.add_default_info_extractors()
 256
 257     def warn_if_short_id(self, argv):
 258         # short YouTube ID starting with dash?
 259         idxs = [
 260             i for i, a in enumerate(argv)
 261             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 262         if idxs:
 263             correct_argv = (
 264                 ['youtube-dl'] +
 265                 [a for i, a in enumerate(argv) if i not in idxs] +
 266                 ['--'] + [argv[i] for i in idxs]
 267             )
 268             self.report_warning(
 269                 'Long argument string detected. '
 270                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 271                 args_to_str(correct_argv))
 272
 273     def add_info_extractor(self, ie):
 274         """Add an InfoExtractor object to the end of the list."""
 275         self._ies.append(ie)
 276         self._ies_instances[ie.ie_key()] = ie
 277         ie.set_downloader(self)
 278
 279     def get_info_extractor(self, ie_key):
 280         """
 281         Get an instance of an IE with name ie_key, it will try to get one from
 282         the _ies list, if there's no instance it will create a new one and add
 283         it to the extractor list.
 284         """
 285         ie = self._ies_instances.get(ie_key)
 286         if ie is None:
 287             ie = get_info_extractor(ie_key)()
 288             self.add_info_extractor(ie)
 289         return ie
 290
 291     def add_default_info_extractors(self):
 292         """
 293         Add the InfoExtractors returned by gen_extractors to the end of the list
 294         """
 295         for ie in gen_extractors():
 296             self.add_info_extractor(ie)
 297
 298     def add_post_processor(self, pp):
 299         """Add a PostProcessor object to the end of the chain."""
 300         self._pps.append(pp)
 301         pp.set_downloader(self)
 302
 303     def add_progress_hook(self, ph):
 304         """Add the progress hook (currently only for the file downloader)"""
 305         self._progress_hooks.append(ph)
 306
 307     def _bidi_workaround(self, message):
 308         if not hasattr(self, '_output_channel'):
 309             return message
 310
 311         assert hasattr(self, '_output_process')
 312         assert isinstance(message, compat_str)
 313         line_count = message.count('\n') + 1
 314         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 315         self._output_process.stdin.flush()
 316         res = ''.join(self._output_channel.readline().decode('utf-8')
 317                       for _ in range(line_count))
 318         return res[:-len('\n')]
 319
 320     def to_screen(self, message, skip_eol=False):
 321         """Print message to stdout if not in quiet mode."""
 322         return self.to_stdout(message, skip_eol, check_quiet=True)
 323
 324     def _write_string(self, s, out=None):
 325         write_string(s, out=out, encoding=self.params.get('encoding'))
 326
 327     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 328         """Print message to stdout if not in quiet mode."""
 329         if self.params.get('logger'):
 330             self.params['logger'].debug(message)
 331         elif not check_quiet or not self.params.get('quiet', False):
 332             message = self._bidi_workaround(message)
 333             terminator = ['\n', ''][skip_eol]
 334             output = message + terminator
 335
 336             self._write_string(output, self._screen_file)
 337
 338     def to_stderr(self, message):
 339         """Print message to stderr."""
 340         assert isinstance(message, compat_str)
 341         if self.params.get('logger'):
 342             self.params['logger'].error(message)
 343         else:
 344             message = self._bidi_workaround(message)
 345             output = message + '\n'
 346             self._write_string(output, self._err_file)
 347
 348     def to_console_title(self, message):
 349         if not self.params.get('consoletitle', False):
 350             return
 351         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 352             # c_wchar_p() might not be necessary if `message` is
 353             # already of type unicode()
 354             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 355         elif 'TERM' in os.environ:
 356             self._write_string('\033]0;%s\007' % message, self._screen_file)
 357
 358     def save_console_title(self):
 359         if not self.params.get('consoletitle', False):
 360             return
 361         if 'TERM' in os.environ:
 362             # Save the title on stack
 363             self._write_string('\033[22;0t', self._screen_file)
 364
 365     def restore_console_title(self):
 366         if not self.params.get('consoletitle', False):
 367             return
 368         if 'TERM' in os.environ:
 369             # Restore the title from stack
 370             self._write_string('\033[23;0t', self._screen_file)
 371
 372     def __enter__(self):
 373         self.save_console_title()
 374         return self
 375
 376     def __exit__(self, *args):
 377         self.restore_console_title()
 378
 379         if self.params.get('cookiefile') is not None:
 380             self.cookiejar.save()
 381
 382     def trouble(self, message=None, tb=None):
 383         """Determine action to take when a download problem appears.
 384
 385         Depending on if the downloader has been configured to ignore
 386         download errors or not, this method may throw an exception or
 387         not when errors are found, after printing the message.
 388
 389         tb, if given, is additional traceback information.
 390         """
 391         if message is not None:
 392             self.to_stderr(message)
 393         if self.params.get('verbose'):
 394             if tb is None:
 395                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 396                     tb = ''
 397                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 398                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 399                     tb += compat_str(traceback.format_exc())
 400                 else:
 401                     tb_data = traceback.format_list(traceback.extract_stack())
 402                     tb = ''.join(tb_data)
 403             self.to_stderr(tb)
 404         if not self.params.get('ignoreerrors', False):
 405             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 406                 exc_info = sys.exc_info()[1].exc_info
 407             else:
 408                 exc_info = sys.exc_info()
 409             raise DownloadError(message, exc_info)
 410         self._download_retcode = 1
 411
 412     def report_warning(self, message):
 413         '''
 414         Print the message to stderr, it will be prefixed with 'WARNING:'
 415         If stderr is a tty file the 'WARNING:' will be colored
 416         '''
 417         if self.params.get('logger') is not None:
 418             self.params['logger'].warning(message)
 419         else:
 420             if self.params.get('no_warnings'):
 421                 return
 422             if self._err_file.isatty() and os.name != 'nt':
 423                 _msg_header = '\033[0;33mWARNING:\033[0m'
 424             else:
 425                 _msg_header = 'WARNING:'
 426             warning_message = '%s %s' % (_msg_header, message)
 427             self.to_stderr(warning_message)
 428
 429     def report_error(self, message, tb=None):
 430         '''
 431         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 432         in red if stderr is a tty file.
 433         '''
 434         if self._err_file.isatty() and os.name != 'nt':
 435             _msg_header = '\033[0;31mERROR:\033[0m'
 436         else:
 437             _msg_header = 'ERROR:'
 438         error_message = '%s %s' % (_msg_header, message)
 439         self.trouble(error_message, tb)
 440
 441     def report_file_already_downloaded(self, file_name):
 442         """Report file has already been fully downloaded."""
 443         try:
 444             self.to_screen('[download] %s has already been downloaded' % file_name)
 445         except UnicodeEncodeError:
 446             self.to_screen('[download] The file has already been downloaded')
 447
 448     def prepare_filename(self, info_dict):
 449         """Generate the output filename."""
 450         try:
 451             template_dict = dict(info_dict)
 452
 453             template_dict['epoch'] = int(time.time())
 454             autonumber_size = self.params.get('autonumber_size')
 455             if autonumber_size is None:
 456                 autonumber_size = 5
 457             autonumber_templ = '%0' + str(autonumber_size) + 'd'
 458             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 459             if template_dict.get('playlist_index') is not None:
 460                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
 461             if template_dict.get('resolution') is None:
 462                 if template_dict.get('width') and template_dict.get('height'):
 463                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 464                 elif template_dict.get('height'):
 465                     template_dict['resolution'] = '%sp' % template_dict['height']
 466                 elif template_dict.get('width'):
 467                     template_dict['resolution'] = '?x%d' % template_dict['width']
 468
 469             sanitize = lambda k, v: sanitize_filename(
 470                 compat_str(v),
 471                 restricted=self.params.get('restrictfilenames'),
 472                 is_id=(k == 'id'))
 473             template_dict = dict((k, sanitize(k, v))
 474                                  for k, v in template_dict.items()
 475                                  if v is not None)
 476             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 477
 478             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 479             tmpl = compat_expanduser(outtmpl)
 480             filename = tmpl % template_dict
 481             return filename
 482         except ValueError as err:
 483             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 484             return None
 485
 486     def _match_entry(self, info_dict):
 487         """ Returns None iff the file should be downloaded """
 488
 489         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 490         if 'title' in info_dict:
 491             # This can happen when we're just evaluating the playlist
 492             title = info_dict['title']
 493             matchtitle = self.params.get('matchtitle', False)
 494             if matchtitle:
 495                 if not re.search(matchtitle, title, re.IGNORECASE):
 496                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 497             rejecttitle = self.params.get('rejecttitle', False)
 498             if rejecttitle:
 499                 if re.search(rejecttitle, title, re.IGNORECASE):
 500                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 501         date = info_dict.get('upload_date', None)
 502         if date is not None:
 503             dateRange = self.params.get('daterange', DateRange())
 504             if date not in dateRange:
 505                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 506         view_count = info_dict.get('view_count', None)
 507         if view_count is not None:
 508             min_views = self.params.get('min_views')
 509             if min_views is not None and view_count < min_views:
 510                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 511             max_views = self.params.get('max_views')
 512             if max_views is not None and view_count > max_views:
 513                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 514         age_limit = self.params.get('age_limit')
 515         if age_limit is not None:
 516             actual_age_limit = info_dict.get('age_limit')
 517             if actual_age_limit is None:
 518                 actual_age_limit = 0
 519             if age_limit < actual_age_limit:
 520                 return 'Skipping "' + title + '" because it is age restricted'
 521         if self.in_download_archive(info_dict):
 522             return '%s has already been recorded in archive' % video_title
 523         return None
 524
 525     @staticmethod
 526     def add_extra_info(info_dict, extra_info):
 527         '''Set the keys from extra_info in info dict if they are missing'''
 528         for key, value in extra_info.items():
 529             info_dict.setdefault(key, value)
 530
 531     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 532                      process=True):
 533         '''
 534         Returns a list with a dictionary for each video we find.
 535         If 'download', also downloads the videos.
 536         extra_info is a dict containing the extra values to add to each result
 537          '''
 538
 539         if ie_key:
 540             ies = [self.get_info_extractor(ie_key)]
 541         else:
 542             ies = self._ies
 543
 544         for ie in ies:
 545             if not ie.suitable(url):
 546                 continue
 547
 548             if not ie.working():
 549                 self.report_warning('The program functionality for this site has been marked as broken, '
 550                                     'and will probably not work.')
 551
 552             try:
 553                 ie_result = ie.extract(url)
 554                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
 555                     break
 556                 if isinstance(ie_result, list):
 557                     # Backwards compatibility: old IE result format
 558                     ie_result = {
 559                         '_type': 'compat_list',
 560                         'entries': ie_result,
 561                     }
 562                 self.add_default_extra_info(ie_result, ie, url)
 563                 if process:
 564                     return self.process_ie_result(ie_result, download, extra_info)
 565                 else:
 566                     return ie_result
 567             except ExtractorError as de:  # An error we somewhat expected
 568                 self.report_error(compat_str(de), de.format_traceback())
 569                 break
 570             except MaxDownloadsReached:
 571                 raise
 572             except Exception as e:
 573                 if self.params.get('ignoreerrors', False):
 574                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 575                     break
 576                 else:
 577                     raise
 578         else:
 579             self.report_error('no suitable InfoExtractor for URL %s' % url)
 580
 581     def add_default_extra_info(self, ie_result, ie, url):
 582         self.add_extra_info(ie_result, {
 583             'extractor': ie.IE_NAME,
 584             'webpage_url': url,
 585             'webpage_url_basename': url_basename(url),
 586             'extractor_key': ie.ie_key(),
 587         })
 588
 589     def process_ie_result(self, ie_result, download=True, extra_info={}):
 590         """
 591         Take the result of the ie(may be modified) and resolve all unresolved
 592         references (URLs, playlist items).
 593
 594         It will also download the videos if 'download'.
 595         Returns the resolved ie_result.
 596         """
 597
 598         result_type = ie_result.get('_type', 'video')
 599
 600         if result_type in ('url', 'url_transparent'):
 601             extract_flat = self.params.get('extract_flat', False)
 602             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
 603                     extract_flat is True):
 604                 if self.params.get('forcejson', False):
 605                     self.to_stdout(json.dumps(ie_result))
 606                 return ie_result
 607
 608         if result_type == 'video':
 609             self.add_extra_info(ie_result, extra_info)
 610             return self.process_video_result(ie_result, download=download)
 611         elif result_type == 'url':
 612             # We have to add extra_info to the results because it may be
 613             # contained in a playlist
 614             return self.extract_info(ie_result['url'],
 615                                      download,
 616                                      ie_key=ie_result.get('ie_key'),
 617                                      extra_info=extra_info)
 618         elif result_type == 'url_transparent':
 619             # Use the information from the embedding page
 620             info = self.extract_info(
 621                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 622                 extra_info=extra_info, download=False, process=False)
 623
 624             def make_result(embedded_info):
 625                 new_result = ie_result.copy()
 626                 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
 627                           'entries', 'ie_key', 'duration',
 628                           'subtitles', 'annotations', 'format',
 629                           'thumbnail', 'thumbnails'):
 630                     if f in new_result:
 631                         del new_result[f]
 632                     if f in embedded_info:
 633                         new_result[f] = embedded_info[f]
 634                 return new_result
 635             new_result = make_result(info)
 636
 637             assert new_result.get('_type') != 'url_transparent'
 638             if new_result.get('_type') == 'compat_list':
 639                 new_result['entries'] = [
 640                     make_result(e) for e in new_result['entries']]
 641
 642             return self.process_ie_result(
 643                 new_result, download=download, extra_info=extra_info)
 644         elif result_type == 'playlist' or result_type == 'multi_video':
 645             # We process each entry in the playlist
 646             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 647             self.to_screen('[download] Downloading playlist: %s' % playlist)
 648
 649             playlist_results = []
 650
 651             playliststart = self.params.get('playliststart', 1) - 1
 652             playlistend = self.params.get('playlistend', None)
 653             # For backwards compatibility, interpret -1 as whole list
 654             if playlistend == -1:
 655                 playlistend = None
 656
 657             if isinstance(ie_result['entries'], list):
 658                 n_all_entries = len(ie_result['entries'])
 659                 entries = ie_result['entries'][playliststart:playlistend]
 660                 n_entries = len(entries)
 661                 self.to_screen(
 662                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
 663                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
 664             else:
 665                 assert isinstance(ie_result['entries'], PagedList)
 666                 entries = ie_result['entries'].getslice(
 667                     playliststart, playlistend)
 668                 n_entries = len(entries)
 669                 self.to_screen(
 670                     "[%s] playlist %s: Downloading %d videos" %
 671                     (ie_result['extractor'], playlist, n_entries))
 672
 673             for i, entry in enumerate(entries, 1):
 674                 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
 675                 extra = {
 676                     'n_entries': n_entries,
 677                     'playlist': playlist,
 678                     'playlist_id': ie_result.get('id'),
 679                     'playlist_title': ie_result.get('title'),
 680                     'playlist_index': i + playliststart,
 681                     'extractor': ie_result['extractor'],
 682                     'webpage_url': ie_result['webpage_url'],
 683                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
 684                     'extractor_key': ie_result['extractor_key'],
 685                 }
 686
 687                 reason = self._match_entry(entry)
 688                 if reason is not None:
 689                     self.to_screen('[download] ' + reason)
 690                     continue
 691
 692                 entry_result = self.process_ie_result(entry,
 693                                                       download=download,
 694                                                       extra_info=extra)
 695                 playlist_results.append(entry_result)
 696             ie_result['entries'] = playlist_results
 697             return ie_result
 698         elif result_type == 'compat_list':
 699             self.report_warning(
 700                 'Extractor %s returned a compat_list result. '
 701                 'It needs to be updated.' % ie_result.get('extractor'))
 702
 703             def _fixup(r):
 704                 self.add_extra_info(
 705                     r,
 706                     {
 707                         'extractor': ie_result['extractor'],
 708                         'webpage_url': ie_result['webpage_url'],
 709                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
 710                         'extractor_key': ie_result['extractor_key'],
 711                     }
 712                 )
 713                 return r
 714             ie_result['entries'] = [
 715                 self.process_ie_result(_fixup(r), download, extra_info)
 716                 for r in ie_result['entries']
 717             ]
 718             return ie_result
 719         else:
 720             raise Exception('Invalid result type: %s' % result_type)
 721
 722     def select_format(self, format_spec, available_formats):
 723         if format_spec == 'best' or format_spec is None:
 724             return available_formats[-1]
 725         elif format_spec == 'worst':
 726             return available_formats[0]
 727         elif format_spec == 'bestaudio':
 728             audio_formats = [
 729                 f for f in available_formats
 730                 if f.get('vcodec') == 'none']
 731             if audio_formats:
 732                 return audio_formats[-1]
 733         elif format_spec == 'worstaudio':
 734             audio_formats = [
 735                 f for f in available_formats
 736                 if f.get('vcodec') == 'none']
 737             if audio_formats:
 738                 return audio_formats[0]
 739         elif format_spec == 'bestvideo':
 740             video_formats = [
 741                 f for f in available_formats
 742                 if f.get('acodec') == 'none']
 743             if video_formats:
 744                 return video_formats[-1]
 745         elif format_spec == 'worstvideo':
 746             video_formats = [
 747                 f for f in available_formats
 748                 if f.get('acodec') == 'none']
 749             if video_formats:
 750                 return video_formats[0]
 751         else:
 752             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
 753             if format_spec in extensions:
 754                 filter_f = lambda f: f['ext'] == format_spec
 755             else:
 756                 filter_f = lambda f: f['format_id'] == format_spec
 757             matches = list(filter(filter_f, available_formats))
 758             if matches:
 759                 return matches[-1]
 760         return None
 761
 762     def process_video_result(self, info_dict, download=True):
 763         assert info_dict.get('_type', 'video') == 'video'
 764
 765         if 'id' not in info_dict:
 766             raise ExtractorError('Missing "id" field in extractor result')
 767         if 'title' not in info_dict:
 768             raise ExtractorError('Missing "title" field in extractor result')
 769
 770         if 'playlist' not in info_dict:
 771             # It isn't part of a playlist
 772             info_dict['playlist'] = None
 773             info_dict['playlist_index'] = None
 774
 775         thumbnails = info_dict.get('thumbnails')
 776         if thumbnails:
 777             thumbnails.sort(key=lambda t: (
 778                 t.get('width'), t.get('height'), t.get('url')))
 779             for t in thumbnails:
 780                 if 'width' in t and 'height' in t:
 781                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
 782
 783         if thumbnails and 'thumbnail' not in info_dict:
 784             info_dict['thumbnail'] = thumbnails[-1]['url']
 785
 786         if 'display_id' not in info_dict and 'id' in info_dict:
 787             info_dict['display_id'] = info_dict['id']
 788
 789         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
 790             upload_date = datetime.datetime.utcfromtimestamp(
 791                 info_dict['timestamp'])
 792             info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
 793
 794         # This extractors handle format selection themselves
 795         if info_dict['extractor'] in ['Youku']:
 796             if download:
 797                 self.process_info(info_dict)
 798             return info_dict
 799
 800         # We now pick which formats have to be downloaded
 801         if info_dict.get('formats') is None:
 802             # There's only one format available
 803             formats = [info_dict]
 804         else:
 805             formats = info_dict['formats']
 806
 807         if not formats:
 808             raise ExtractorError('No video formats found!')
 809
 810         # We check that all the formats have the format and format_id fields
 811         for i, format in enumerate(formats):
 812             if 'url' not in format:
 813                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
 814
 815             if format.get('format_id') is None:
 816                 format['format_id'] = compat_str(i)
 817             if format.get('format') is None:
 818                 format['format'] = '{id} - {res}{note}'.format(
 819                     id=format['format_id'],
 820                     res=self.format_resolution(format),
 821                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
 822                 )
 823             # Automatically determine file extension if missing
 824             if 'ext' not in format:
 825                 format['ext'] = determine_ext(format['url']).lower()
 826
 827         format_limit = self.params.get('format_limit', None)
 828         if format_limit:
 829             formats = list(takewhile_inclusive(
 830                 lambda f: f['format_id'] != format_limit, formats
 831             ))
 832
 833         # TODO Central sorting goes here
 834
 835         if formats[0] is not info_dict:
 836             # only set the 'formats' fields if the original info_dict list them
 837             # otherwise we end up with a circular reference, the first (and unique)
 838             # element in the 'formats' field in info_dict is info_dict itself,
 839             # wich can't be exported to json
 840             info_dict['formats'] = formats
 841         if self.params.get('listformats', None):
 842             self.list_formats(info_dict)
 843             return
 844
 845         req_format = self.params.get('format')
 846         if req_format is None:
 847             req_format = 'best'
 848         formats_to_download = []
 849         # The -1 is for supporting YoutubeIE
 850         if req_format in ('-1', 'all'):
 851             formats_to_download = formats
 852         else:
 853             for rfstr in req_format.split(','):
 854                 # We can accept formats requested in the format: 34/5/best, we pick
 855                 # the first that is available, starting from left
 856                 req_formats = rfstr.split('/')
 857                 for rf in req_formats:
 858                     if re.match(r'.+?\+.+?', rf) is not None:
 859                         # Two formats have been requested like '137+139'
 860                         format_1, format_2 = rf.split('+')
 861                         formats_info = (self.select_format(format_1, formats),
 862                                         self.select_format(format_2, formats))
 863                         if all(formats_info):
 864                             # The first format must contain the video and the
 865                             # second the audio
 866                             if formats_info[0].get('vcodec') == 'none':
 867                                 self.report_error('The first format must '
 868                                                   'contain the video, try using '
 869                                                   '"-f %s+%s"' % (format_2, format_1))
 870                                 return
 871                             selected_format = {
 872                                 'requested_formats': formats_info,
 873                                 'format': rf,
 874                                 'ext': formats_info[0]['ext'],
 875                             }
 876                         else:
 877                             selected_format = None
 878                     else:
 879                         selected_format = self.select_format(rf, formats)
 880                     if selected_format is not None:
 881                         formats_to_download.append(selected_format)
 882                         break
 883         if not formats_to_download:
 884             raise ExtractorError('requested format not available',
 885                                  expected=True)
 886
 887         if download:
 888             if len(formats_to_download) > 1:
 889                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
 890             for format in formats_to_download:
 891                 new_info = dict(info_dict)
 892                 new_info.update(format)
 893                 self.process_info(new_info)
 894         # We update the info dict with the best quality format (backwards compatibility)
 895         info_dict.update(formats_to_download[-1])
 896         return info_dict
 897
 898     def process_info(self, info_dict):
 899         """Process a single resolved IE result."""
 900
 901         assert info_dict.get('_type', 'video') == 'video'
 902
 903         max_downloads = self.params.get('max_downloads')
 904         if max_downloads is not None:
 905             if self._num_downloads >= int(max_downloads):
 906                 raise MaxDownloadsReached()
 907
 908         info_dict['fulltitle'] = info_dict['title']
 909         if len(info_dict['title']) > 200:
 910             info_dict['title'] = info_dict['title'][:197] + '...'
 911
 912         # Keep for backwards compatibility
 913         info_dict['stitle'] = info_dict['title']
 914
 915         if 'format' not in info_dict:
 916             info_dict['format'] = info_dict['ext']
 917
 918         reason = self._match_entry(info_dict)
 919         if reason is not None:
 920             self.to_screen('[download] ' + reason)
 921             return
 922
 923         self._num_downloads += 1
 924
 925         filename = self.prepare_filename(info_dict)
 926
 927         # Forced printings
 928         if self.params.get('forcetitle', False):
 929             self.to_stdout(info_dict['fulltitle'])
 930         if self.params.get('forceid', False):
 931             self.to_stdout(info_dict['id'])
 932         if self.params.get('forceurl', False):
 933             # For RTMP URLs, also include the playpath
 934             self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
 935         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
 936             self.to_stdout(info_dict['thumbnail'])
 937         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
 938             self.to_stdout(info_dict['description'])
 939         if self.params.get('forcefilename', False) and filename is not None:
 940             self.to_stdout(filename)
 941         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
 942             self.to_stdout(formatSeconds(info_dict['duration']))
 943         if self.params.get('forceformat', False):
 944             self.to_stdout(info_dict['format'])
 945         if self.params.get('forcejson', False):
 946             info_dict['_filename'] = filename
 947             self.to_stdout(json.dumps(info_dict))
 948         if self.params.get('dump_single_json', False):
 949             info_dict['_filename'] = filename
 950
 951         # Do nothing else if in simulate mode
 952         if self.params.get('simulate', False):
 953             return
 954
 955         if filename is None:
 956             return
 957
 958         try:
 959             dn = os.path.dirname(encodeFilename(filename))
 960             if dn and not os.path.exists(dn):
 961                 os.makedirs(dn)
 962         except (OSError, IOError) as err:
 963             self.report_error('unable to create directory ' + compat_str(err))
 964             return
 965
 966         if self.params.get('writedescription', False):
 967             descfn = filename + '.description'
 968             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
 969                 self.to_screen('[info] Video description is already present')
 970             else:
 971                 try:
 972                     self.to_screen('[info] Writing video description to: ' + descfn)
 973                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
 974                         descfile.write(info_dict['description'])
 975                 except (KeyError, TypeError):
 976                     self.report_warning('There\'s no description to write.')
 977                 except (OSError, IOError):
 978                     self.report_error('Cannot write description file ' + descfn)
 979                     return
 980
 981         if self.params.get('writeannotations', False):
 982             annofn = filename + '.annotations.xml'
 983             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
 984                 self.to_screen('[info] Video annotations are already present')
 985             else:
 986                 try:
 987                     self.to_screen('[info] Writing video annotations to: ' + annofn)
 988                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
 989                         annofile.write(info_dict['annotations'])
 990                 except (KeyError, TypeError):
 991                     self.report_warning('There are no annotations to write.')
 992                 except (OSError, IOError):
 993                     self.report_error('Cannot write annotations file: ' + annofn)
 994                     return
 995
 996         subtitles_are_requested = any([self.params.get('writesubtitles', False),
 997                                        self.params.get('writeautomaticsub')])
 998
 999         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
1000             # subtitles download errors are already managed as troubles in relevant IE
1001             # that way it will silently go on when used with unsupporting IE
1002             subtitles = info_dict['subtitles']
1003             sub_format = self.params.get('subtitlesformat', 'srt')
1004             for sub_lang in subtitles.keys():
1005                 sub = subtitles[sub_lang]
1006                 if sub is None:
1007                     continue
1008                 try:
1009                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1010                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1011                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1012                     else:
1013                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1014                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1015                             subfile.write(sub)
1016                 except (OSError, IOError):
1017                     self.report_error('Cannot write subtitles file ' + sub_filename)
1018                     return
1019
1020         if self.params.get('writeinfojson', False):
1021             infofn = os.path.splitext(filename)[0] + '.info.json'
1022             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1023                 self.to_screen('[info] Video description metadata is already present')
1024             else:
1025                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1026                 try:
1027                     write_json_file(info_dict, infofn)
1028                 except (OSError, IOError):
1029                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1030                     return
1031
1032         if self.params.get('writethumbnail', False):
1033             if info_dict.get('thumbnail') is not None:
1034                 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
1035                 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
1036                 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1037                     self.to_screen('[%s] %s: Thumbnail is already present' %
1038                                    (info_dict['extractor'], info_dict['id']))
1039                 else:
1040                     self.to_screen('[%s] %s: Downloading thumbnail ...' %
1041                                    (info_dict['extractor'], info_dict['id']))
1042                     try:
1043                         uf = self.urlopen(info_dict['thumbnail'])
1044                         with open(thumb_filename, 'wb') as thumbf:
1045                             shutil.copyfileobj(uf, thumbf)
1046                         self.to_screen('[%s] %s: Writing thumbnail to: %s' %
1047                                        (info_dict['extractor'], info_dict['id'], thumb_filename))
1048                     except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1049                         self.report_warning('Unable to download thumbnail "%s": %s' %
1050                                             (info_dict['thumbnail'], compat_str(err)))
1051
1052         if not self.params.get('skip_download', False):
1053             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
1054                 success = True
1055             else:
1056                 try:
1057                     def dl(name, info):
1058                         fd = get_suitable_downloader(info)(self, self.params)
1059                         for ph in self._progress_hooks:
1060                             fd.add_progress_hook(ph)
1061                         if self.params.get('verbose'):
1062                             self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1063                         return fd.download(name, info)
1064                     if info_dict.get('requested_formats') is not None:
1065                         downloaded = []
1066                         success = True
1067                         merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1068                         if not merger._executable:
1069                             postprocessors = []
1070                             self.report_warning('You have requested multiple '
1071                                                 'formats but ffmpeg or avconv are not installed.'
1072                                                 ' The formats won\'t be merged')
1073                         else:
1074                             postprocessors = [merger]
1075                         for f in info_dict['requested_formats']:
1076                             new_info = dict(info_dict)
1077                             new_info.update(f)
1078                             fname = self.prepare_filename(new_info)
1079                             fname = prepend_extension(fname, 'f%s' % f['format_id'])
1080                             downloaded.append(fname)
1081                             partial_success = dl(fname, new_info)
1082                             success = success and partial_success
1083                         info_dict['__postprocessors'] = postprocessors
1084                         info_dict['__files_to_merge'] = downloaded
1085                     else:
1086                         # Just a single file
1087                         success = dl(filename, info_dict)
1088                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1089                     self.report_error('unable to download video data: %s' % str(err))
1090                     return
1091                 except (OSError, IOError) as err:
1092                     raise UnavailableVideoError(err)
1093                 except (ContentTooShortError, ) as err:
1094                     self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1095                     return
1096
1097             if success:
1098                 try:
1099                     self.post_process(filename, info_dict)
1100                 except (PostProcessingError) as err:
1101                     self.report_error('postprocessing: %s' % str(err))
1102                     return
1103
1104         self.record_download_archive(info_dict)
1105
1106     def download(self, url_list):
1107         """Download a given list of URLs."""
1108         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1109         if (len(url_list) > 1 and
1110                 '%' not in outtmpl
1111                 and self.params.get('max_downloads') != 1):
1112             raise SameFileError(outtmpl)
1113
1114         for url in url_list:
1115             try:
1116                 # It also downloads the videos
1117                 res = self.extract_info(url)
1118             except UnavailableVideoError:
1119                 self.report_error('unable to download video')
1120             except MaxDownloadsReached:
1121                 self.to_screen('[info] Maximum number of downloaded files reached.')
1122                 raise
1123             else:
1124                 if self.params.get('dump_single_json', False):
1125                     self.to_stdout(json.dumps(res))
1126
1127         return self._download_retcode
1128
1129     def download_with_info_file(self, info_filename):
1130         with io.open(info_filename, 'r', encoding='utf-8') as f:
1131             info = json.load(f)
1132         try:
1133             self.process_ie_result(info, download=True)
1134         except DownloadError:
1135             webpage_url = info.get('webpage_url')
1136             if webpage_url is not None:
1137                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1138                 return self.download([webpage_url])
1139             else:
1140                 raise
1141         return self._download_retcode
1142
1143     def post_process(self, filename, ie_info):
1144         """Run all the postprocessors on the given file."""
1145         info = dict(ie_info)
1146         info['filepath'] = filename
1147         keep_video = None
1148         pps_chain = []
1149         if ie_info.get('__postprocessors') is not None:
1150             pps_chain.extend(ie_info['__postprocessors'])
1151         pps_chain.extend(self._pps)
1152         for pp in pps_chain:
1153             try:
1154                 keep_video_wish, new_info = pp.run(info)
1155                 if keep_video_wish is not None:
1156                     if keep_video_wish:
1157                         keep_video = keep_video_wish
1158                     elif keep_video is None:
1159                         # No clear decision yet, let IE decide
1160                         keep_video = keep_video_wish
1161             except PostProcessingError as e:
1162                 self.report_error(e.msg)
1163         if keep_video is False and not self.params.get('keepvideo', False):
1164             try:
1165                 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1166                 os.remove(encodeFilename(filename))
1167             except (IOError, OSError):
1168                 self.report_warning('Unable to remove downloaded video file')
1169
1170     def _make_archive_id(self, info_dict):
1171         # Future-proof against any change in case
1172         # and backwards compatibility with prior versions
1173         extractor = info_dict.get('extractor_key')
1174         if extractor is None:
1175             if 'id' in info_dict:
1176                 extractor = info_dict.get('ie_key')  # key in a playlist
1177         if extractor is None:
1178             return None  # Incomplete video information
1179         return extractor.lower() + ' ' + info_dict['id']
1180
1181     def in_download_archive(self, info_dict):
1182         fn = self.params.get('download_archive')
1183         if fn is None:
1184             return False
1185
1186         vid_id = self._make_archive_id(info_dict)
1187         if vid_id is None:
1188             return False  # Incomplete video information
1189
1190         try:
1191             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1192                 for line in archive_file:
1193                     if line.strip() == vid_id:
1194                         return True
1195         except IOError as ioe:
1196             if ioe.errno != errno.ENOENT:
1197                 raise
1198         return False
1199
1200     def record_download_archive(self, info_dict):
1201         fn = self.params.get('download_archive')
1202         if fn is None:
1203             return
1204         vid_id = self._make_archive_id(info_dict)
1205         assert vid_id
1206         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1207             archive_file.write(vid_id + '\n')
1208
1209     @staticmethod
1210     def format_resolution(format, default='unknown'):
1211         if format.get('vcodec') == 'none':
1212             return 'audio only'
1213         if format.get('resolution') is not None:
1214             return format['resolution']
1215         if format.get('height') is not None:
1216             if format.get('width') is not None:
1217                 res = '%sx%s' % (format['width'], format['height'])
1218             else:
1219                 res = '%sp' % format['height']
1220         elif format.get('width') is not None:
1221             res = '?x%d' % format['width']
1222         else:
1223             res = default
1224         return res
1225
1226     def _format_note(self, fdict):
1227         res = ''
1228         if fdict.get('ext') in ['f4f', 'f4m']:
1229             res += '(unsupported) '
1230         if fdict.get('format_note') is not None:
1231             res += fdict['format_note'] + ' '
1232         if fdict.get('tbr') is not None:
1233             res += '%4dk ' % fdict['tbr']
1234         if fdict.get('container') is not None:
1235             if res:
1236                 res += ', '
1237             res += '%s container' % fdict['container']
1238         if (fdict.get('vcodec') is not None and
1239                 fdict.get('vcodec') != 'none'):
1240             if res:
1241                 res += ', '
1242             res += fdict['vcodec']
1243             if fdict.get('vbr') is not None:
1244                 res += '@'
1245         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1246             res += 'video@'
1247         if fdict.get('vbr') is not None:
1248             res += '%4dk' % fdict['vbr']
1249         if fdict.get('fps') is not None:
1250             res += ', %sfps' % fdict['fps']
1251         if fdict.get('acodec') is not None:
1252             if res:
1253                 res += ', '
1254             if fdict['acodec'] == 'none':
1255                 res += 'video only'
1256             else:
1257                 res += '%-5s' % fdict['acodec']
1258         elif fdict.get('abr') is not None:
1259             if res:
1260                 res += ', '
1261             res += 'audio'
1262         if fdict.get('abr') is not None:
1263             res += '@%3dk' % fdict['abr']
1264         if fdict.get('asr') is not None:
1265             res += ' (%5dHz)' % fdict['asr']
1266         if fdict.get('filesize') is not None:
1267             if res:
1268                 res += ', '
1269             res += format_bytes(fdict['filesize'])
1270         elif fdict.get('filesize_approx') is not None:
1271             if res:
1272                 res += ', '
1273             res += '~' + format_bytes(fdict['filesize_approx'])
1274         return res
1275
1276     def list_formats(self, info_dict):
1277         def line(format, idlen=20):
1278             return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1279                 format['format_id'],
1280                 format['ext'],
1281                 self.format_resolution(format),
1282                 self._format_note(format),
1283             ))
1284
1285         formats = info_dict.get('formats', [info_dict])
1286         idlen = max(len('format code'),
1287                     max(len(f['format_id']) for f in formats))
1288         formats_s = [line(f, idlen) for f in formats]
1289         if len(formats) > 1:
1290             formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1291             formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1292
1293         header_line = line({
1294             'format_id': 'format code', 'ext': 'extension',
1295             'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1296         self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1297                        (info_dict['id'], header_line, '\n'.join(formats_s)))
1298
1299     def urlopen(self, req):
1300         """ Start an HTTP download """
1301
1302         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1303         # always respected by websites, some tend to give out URLs with non percent-encoded
1304         # non-ASCII characters (see telemb.py, ard.py [#3412])
1305         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1306         # To work around aforementioned issue we will replace request's original URL with
1307         # percent-encoded one
1308         req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
1309         url = req if req_is_string else req.get_full_url()
1310         url_escaped = escape_url(url)
1311
1312         # Substitute URL if any change after escaping
1313         if url != url_escaped:
1314             if req_is_string:
1315                 req = url_escaped
1316             else:
1317                 req = compat_urllib_request.Request(
1318                     url_escaped, data=req.data, headers=req.headers,
1319                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1320
1321         return self._opener.open(req, timeout=self._socket_timeout)
1322
1323     def print_debug_header(self):
1324         if not self.params.get('verbose'):
1325             return
1326
1327         if type('') is not compat_str:
1328             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1329             self.report_warning(
1330                 'Your Python is broken! Update to a newer and supported version')
1331
1332         stdout_encoding = getattr(
1333             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1334         encoding_str = (
1335             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1336                 locale.getpreferredencoding(),
1337                 sys.getfilesystemencoding(),
1338                 stdout_encoding,
1339                 self.get_encoding()))
1340         write_string(encoding_str, encoding=None)
1341
1342         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1343         try:
1344             sp = subprocess.Popen(
1345                 ['git', 'rev-parse', '--short', 'HEAD'],
1346                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1347                 cwd=os.path.dirname(os.path.abspath(__file__)))
1348             out, err = sp.communicate()
1349             out = out.decode().strip()
1350             if re.match('[0-9a-f]+', out):
1351                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1352         except:
1353             try:
1354                 sys.exc_clear()
1355             except:
1356                 pass
1357         self._write_string('[debug] Python version %s - %s\n' % (
1358             platform.python_version(), platform_name()))
1359
1360         exe_versions = FFmpegPostProcessor.get_versions()
1361         exe_versions['rtmpdump'] = rtmpdump_version()
1362         exe_str = ', '.join(
1363             '%s %s' % (exe, v)
1364             for exe, v in sorted(exe_versions.items())
1365             if v
1366         )
1367         if not exe_str:
1368             exe_str = 'none'
1369         self._write_string('[debug] exe versions: %s\n' % exe_str)
1370
1371         proxy_map = {}
1372         for handler in self._opener.handlers:
1373             if hasattr(handler, 'proxies'):
1374                 proxy_map.update(handler.proxies)
1375         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1376
1377     def _setup_opener(self):
1378         timeout_val = self.params.get('socket_timeout')
1379         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1380
1381         opts_cookiefile = self.params.get('cookiefile')
1382         opts_proxy = self.params.get('proxy')
1383
1384         if opts_cookiefile is None:
1385             self.cookiejar = compat_cookiejar.CookieJar()
1386         else:
1387             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1388                 opts_cookiefile)
1389             if os.access(opts_cookiefile, os.R_OK):
1390                 self.cookiejar.load()
1391
1392         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1393             self.cookiejar)
1394         if opts_proxy is not None:
1395             if opts_proxy == '':
1396                 proxies = {}
1397             else:
1398                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1399         else:
1400             proxies = compat_urllib_request.getproxies()
1401             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1402             if 'http' in proxies and 'https' not in proxies:
1403                 proxies['https'] = proxies['http']
1404         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1405
1406         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1407         https_handler = make_HTTPS_handler(
1408             self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1409         ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1410         opener = compat_urllib_request.build_opener(
1411             https_handler, proxy_handler, cookie_processor, ydlh)
1412         # Delete the default user-agent header, which would otherwise apply in
1413         # cases where our custom HTTP handler doesn't come into play
1414         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1415         opener.addheaders = []
1416         self._opener = opener
1417
1418     def encode(self, s):
1419         if isinstance(s, bytes):
1420             return s  # Already encoded
1421
1422         try:
1423             return s.encode(self.get_encoding())
1424         except UnicodeEncodeError as err:
1425             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1426             raise
1427
1428     def get_encoding(self):
1429         encoding = self.params.get('encoding')
1430         if encoding is None:
1431             encoding = preferredencoding()
1432         return encoding