_ Git - youtube-dl/blob - youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import datetime
   8 import errno
   9 import io
  10 import json
  11 import locale
  12 import os
  13 import platform
  14 import re
  15 import shutil
  16 import subprocess
  17 import socket
  18 import sys
  19 import time
  20 import traceback
  21
  22 if os.name == 'nt':
  23     import ctypes
  24
  25 from .compat import (
  26     compat_cookiejar,
  27     compat_expanduser,
  28     compat_http_client,
  29     compat_str,
  30     compat_urllib_error,
  31     compat_urllib_request,
  32 )
  33 from .utils import (
  34     escape_url,
  35     ContentTooShortError,
  36     date_from_str,
  37     DateRange,
  38     DEFAULT_OUTTMPL,
  39     determine_ext,
  40     DownloadError,
  41     encodeFilename,
  42     ExtractorError,
  43     format_bytes,
  44     formatSeconds,
  45     get_term_width,
  46     locked_file,
  47     make_HTTPS_handler,
  48     MaxDownloadsReached,
  49     PagedList,
  50     PostProcessingError,
  51     platform_name,
  52     preferredencoding,
  53     SameFileError,
  54     sanitize_filename,
  55     subtitles_filename,
  56     takewhile_inclusive,
  57     UnavailableVideoError,
  58     url_basename,
  59     write_json_file,
  60     write_string,
  61     YoutubeDLHandler,
  62     prepend_extension,
  63     args_to_str,
  64 )
  65 from .cache import Cache
  66 from .extractor import get_info_extractor, gen_extractors
  67 from .downloader import get_suitable_downloader
  68 from .downloader.rtmp import rtmpdump_version
  69 from .postprocessor import FFmpegMergerPP, FFmpegPostProcessor
  70 from .version import __version__
  71
  72
  73 class YoutubeDL(object):
  74     """YoutubeDL class.
  75
   76     YoutubeDL objects are the ones responsible for downloading the
  77     actual video file and writing it to disk if the user has requested
  78     it, among some other tasks. In most cases there should be one per
  79     program. As, given a video URL, the downloader doesn't know how to
  80     extract all the needed information, task that InfoExtractors do, it
  81     has to pass the URL to one of them.
  82
  83     For this, YoutubeDL objects have a method that allows
  84     InfoExtractors to be registered in a given order. When it is passed
   85     a URL, the YoutubeDL object hands it to the first InfoExtractor it
  86     finds that reports being able to handle it. The InfoExtractor extracts
  87     all the information about the video or videos the URL refers to, and
   88     YoutubeDL processes the extracted information, possibly using a File
  89     Downloader to download the video.
  90
  91     YoutubeDL objects accept a lot of parameters. In order not to saturate
  92     the object constructor with arguments, it receives a dictionary of
  93     options instead. These options are available through the params
  94     attribute for the InfoExtractors to use. The YoutubeDL also
  95     registers itself as the downloader in charge for the InfoExtractors
  96     that are added to it, so this is a "mutual registration".
  97
  98     Available options:
  99
 100     username:          Username for authentication purposes.
 101     password:          Password for authentication purposes.
  102     videopassword:     Password for accessing a video.
 103     usenetrc:          Use netrc for authentication instead.
 104     verbose:           Print additional info to stdout.
 105     quiet:             Do not print messages to stdout.
 106     no_warnings:       Do not print out anything for warnings.
 107     forceurl:          Force printing final URL.
 108     forcetitle:        Force printing title.
 109     forceid:           Force printing ID.
 110     forcethumbnail:    Force printing thumbnail URL.
 111     forcedescription:  Force printing description.
 112     forcefilename:     Force printing final filename.
 113     forceduration:     Force printing duration.
 114     forcejson:         Force printing info_dict as JSON.
 115     dump_single_json:  Force printing the info_dict of the whole playlist
 116                        (or video) as a single JSON line.
 117     simulate:          Do not download the video files.
 118     format:            Video format code.
 119     format_limit:      Highest quality format to try.
 120     outtmpl:           Template for output names.
 121     restrictfilenames: Do not allow "&" and spaces in file names
 122     ignoreerrors:      Do not stop on download errors.
 123     nooverwrites:      Prevent overwriting files.
 124     playliststart:     Playlist item to start at.
 125     playlistend:       Playlist item to end at.
 126     matchtitle:        Download only matching titles.
 127     rejecttitle:       Reject downloads for matching titles.
 128     logger:            Log messages to a logging.Logger instance.
 129     logtostderr:       Log messages to stderr instead of stdout.
 130     writedescription:  Write the video description to a .description file
 131     writeinfojson:     Write the video description to a .info.json file
 132     writeannotations:  Write the video annotations to a .annotations.xml file
 133     writethumbnail:    Write the thumbnail image to a file
 134     writesubtitles:    Write the video subtitles to a file
 135     writeautomaticsub: Write the automatic subtitles to a file
 136     allsubtitles:      Downloads all the subtitles of the video
 137                        (requires writesubtitles or writeautomaticsub)
 138     listsubtitles:     Lists all available subtitles for the video
 139     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
 140     subtitleslangs:    List of languages of the subtitles to download
 141     keepvideo:         Keep the video file after post-processing
 142     daterange:         A DateRange object, download only if the upload_date is in the range.
 143     skip_download:     Skip the actual download of the video file
 144     cachedir:          Location of the cache files in the filesystem.
 145                        False to disable filesystem cache.
 146     noplaylist:        Download single video instead of a playlist if in doubt.
 147     age_limit:         An integer representing the user's age in years.
 148                        Unsuitable videos for the given age are skipped.
 149     min_views:         An integer representing the minimum view count the video
 150                        must have in order to not be skipped.
 151                        Videos without view count information are always
 152                        downloaded. None for no limit.
 153     max_views:         An integer representing the maximum view count.
 154                        Videos that are more popular than that are not
 155                        downloaded.
 156                        Videos without view count information are always
 157                        downloaded. None for no limit.
 158     download_archive:  File name of a file where all downloads are recorded.
 159                        Videos already present in the file are not downloaded
 160                        again.
 161     cookiefile:        File name where cookies should be read from and dumped to.
 162     nocheckcertificate:Do not verify SSL certificates
 163     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 164                        At the moment, this is only supported by YouTube.
 165     proxy:             URL of the proxy server to use
 166     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 167     bidi_workaround:   Work around buggy terminals without bidirectional text
  168                        support, using fribidi
 169     debug_printtraffic:Print out sent and received HTTP traffic
 170     include_ads:       Download ads as well
 171     default_search:    Prepend this string if an input url is not valid.
 172                        'auto' for elaborate guessing
 173     encoding:          Use this encoding instead of the system-specified.
 174     extract_flat:      Do not resolve URLs, return the immediate result.
 175                        Pass in 'in_playlist' to only show this behavior for
 176                        playlist items.
 177
 178     The following parameters are not used by YoutubeDL itself, they are used by
 179     the FileDownloader:
 180     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 181     noresizebuffer, retries, continuedl, noprogress, consoletitle
 182
 183     The following options are used by the post processors:
 184     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
 185                        otherwise prefer avconv.
 186     exec_cmd:          Arbitrary command to run after downloading
 187     """
 188
 189     params = None
 190     _ies = []
 191     _pps = []
 192     _download_retcode = None
 193     _num_downloads = None
 194     _screen_file = None
 195
 196     def __init__(self, params=None, auto_init=True):
 197         """Create a FileDownloader object with the given options."""
 198         if params is None:
 199             params = {}
 200         self._ies = []
 201         self._ies_instances = {}
 202         self._pps = []
 203         self._progress_hooks = []
 204         self._download_retcode = 0
 205         self._num_downloads = 0
 206         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 207         self._err_file = sys.stderr
 208         self.params = params
 209         self.cache = Cache(self)
 210
 211         if params.get('bidi_workaround', False):
 212             try:
 213                 import pty
 214                 master, slave = pty.openpty()
 215                 width = get_term_width()
 216                 if width is None:
 217                     width_args = []
 218                 else:
 219                     width_args = ['-w', str(width)]
 220                 sp_kwargs = dict(
 221                     stdin=subprocess.PIPE,
 222                     stdout=slave,
 223                     stderr=self._err_file)
 224                 try:
 225                     self._output_process = subprocess.Popen(
 226                         ['bidiv'] + width_args, **sp_kwargs
 227                     )
 228                 except OSError:
 229                     self._output_process = subprocess.Popen(
 230                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 231                 self._output_channel = os.fdopen(master, 'rb')
 232             except OSError as ose:
 233                 if ose.errno == 2:
 234                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 235                 else:
 236                     raise
 237
 238         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 239                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 240                 and not params.get('restrictfilenames', False)):
 241             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 242             self.report_warning(
 243                 'Assuming --restrict-filenames since file system encoding '
 244                 'cannot encode all characters. '
 245                 'Set the LC_ALL environment variable to fix this.')
 246             self.params['restrictfilenames'] = True
 247
 248         if '%(stitle)s' in self.params.get('outtmpl', ''):
 249             self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 250
 251         self._setup_opener()
 252
 253         if auto_init:
 254             self.print_debug_header()
 255             self.add_default_info_extractors()
 256
 257     def warn_if_short_id(self, argv):
 258         # short YouTube ID starting with dash?
 259         idxs = [
 260             i for i, a in enumerate(argv)
 261             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 262         if idxs:
 263             correct_argv = (
 264                 ['youtube-dl'] +
 265                 [a for i, a in enumerate(argv) if i not in idxs] +
 266                 ['--'] + [argv[i] for i in idxs]
 267             )
 268             self.report_warning(
 269                 'Long argument string detected. '
 270                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 271                 args_to_str(correct_argv))
 272
 273     def add_info_extractor(self, ie):
 274         """Add an InfoExtractor object to the end of the list."""
 275         self._ies.append(ie)
 276         self._ies_instances[ie.ie_key()] = ie
 277         ie.set_downloader(self)
 278
 279     def get_info_extractor(self, ie_key):
 280         """
 281         Get an instance of an IE with name ie_key, it will try to get one from
 282         the _ies list, if there's no instance it will create a new one and add
 283         it to the extractor list.
 284         """
 285         ie = self._ies_instances.get(ie_key)
 286         if ie is None:
 287             ie = get_info_extractor(ie_key)()
 288             self.add_info_extractor(ie)
 289         return ie
 290
 291     def add_default_info_extractors(self):
 292         """
 293         Add the InfoExtractors returned by gen_extractors to the end of the list
 294         """
 295         for ie in gen_extractors():
 296             self.add_info_extractor(ie)
 297
 298     def add_post_processor(self, pp):
 299         """Add a PostProcessor object to the end of the chain."""
 300         self._pps.append(pp)
 301         pp.set_downloader(self)
 302
 303     def add_progress_hook(self, ph):
 304         """Add the progress hook (currently only for the file downloader)"""
 305         self._progress_hooks.append(ph)
 306
 307     def _bidi_workaround(self, message):
 308         if not hasattr(self, '_output_channel'):
 309             return message
 310
 311         assert hasattr(self, '_output_process')
 312         assert isinstance(message, compat_str)
 313         line_count = message.count('\n') + 1
 314         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 315         self._output_process.stdin.flush()
 316         res = ''.join(self._output_channel.readline().decode('utf-8')
 317                       for _ in range(line_count))
 318         return res[:-len('\n')]
 319
 320     def to_screen(self, message, skip_eol=False):
 321         """Print message to stdout if not in quiet mode."""
 322         return self.to_stdout(message, skip_eol, check_quiet=True)
 323
 324     def _write_string(self, s, out=None):
 325         write_string(s, out=out, encoding=self.params.get('encoding'))
 326
 327     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 328         """Print message to stdout if not in quiet mode."""
 329         if self.params.get('logger'):
 330             self.params['logger'].debug(message)
 331         elif not check_quiet or not self.params.get('quiet', False):
 332             message = self._bidi_workaround(message)
 333             terminator = ['\n', ''][skip_eol]
 334             output = message + terminator
 335
 336             self._write_string(output, self._screen_file)
 337
 338     def to_stderr(self, message):
 339         """Print message to stderr."""
 340         assert isinstance(message, compat_str)
 341         if self.params.get('logger'):
 342             self.params['logger'].error(message)
 343         else:
 344             message = self._bidi_workaround(message)
 345             output = message + '\n'
 346             self._write_string(output, self._err_file)
 347
 348     def to_console_title(self, message):
 349         if not self.params.get('consoletitle', False):
 350             return
 351         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 352             # c_wchar_p() might not be necessary if `message` is
 353             # already of type unicode()
 354             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 355         elif 'TERM' in os.environ:
 356             self._write_string('\033]0;%s\007' % message, self._screen_file)
 357
 358     def save_console_title(self):
 359         if not self.params.get('consoletitle', False):
 360             return
 361         if 'TERM' in os.environ:
 362             # Save the title on stack
 363             self._write_string('\033[22;0t', self._screen_file)
 364
 365     def restore_console_title(self):
 366         if not self.params.get('consoletitle', False):
 367             return
 368         if 'TERM' in os.environ:
 369             # Restore the title from stack
 370             self._write_string('\033[23;0t', self._screen_file)
 371
 372     def __enter__(self):
 373         self.save_console_title()
 374         return self
 375
 376     def __exit__(self, *args):
 377         self.restore_console_title()
 378
 379         if self.params.get('cookiefile') is not None:
 380             self.cookiejar.save()
 381
 382     def trouble(self, message=None, tb=None):
 383         """Determine action to take when a download problem appears.
 384
 385         Depending on if the downloader has been configured to ignore
 386         download errors or not, this method may throw an exception or
 387         not when errors are found, after printing the message.
 388
 389         tb, if given, is additional traceback information.
 390         """
 391         if message is not None:
 392             self.to_stderr(message)
 393         if self.params.get('verbose'):
 394             if tb is None:
 395                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 396                     tb = ''
 397                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 398                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 399                     tb += compat_str(traceback.format_exc())
 400                 else:
 401                     tb_data = traceback.format_list(traceback.extract_stack())
 402                     tb = ''.join(tb_data)
 403             self.to_stderr(tb)
 404         if not self.params.get('ignoreerrors', False):
 405             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 406                 exc_info = sys.exc_info()[1].exc_info
 407             else:
 408                 exc_info = sys.exc_info()
 409             raise DownloadError(message, exc_info)
 410         self._download_retcode = 1
 411
 412     def report_warning(self, message):
 413         '''
 414         Print the message to stderr, it will be prefixed with 'WARNING:'
 415         If stderr is a tty file the 'WARNING:' will be colored
 416         '''
 417         if self.params.get('logger') is not None:
 418             self.params['logger'].warning(message)
 419         else:
 420             if self.params.get('no_warnings'):
 421                 return
 422             if self._err_file.isatty() and os.name != 'nt':
 423                 _msg_header = '\033[0;33mWARNING:\033[0m'
 424             else:
 425                 _msg_header = 'WARNING:'
 426             warning_message = '%s %s' % (_msg_header, message)
 427             self.to_stderr(warning_message)
 428
 429     def report_error(self, message, tb=None):
 430         '''
 431         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 432         in red if stderr is a tty file.
 433         '''
 434         if self._err_file.isatty() and os.name != 'nt':
 435             _msg_header = '\033[0;31mERROR:\033[0m'
 436         else:
 437             _msg_header = 'ERROR:'
 438         error_message = '%s %s' % (_msg_header, message)
 439         self.trouble(error_message, tb)
 440
 441     def report_file_already_downloaded(self, file_name):
 442         """Report file has already been fully downloaded."""
 443         try:
 444             self.to_screen('[download] %s has already been downloaded' % file_name)
 445         except UnicodeEncodeError:
 446             self.to_screen('[download] The file has already been downloaded')
 447
 448     def prepare_filename(self, info_dict):
 449         """Generate the output filename."""
 450         try:
 451             template_dict = dict(info_dict)
 452
 453             template_dict['epoch'] = int(time.time())
 454             autonumber_size = self.params.get('autonumber_size')
 455             if autonumber_size is None:
 456                 autonumber_size = 5
 457             autonumber_templ = '%0' + str(autonumber_size) + 'd'
 458             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 459             if template_dict.get('playlist_index') is not None:
 460                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
 461             if template_dict.get('resolution') is None:
 462                 if template_dict.get('width') and template_dict.get('height'):
 463                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 464                 elif template_dict.get('height'):
 465                     template_dict['resolution'] = '%sp' % template_dict['height']
 466                 elif template_dict.get('width'):
 467                     template_dict['resolution'] = '?x%d' % template_dict['width']
 468
 469             sanitize = lambda k, v: sanitize_filename(
 470                 compat_str(v),
 471                 restricted=self.params.get('restrictfilenames'),
 472                 is_id=(k == 'id'))
 473             template_dict = dict((k, sanitize(k, v))
 474                                  for k, v in template_dict.items()
 475                                  if v is not None)
 476             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 477
 478             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 479             tmpl = compat_expanduser(outtmpl)
 480             filename = tmpl % template_dict
 481             return filename
 482         except ValueError as err:
 483             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 484             return None
 485
 486     def _match_entry(self, info_dict):
 487         """ Returns None iff the file should be downloaded """
 488
 489         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 490         if 'title' in info_dict:
 491             # This can happen when we're just evaluating the playlist
 492             title = info_dict['title']
 493             matchtitle = self.params.get('matchtitle', False)
 494             if matchtitle:
 495                 if not re.search(matchtitle, title, re.IGNORECASE):
 496                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 497             rejecttitle = self.params.get('rejecttitle', False)
 498             if rejecttitle:
 499                 if re.search(rejecttitle, title, re.IGNORECASE):
 500                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 501         date = info_dict.get('upload_date', None)
 502         if date is not None:
 503             dateRange = self.params.get('daterange', DateRange())
 504             if date not in dateRange:
 505                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 506         view_count = info_dict.get('view_count', None)
 507         if view_count is not None:
 508             min_views = self.params.get('min_views')
 509             if min_views is not None and view_count < min_views:
 510                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 511             max_views = self.params.get('max_views')
 512             if max_views is not None and view_count > max_views:
 513                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 514         age_limit = self.params.get('age_limit')
 515         if age_limit is not None:
 516             actual_age_limit = info_dict.get('age_limit')
 517             if actual_age_limit is None:
 518                 actual_age_limit = 0
 519             if age_limit < actual_age_limit:
 520                 return 'Skipping "' + title + '" because it is age restricted'
 521         if self.in_download_archive(info_dict):
 522             return '%s has already been recorded in archive' % video_title
 523         return None
 524
 525     @staticmethod
 526     def add_extra_info(info_dict, extra_info):
 527         '''Set the keys from extra_info in info dict if they are missing'''
 528         for key, value in extra_info.items():
 529             info_dict.setdefault(key, value)
 530
 531     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 532                      process=True):
 533         '''
 534         Returns a list with a dictionary for each video we find.
 535         If 'download', also downloads the videos.
 536         extra_info is a dict containing the extra values to add to each result
 537          '''
 538
 539         if ie_key:
 540             ies = [self.get_info_extractor(ie_key)]
 541         else:
 542             ies = self._ies
 543
 544         for ie in ies:
 545             if not ie.suitable(url):
 546                 continue
 547
 548             if not ie.working():
 549                 self.report_warning('The program functionality for this site has been marked as broken, '
 550                                     'and will probably not work.')
 551
 552             try:
 553                 ie_result = ie.extract(url)
 554                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
 555                     break
 556                 if isinstance(ie_result, list):
 557                     # Backwards compatibility: old IE result format
 558                     ie_result = {
 559                         '_type': 'compat_list',
 560                         'entries': ie_result,
 561                     }
 562                 self.add_default_extra_info(ie_result, ie, url)
 563                 if process:
 564                     return self.process_ie_result(ie_result, download, extra_info)
 565                 else:
 566                     return ie_result
 567             except ExtractorError as de:  # An error we somewhat expected
 568                 self.report_error(compat_str(de), de.format_traceback())
 569                 break
 570             except MaxDownloadsReached:
 571                 raise
 572             except Exception as e:
 573                 if self.params.get('ignoreerrors', False):
 574                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 575                     break
 576                 else:
 577                     raise
 578         else:
 579             self.report_error('no suitable InfoExtractor for URL %s' % url)
 580
 581     def add_default_extra_info(self, ie_result, ie, url):
 582         self.add_extra_info(ie_result, {
 583             'extractor': ie.IE_NAME,
 584             'webpage_url': url,
 585             'webpage_url_basename': url_basename(url),
 586             'extractor_key': ie.ie_key(),
 587         })
 588
 589     def process_ie_result(self, ie_result, download=True, extra_info={}):
 590         """
 591         Take the result of the ie(may be modified) and resolve all unresolved
 592         references (URLs, playlist items).
 593
 594         It will also download the videos if 'download'.
 595         Returns the resolved ie_result.
 596         """
 597
 598         result_type = ie_result.get('_type', 'video')
 599
 600         if result_type in ('url', 'url_transparent'):
 601             extract_flat = self.params.get('extract_flat', False)
 602             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
 603                     extract_flat is True):
 604                 if self.params.get('forcejson', False):
 605                     self.to_stdout(json.dumps(ie_result))
 606                 return ie_result
 607
 608         if result_type == 'video':
 609             self.add_extra_info(ie_result, extra_info)
 610             return self.process_video_result(ie_result, download=download)
 611         elif result_type == 'url':
 612             # We have to add extra_info to the results because it may be
 613             # contained in a playlist
 614             return self.extract_info(ie_result['url'],
 615                                      download,
 616                                      ie_key=ie_result.get('ie_key'),
 617                                      extra_info=extra_info)
 618         elif result_type == 'url_transparent':
 619             # Use the information from the embedding page
 620             info = self.extract_info(
 621                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 622                 extra_info=extra_info, download=False, process=False)
 623
 624             def make_result(embedded_info):
 625                 new_result = ie_result.copy()
 626                 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
 627                           'entries', 'ie_key', 'duration',
 628                           'subtitles', 'annotations', 'format',
 629                           'thumbnail', 'thumbnails'):
 630                     if f in new_result:
 631                         del new_result[f]
 632                     if f in embedded_info:
 633                         new_result[f] = embedded_info[f]
 634                 return new_result
 635             new_result = make_result(info)
 636
 637             assert new_result.get('_type') != 'url_transparent'
 638             if new_result.get('_type') == 'compat_list':
 639                 new_result['entries'] = [
 640                     make_result(e) for e in new_result['entries']]
 641
 642             return self.process_ie_result(
 643                 new_result, download=download, extra_info=extra_info)
 644         elif result_type == 'playlist' or result_type == 'multi_video':
 645             # We process each entry in the playlist
 646             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 647             self.to_screen('[download] Downloading playlist: %s' % playlist)
 648
 649             playlist_results = []
 650
 651             playliststart = self.params.get('playliststart', 1) - 1
 652             playlistend = self.params.get('playlistend', None)
 653             # For backwards compatibility, interpret -1 as whole list
 654             if playlistend == -1:
 655                 playlistend = None
 656
 657             if isinstance(ie_result['entries'], list):
 658                 n_all_entries = len(ie_result['entries'])
 659                 entries = ie_result['entries'][playliststart:playlistend]
 660                 n_entries = len(entries)
 661                 self.to_screen(
 662                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
 663                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
 664             else:
 665                 assert isinstance(ie_result['entries'], PagedList)
 666                 entries = ie_result['entries'].getslice(
 667                     playliststart, playlistend)
 668                 n_entries = len(entries)
 669                 self.to_screen(
 670                     "[%s] playlist %s: Downloading %d videos" %
 671                     (ie_result['extractor'], playlist, n_entries))
 672
 673             for i, entry in enumerate(entries, 1):
 674                 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
 675                 extra = {
 676                     'n_entries': n_entries,
 677                     'playlist': playlist,
 678                     'playlist_id': ie_result.get('id'),
 679                     'playlist_title': ie_result.get('title'),
 680                     'playlist_index': i + playliststart,
 681                     'extractor': ie_result['extractor'],
 682                     'webpage_url': ie_result['webpage_url'],
 683                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
 684                     'extractor_key': ie_result['extractor_key'],
 685                 }
 686
 687                 reason = self._match_entry(entry)
 688                 if reason is not None:
 689                     self.to_screen('[download] ' + reason)
 690                     continue
 691
 692                 entry_result = self.process_ie_result(entry,
 693                                                       download=download,
 694                                                       extra_info=extra)
 695                 playlist_results.append(entry_result)
 696             ie_result['entries'] = playlist_results
 697             return ie_result
 698         elif result_type == 'compat_list':
 699             self.report_warning(
 700                 'Extractor %s returned a compat_list result. '
 701                 'It needs to be updated.' % ie_result.get('extractor'))
 702
 703             def _fixup(r):
 704                 self.add_extra_info(
 705                     r,
 706                     {
 707                         'extractor': ie_result['extractor'],
 708                         'webpage_url': ie_result['webpage_url'],
 709                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
 710                         'extractor_key': ie_result['extractor_key'],
 711                     }
 712                 )
 713                 return r
 714             ie_result['entries'] = [
 715                 self.process_ie_result(_fixup(r), download, extra_info)
 716                 for r in ie_result['entries']
 717             ]
 718             return ie_result
 719         else:
 720             raise Exception('Invalid result type: %s' % result_type)
 721
 722     def select_format(self, format_spec, available_formats):
 723         if format_spec == 'best' or format_spec is None:
 724             return available_formats[-1]
 725         elif format_spec == 'worst':
 726             return available_formats[0]
 727         elif format_spec == 'bestaudio':
 728             audio_formats = [
 729                 f for f in available_formats
 730                 if f.get('vcodec') == 'none']
 731             if audio_formats:
 732                 return audio_formats[-1]
 733         elif format_spec == 'worstaudio':
 734             audio_formats = [
 735                 f for f in available_formats
 736                 if f.get('vcodec') == 'none']
 737             if audio_formats:
 738                 return audio_formats[0]
 739         elif format_spec == 'bestvideo':
 740             video_formats = [
 741                 f for f in available_formats
 742                 if f.get('acodec') == 'none']
 743             if video_formats:
 744                 return video_formats[-1]
 745         elif format_spec == 'worstvideo':
 746             video_formats = [
 747                 f for f in available_formats
 748                 if f.get('acodec') == 'none']
 749             if video_formats:
 750                 return video_formats[0]
 751         else:
 752             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
 753             if format_spec in extensions:
 754                 filter_f = lambda f: f['ext'] == format_spec
 755             else:
 756                 filter_f = lambda f: f['format_id'] == format_spec
 757             matches = list(filter(filter_f, available_formats))
 758             if matches:
 759                 return matches[-1]
 760         return None
 761
 762     def process_video_result(self, info_dict, download=True):
 763         assert info_dict.get('_type', 'video') == 'video'
 764
 765         if 'id' not in info_dict:
 766             raise ExtractorError('Missing "id" field in extractor result')
 767         if 'title' not in info_dict:
 768             raise ExtractorError('Missing "title" field in extractor result')
 769
 770         if 'playlist' not in info_dict:
 771             # It isn't part of a playlist
 772             info_dict['playlist'] = None
 773             info_dict['playlist_index'] = None
 774
 775         thumbnails = info_dict.get('thumbnails')
 776         if thumbnails:
 777             thumbnails.sort(key=lambda t: (
 778                 t.get('width'), t.get('height'), t.get('url')))
 779             for t in thumbnails:
 780                 if 'width' in t and 'height' in t:
 781                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
 782
 783         if thumbnails and 'thumbnail' not in info_dict:
 784             info_dict['thumbnail'] = thumbnails[-1]['url']
 785
 786         if 'display_id' not in info_dict and 'id' in info_dict:
 787             info_dict['display_id'] = info_dict['id']
 788
 789         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
 790             # Working around negative timestamps in Windows
 791             # (see http://bugs.python.org/issue1646728)
 792             if info_dict['timestamp'] < 0 and os.name == 'nt':
 793                 info_dict['timestamp'] = 0
 794             upload_date = datetime.datetime.utcfromtimestamp(
 795                 info_dict['timestamp'])
 796             info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
 797
 798         # This extractors handle format selection themselves
 799         if info_dict['extractor'] in ['Youku']:
 800             if download:
 801                 self.process_info(info_dict)
 802             return info_dict
 803
 804         # We now pick which formats have to be downloaded
 805         if info_dict.get('formats') is None:
 806             # There's only one format available
 807             formats = [info_dict]
 808         else:
 809             formats = info_dict['formats']
 810
 811         if not formats:
 812             raise ExtractorError('No video formats found!')
 813
 814         # We check that all the formats have the format and format_id fields
 815         for i, format in enumerate(formats):
 816             if 'url' not in format:
 817                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
 818
 819             if format.get('format_id') is None:
 820                 format['format_id'] = compat_str(i)
 821             if format.get('format') is None:
 822                 format['format'] = '{id} - {res}{note}'.format(
 823                     id=format['format_id'],
 824                     res=self.format_resolution(format),
 825                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
 826                 )
 827             # Automatically determine file extension if missing
 828             if 'ext' not in format:
 829                 format['ext'] = determine_ext(format['url']).lower()
 830
 831         format_limit = self.params.get('format_limit', None)
 832         if format_limit:
 833             formats = list(takewhile_inclusive(
 834                 lambda f: f['format_id'] != format_limit, formats
 835             ))
 836
 837         # TODO Central sorting goes here
 838
 839         if formats[0] is not info_dict:
 840             # only set the 'formats' fields if the original info_dict list them
 841             # otherwise we end up with a circular reference, the first (and unique)
 842             # element in the 'formats' field in info_dict is info_dict itself,
 843             # wich can't be exported to json
 844             info_dict['formats'] = formats
 845         if self.params.get('listformats', None):
 846             self.list_formats(info_dict)
 847             return
 848
 849         req_format = self.params.get('format')
 850         if req_format is None:
 851             req_format = 'best'
 852         formats_to_download = []
 853         # The -1 is for supporting YoutubeIE
 854         if req_format in ('-1', 'all'):
 855             formats_to_download = formats
 856         else:
 857             for rfstr in req_format.split(','):
 858                 # We can accept formats requested in the format: 34/5/best, we pick
 859                 # the first that is available, starting from left
 860                 req_formats = rfstr.split('/')
 861                 for rf in req_formats:
 862                     if re.match(r'.+?\+.+?', rf) is not None:
 863                         # Two formats have been requested like '137+139'
 864                         format_1, format_2 = rf.split('+')
 865                         formats_info = (self.select_format(format_1, formats),
 866                                         self.select_format(format_2, formats))
 867                         if all(formats_info):
 868                             # The first format must contain the video and the
 869                             # second the audio
 870                             if formats_info[0].get('vcodec') == 'none':
 871                                 self.report_error('The first format must '
 872                                                   'contain the video, try using '
 873                                                   '"-f %s+%s"' % (format_2, format_1))
 874                                 return
 875                             selected_format = {
 876                                 'requested_formats': formats_info,
 877                                 'format': rf,
 878                                 'ext': formats_info[0]['ext'],
 879                             }
 880                         else:
 881                             selected_format = None
 882                     else:
 883                         selected_format = self.select_format(rf, formats)
 884                     if selected_format is not None:
 885                         formats_to_download.append(selected_format)
 886                         break
 887         if not formats_to_download:
 888             raise ExtractorError('requested format not available',
 889                                  expected=True)
 890
 891         if download:
 892             if len(formats_to_download) > 1:
 893                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
 894             for format in formats_to_download:
 895                 new_info = dict(info_dict)
 896                 new_info.update(format)
 897                 self.process_info(new_info)
 898         # We update the info dict with the best quality format (backwards compatibility)
 899         info_dict.update(formats_to_download[-1])
 900         return info_dict
 901
 902     def process_info(self, info_dict):
 903         """Process a single resolved IE result."""
 904
 905         assert info_dict.get('_type', 'video') == 'video'
 906
 907         max_downloads = self.params.get('max_downloads')
 908         if max_downloads is not None:
 909             if self._num_downloads >= int(max_downloads):
 910                 raise MaxDownloadsReached()
 911
 912         info_dict['fulltitle'] = info_dict['title']
 913         if len(info_dict['title']) > 200:
 914             info_dict['title'] = info_dict['title'][:197] + '...'
 915
 916         # Keep for backwards compatibility
 917         info_dict['stitle'] = info_dict['title']
 918
 919         if 'format' not in info_dict:
 920             info_dict['format'] = info_dict['ext']
 921
 922         reason = self._match_entry(info_dict)
 923         if reason is not None:
 924             self.to_screen('[download] ' + reason)
 925             return
 926
 927         self._num_downloads += 1
 928
 929         filename = self.prepare_filename(info_dict)
 930
 931         # Forced printings
 932         if self.params.get('forcetitle', False):
 933             self.to_stdout(info_dict['fulltitle'])
 934         if self.params.get('forceid', False):
 935             self.to_stdout(info_dict['id'])
 936         if self.params.get('forceurl', False):
 937             # For RTMP URLs, also include the playpath
 938             self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
 939         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
 940             self.to_stdout(info_dict['thumbnail'])
 941         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
 942             self.to_stdout(info_dict['description'])
 943         if self.params.get('forcefilename', False) and filename is not None:
 944             self.to_stdout(filename)
 945         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
 946             self.to_stdout(formatSeconds(info_dict['duration']))
 947         if self.params.get('forceformat', False):
 948             self.to_stdout(info_dict['format'])
 949         if self.params.get('forcejson', False):
 950             info_dict['_filename'] = filename
 951             self.to_stdout(json.dumps(info_dict))
 952         if self.params.get('dump_single_json', False):
 953             info_dict['_filename'] = filename
 954
 955         # Do nothing else if in simulate mode
 956         if self.params.get('simulate', False):
 957             return
 958
 959         if filename is None:
 960             return
 961
 962         try:
 963             dn = os.path.dirname(encodeFilename(filename))
 964             if dn and not os.path.exists(dn):
 965                 os.makedirs(dn)
 966         except (OSError, IOError) as err:
 967             self.report_error('unable to create directory ' + compat_str(err))
 968             return
 969
 970         if self.params.get('writedescription', False):
 971             descfn = filename + '.description'
 972             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
 973                 self.to_screen('[info] Video description is already present')
 974             else:
 975                 try:
 976                     self.to_screen('[info] Writing video description to: ' + descfn)
 977                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
 978                         descfile.write(info_dict['description'])
 979                 except (KeyError, TypeError):
 980                     self.report_warning('There\'s no description to write.')
 981                 except (OSError, IOError):
 982                     self.report_error('Cannot write description file ' + descfn)
 983                     return
 984
 985         if self.params.get('writeannotations', False):
 986             annofn = filename + '.annotations.xml'
 987             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
 988                 self.to_screen('[info] Video annotations are already present')
 989             else:
 990                 try:
 991                     self.to_screen('[info] Writing video annotations to: ' + annofn)
 992                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
 993                         annofile.write(info_dict['annotations'])
 994                 except (KeyError, TypeError):
 995                     self.report_warning('There are no annotations to write.')
 996                 except (OSError, IOError):
 997                     self.report_error('Cannot write annotations file: ' + annofn)
 998                     return
 999
1000         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1001                                        self.params.get('writeautomaticsub')])
1002
1003         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
1004             # subtitles download errors are already managed as troubles in relevant IE
1005             # that way it will silently go on when used with unsupporting IE
1006             subtitles = info_dict['subtitles']
1007             sub_format = self.params.get('subtitlesformat', 'srt')
1008             for sub_lang in subtitles.keys():
1009                 sub = subtitles[sub_lang]
1010                 if sub is None:
1011                     continue
1012                 try:
1013                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1014                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1015                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1016                     else:
1017                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1018                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1019                             subfile.write(sub)
1020                 except (OSError, IOError):
1021                     self.report_error('Cannot write subtitles file ' + sub_filename)
1022                     return
1023
1024         if self.params.get('writeinfojson', False):
1025             infofn = os.path.splitext(filename)[0] + '.info.json'
1026             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1027                 self.to_screen('[info] Video description metadata is already present')
1028             else:
1029                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1030                 try:
1031                     write_json_file(info_dict, infofn)
1032                 except (OSError, IOError):
1033                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1034                     return
1035
1036         if self.params.get('writethumbnail', False):
1037             if info_dict.get('thumbnail') is not None:
1038                 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
1039                 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
1040                 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1041                     self.to_screen('[%s] %s: Thumbnail is already present' %
1042                                    (info_dict['extractor'], info_dict['id']))
1043                 else:
1044                     self.to_screen('[%s] %s: Downloading thumbnail ...' %
1045                                    (info_dict['extractor'], info_dict['id']))
1046                     try:
1047                         uf = self.urlopen(info_dict['thumbnail'])
1048                         with open(thumb_filename, 'wb') as thumbf:
1049                             shutil.copyfileobj(uf, thumbf)
1050                         self.to_screen('[%s] %s: Writing thumbnail to: %s' %
1051                                        (info_dict['extractor'], info_dict['id'], thumb_filename))
1052                     except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1053                         self.report_warning('Unable to download thumbnail "%s": %s' %
1054                                             (info_dict['thumbnail'], compat_str(err)))
1055
1056         if not self.params.get('skip_download', False):
1057             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
1058                 success = True
1059             else:
1060                 try:
1061                     def dl(name, info):
1062                         fd = get_suitable_downloader(info)(self, self.params)
1063                         for ph in self._progress_hooks:
1064                             fd.add_progress_hook(ph)
1065                         if self.params.get('verbose'):
1066                             self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1067                         return fd.download(name, info)
1068                     if info_dict.get('requested_formats') is not None:
1069                         downloaded = []
1070                         success = True
1071                         merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1072                         if not merger._executable:
1073                             postprocessors = []
1074                             self.report_warning('You have requested multiple '
1075                                                 'formats but ffmpeg or avconv are not installed.'
1076                                                 ' The formats won\'t be merged')
1077                         else:
1078                             postprocessors = [merger]
1079                         for f in info_dict['requested_formats']:
1080                             new_info = dict(info_dict)
1081                             new_info.update(f)
1082                             fname = self.prepare_filename(new_info)
1083                             fname = prepend_extension(fname, 'f%s' % f['format_id'])
1084                             downloaded.append(fname)
1085                             partial_success = dl(fname, new_info)
1086                             success = success and partial_success
1087                         info_dict['__postprocessors'] = postprocessors
1088                         info_dict['__files_to_merge'] = downloaded
1089                     else:
1090                         # Just a single file
1091                         success = dl(filename, info_dict)
1092                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1093                     self.report_error('unable to download video data: %s' % str(err))
1094                     return
1095                 except (OSError, IOError) as err:
1096                     raise UnavailableVideoError(err)
1097                 except (ContentTooShortError, ) as err:
1098                     self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1099                     return
1100
1101             if success:
1102                 try:
1103                     self.post_process(filename, info_dict)
1104                 except (PostProcessingError) as err:
1105                     self.report_error('postprocessing: %s' % str(err))
1106                     return
1107
1108         self.record_download_archive(info_dict)
1109
1110     def download(self, url_list):
1111         """Download a given list of URLs."""
1112         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1113         if (len(url_list) > 1 and
1114                 '%' not in outtmpl
1115                 and self.params.get('max_downloads') != 1):
1116             raise SameFileError(outtmpl)
1117
1118         for url in url_list:
1119             try:
1120                 # It also downloads the videos
1121                 res = self.extract_info(url)
1122             except UnavailableVideoError:
1123                 self.report_error('unable to download video')
1124             except MaxDownloadsReached:
1125                 self.to_screen('[info] Maximum number of downloaded files reached.')
1126                 raise
1127             else:
1128                 if self.params.get('dump_single_json', False):
1129                     self.to_stdout(json.dumps(res))
1130
1131         return self._download_retcode
1132
1133     def download_with_info_file(self, info_filename):
1134         with io.open(info_filename, 'r', encoding='utf-8') as f:
1135             info = json.load(f)
1136         try:
1137             self.process_ie_result(info, download=True)
1138         except DownloadError:
1139             webpage_url = info.get('webpage_url')
1140             if webpage_url is not None:
1141                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1142                 return self.download([webpage_url])
1143             else:
1144                 raise
1145         return self._download_retcode
1146
1147     def post_process(self, filename, ie_info):
1148         """Run all the postprocessors on the given file."""
1149         info = dict(ie_info)
1150         info['filepath'] = filename
1151         keep_video = None
1152         pps_chain = []
1153         if ie_info.get('__postprocessors') is not None:
1154             pps_chain.extend(ie_info['__postprocessors'])
1155         pps_chain.extend(self._pps)
1156         for pp in pps_chain:
1157             try:
1158                 keep_video_wish, new_info = pp.run(info)
1159                 if keep_video_wish is not None:
1160                     if keep_video_wish:
1161                         keep_video = keep_video_wish
1162                     elif keep_video is None:
1163                         # No clear decision yet, let IE decide
1164                         keep_video = keep_video_wish
1165             except PostProcessingError as e:
1166                 self.report_error(e.msg)
1167         if keep_video is False and not self.params.get('keepvideo', False):
1168             try:
1169                 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1170                 os.remove(encodeFilename(filename))
1171             except (IOError, OSError):
1172                 self.report_warning('Unable to remove downloaded video file')
1173
1174     def _make_archive_id(self, info_dict):
1175         # Future-proof against any change in case
1176         # and backwards compatibility with prior versions
1177         extractor = info_dict.get('extractor_key')
1178         if extractor is None:
1179             if 'id' in info_dict:
1180                 extractor = info_dict.get('ie_key')  # key in a playlist
1181         if extractor is None:
1182             return None  # Incomplete video information
1183         return extractor.lower() + ' ' + info_dict['id']
1184
1185     def in_download_archive(self, info_dict):
1186         fn = self.params.get('download_archive')
1187         if fn is None:
1188             return False
1189
1190         vid_id = self._make_archive_id(info_dict)
1191         if vid_id is None:
1192             return False  # Incomplete video information
1193
1194         try:
1195             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1196                 for line in archive_file:
1197                     if line.strip() == vid_id:
1198                         return True
1199         except IOError as ioe:
1200             if ioe.errno != errno.ENOENT:
1201                 raise
1202         return False
1203
1204     def record_download_archive(self, info_dict):
1205         fn = self.params.get('download_archive')
1206         if fn is None:
1207             return
1208         vid_id = self._make_archive_id(info_dict)
1209         assert vid_id
1210         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1211             archive_file.write(vid_id + '\n')
1212
1213     @staticmethod
1214     def format_resolution(format, default='unknown'):
1215         if format.get('vcodec') == 'none':
1216             return 'audio only'
1217         if format.get('resolution') is not None:
1218             return format['resolution']
1219         if format.get('height') is not None:
1220             if format.get('width') is not None:
1221                 res = '%sx%s' % (format['width'], format['height'])
1222             else:
1223                 res = '%sp' % format['height']
1224         elif format.get('width') is not None:
1225             res = '?x%d' % format['width']
1226         else:
1227             res = default
1228         return res
1229
1230     def _format_note(self, fdict):
1231         res = ''
1232         if fdict.get('ext') in ['f4f', 'f4m']:
1233             res += '(unsupported) '
1234         if fdict.get('format_note') is not None:
1235             res += fdict['format_note'] + ' '
1236         if fdict.get('tbr') is not None:
1237             res += '%4dk ' % fdict['tbr']
1238         if fdict.get('container') is not None:
1239             if res:
1240                 res += ', '
1241             res += '%s container' % fdict['container']
1242         if (fdict.get('vcodec') is not None and
1243                 fdict.get('vcodec') != 'none'):
1244             if res:
1245                 res += ', '
1246             res += fdict['vcodec']
1247             if fdict.get('vbr') is not None:
1248                 res += '@'
1249         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1250             res += 'video@'
1251         if fdict.get('vbr') is not None:
1252             res += '%4dk' % fdict['vbr']
1253         if fdict.get('fps') is not None:
1254             res += ', %sfps' % fdict['fps']
1255         if fdict.get('acodec') is not None:
1256             if res:
1257                 res += ', '
1258             if fdict['acodec'] == 'none':
1259                 res += 'video only'
1260             else:
1261                 res += '%-5s' % fdict['acodec']
1262         elif fdict.get('abr') is not None:
1263             if res:
1264                 res += ', '
1265             res += 'audio'
1266         if fdict.get('abr') is not None:
1267             res += '@%3dk' % fdict['abr']
1268         if fdict.get('asr') is not None:
1269             res += ' (%5dHz)' % fdict['asr']
1270         if fdict.get('filesize') is not None:
1271             if res:
1272                 res += ', '
1273             res += format_bytes(fdict['filesize'])
1274         elif fdict.get('filesize_approx') is not None:
1275             if res:
1276                 res += ', '
1277             res += '~' + format_bytes(fdict['filesize_approx'])
1278         return res
1279
1280     def list_formats(self, info_dict):
1281         def line(format, idlen=20):
1282             return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1283                 format['format_id'],
1284                 format['ext'],
1285                 self.format_resolution(format),
1286                 self._format_note(format),
1287             ))
1288
1289         formats = info_dict.get('formats', [info_dict])
1290         idlen = max(len('format code'),
1291                     max(len(f['format_id']) for f in formats))
1292         formats_s = [line(f, idlen) for f in formats]
1293         if len(formats) > 1:
1294             formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1295             formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1296
1297         header_line = line({
1298             'format_id': 'format code', 'ext': 'extension',
1299             'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1300         self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1301                        (info_dict['id'], header_line, '\n'.join(formats_s)))
1302
1303     def urlopen(self, req):
1304         """ Start an HTTP download """
1305
1306         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1307         # always respected by websites, some tend to give out URLs with non percent-encoded
1308         # non-ASCII characters (see telemb.py, ard.py [#3412])
1309         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1310         # To work around aforementioned issue we will replace request's original URL with
1311         # percent-encoded one
1312         req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
1313         url = req if req_is_string else req.get_full_url()
1314         url_escaped = escape_url(url)
1315
1316         # Substitute URL if any change after escaping
1317         if url != url_escaped:
1318             if req_is_string:
1319                 req = url_escaped
1320             else:
1321                 req = compat_urllib_request.Request(
1322                     url_escaped, data=req.data, headers=req.headers,
1323                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1324
1325         return self._opener.open(req, timeout=self._socket_timeout)
1326
1327     def print_debug_header(self):
1328         if not self.params.get('verbose'):
1329             return
1330
1331         if type('') is not compat_str:
1332             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1333             self.report_warning(
1334                 'Your Python is broken! Update to a newer and supported version')
1335
1336         stdout_encoding = getattr(
1337             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1338         encoding_str = (
1339             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1340                 locale.getpreferredencoding(),
1341                 sys.getfilesystemencoding(),
1342                 stdout_encoding,
1343                 self.get_encoding()))
1344         write_string(encoding_str, encoding=None)
1345
1346         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1347         try:
1348             sp = subprocess.Popen(
1349                 ['git', 'rev-parse', '--short', 'HEAD'],
1350                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1351                 cwd=os.path.dirname(os.path.abspath(__file__)))
1352             out, err = sp.communicate()
1353             out = out.decode().strip()
1354             if re.match('[0-9a-f]+', out):
1355                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1356         except:
1357             try:
1358                 sys.exc_clear()
1359             except:
1360                 pass
1361         self._write_string('[debug] Python version %s - %s\n' % (
1362             platform.python_version(), platform_name()))
1363
1364         exe_versions = FFmpegPostProcessor.get_versions()
1365         exe_versions['rtmpdump'] = rtmpdump_version()
1366         exe_str = ', '.join(
1367             '%s %s' % (exe, v)
1368             for exe, v in sorted(exe_versions.items())
1369             if v
1370         )
1371         if not exe_str:
1372             exe_str = 'none'
1373         self._write_string('[debug] exe versions: %s\n' % exe_str)
1374
1375         proxy_map = {}
1376         for handler in self._opener.handlers:
1377             if hasattr(handler, 'proxies'):
1378                 proxy_map.update(handler.proxies)
1379         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1380
1381     def _setup_opener(self):
1382         timeout_val = self.params.get('socket_timeout')
1383         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1384
1385         opts_cookiefile = self.params.get('cookiefile')
1386         opts_proxy = self.params.get('proxy')
1387
1388         if opts_cookiefile is None:
1389             self.cookiejar = compat_cookiejar.CookieJar()
1390         else:
1391             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1392                 opts_cookiefile)
1393             if os.access(opts_cookiefile, os.R_OK):
1394                 self.cookiejar.load()
1395
1396         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1397             self.cookiejar)
1398         if opts_proxy is not None:
1399             if opts_proxy == '':
1400                 proxies = {}
1401             else:
1402                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1403         else:
1404             proxies = compat_urllib_request.getproxies()
1405             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1406             if 'http' in proxies and 'https' not in proxies:
1407                 proxies['https'] = proxies['http']
1408         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1409
1410         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1411         https_handler = make_HTTPS_handler(
1412             self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1413         ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1414         opener = compat_urllib_request.build_opener(
1415             https_handler, proxy_handler, cookie_processor, ydlh)
1416         # Delete the default user-agent header, which would otherwise apply in
1417         # cases where our custom HTTP handler doesn't come into play
1418         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1419         opener.addheaders = []
1420         self._opener = opener
1421
1422     def encode(self, s):
1423         if isinstance(s, bytes):
1424             return s  # Already encoded
1425
1426         try:
1427             return s.encode(self.get_encoding())
1428         except UnicodeEncodeError as err:
1429             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1430             raise
1431
1432     def get_encoding(self):
1433         encoding = self.params.get('encoding')
1434         if encoding is None:
1435             encoding = preferredencoding()
1436         return encoding