2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
31 compat_urllib_request,
55 UnavailableVideoError,
62 from .cache import Cache
63 from .extractor import get_info_extractor, gen_extractors
64 from .downloader import get_suitable_downloader
65 from .downloader.rtmp import rtmpdump_version
66 from .postprocessor import FFmpegMergerPP, FFmpegPostProcessor
67 from .version import __version__
class YoutubeDL(object):
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information, a task that InfoExtractors do, it
    has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    username: Username for authentication purposes.
    password: Password for authentication purposes.
    videopassword: Password for accessing a video.
    usenetrc: Use netrc for authentication instead.
    verbose: Print additional info to stdout.
    quiet: Do not print messages to stdout.
    no_warnings: Do not print out anything for warnings.
    forceurl: Force printing final URL.
    forcetitle: Force printing title.
    forceid: Force printing ID.
    forcethumbnail: Force printing thumbnail URL.
    forcedescription: Force printing description.
    forcefilename: Force printing final filename.
    forceduration: Force printing duration.
    forcejson: Force printing info_dict as JSON.
    dump_single_json: Force printing the info_dict of the whole playlist
                      (or video) as a single JSON line.
    simulate: Do not download the video files.
    format: Video format code.
    format_limit: Highest quality format to try.
    outtmpl: Template for output names.
    restrictfilenames: Do not allow "&" and spaces in file names
    ignoreerrors: Do not stop on download errors.
    nooverwrites: Prevent overwriting files.
    playliststart: Playlist item to start at.
    playlistend: Playlist item to end at.
    matchtitle: Download only matching titles.
    rejecttitle: Reject downloads for matching titles.
    logger: Log messages to a logging.Logger instance.
    logtostderr: Log messages to stderr instead of stdout.
    writedescription: Write the video description to a .description file
    writeinfojson: Write the video description to a .info.json file
    writeannotations: Write the video annotations to a .annotations.xml file
    writethumbnail: Write the thumbnail image to a file
    writesubtitles: Write the video subtitles to a file
    writeautomaticsub: Write the automatic subtitles to a file
    allsubtitles: Downloads all the subtitles of the video
                  (requires writesubtitles or writeautomaticsub)
    listsubtitles: Lists all available subtitles for the video
    subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
    subtitleslangs: List of languages of the subtitles to download
    keepvideo: Keep the video file after post-processing
    daterange: A DateRange object, download only if the upload_date is in the range.
    skip_download: Skip the actual download of the video file
    cachedir: Location of the cache files in the filesystem.
              False to disable filesystem cache.
    noplaylist: Download single video instead of a playlist if in doubt.
    age_limit: An integer representing the user's age in years.
               Unsuitable videos for the given age are skipped.
    min_views: An integer representing the minimum view count the video
               must have in order to not be skipped.
               Videos without view count information are always
               downloaded. None for no limit.
    max_views: An integer representing the maximum view count.
               Videos that are more popular than that are not
               Videos without view count information are always
               downloaded. None for no limit.
    download_archive: File name of a file where all downloads are recorded.
                      Videos already present in the file are not downloaded
    cookiefile: File name where cookies should be read from and dumped to.
    nocheckcertificate: Do not verify SSL certificates
    prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
                     At the moment, this is only supported by YouTube.
    proxy: URL of the proxy server to use
    socket_timeout: Time to wait for unresponsive hosts, in seconds
    bidi_workaround: Work around buggy terminals without bidirectional text
                     support, using fribidi
    debug_printtraffic: Print out sent and received HTTP traffic
    include_ads: Download ads as well
    default_search: Prepend this string if an input url is not valid.
                    'auto' for elaborate guessing
    encoding: Use this encoding instead of the system-specified.
    extract_flat: Do not resolve URLs, return the immediate result.
                  Pass in 'in_playlist' to only show this behavior for

    The following parameters are not used by YoutubeDL itself, they are used by
    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
    noresizebuffer, retries, continuedl, noprogress, consoletitle

    The following options are used by the post processors:
    prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
                   otherwise prefer avconv.
    exec_cmd: Arbitrary command to run after downloading
    """

    # Class-level placeholders; __init__ resets both to 0 per instance.
    _download_retcode = None
    _num_downloads = None
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.

        params    -- dict of options (see the class docstring for keys).
        auto_init -- presumably gates the print_debug_header /
                     add_default_info_extractors calls at the end —
                     TODO confirm (the guarding line is outside this view).
        """
        # Per-key cache of InfoExtractor instances (see get_info_extractor).
        self._ies_instances = {}
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Route screen output to stderr when 'logtostderr' is set.
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.cache = Cache(self)

        if params.get('bidi_workaround', False):
            # Pipe output through an external bidi filter; the slave end of a
            # pty receives the filter's output, the master end is read back.
            master, slave = pty.openpty()
            width = get_term_width()
            width_args = ['-w', str(width)]
            stdin=subprocess.PIPE,
            stderr=self._err_file)
            self._output_process = subprocess.Popen(
                ['bidiv'] + width_args, **sp_kwargs
            # Fall back to fribidi when bidiv is unavailable.
            self._output_process = subprocess.Popen(
                ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')

        if (sys.version_info >= (3,) and sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # On Python 3, the Unicode filesystem API will throw errors (#1474)
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if '%(stitle)s' in self.params.get('outtmpl', ''):
            self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

        self.print_debug_header()
        self.add_default_info_extractors()
    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        # Cache the instance by key so get_info_extractor can reuse it.
        self._ies_instances[ie.ie_key()] = ie
        # Mutual registration: the IE gets a back-reference to this downloader.
        ie.set_downloader(self)
    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key, it will try to get one from
        the _ies list, if there's no instance it will create a new one and add
        it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        # Instantiate on cache miss and register it.
        ie = get_info_extractor(ie_key)()
        self.add_info_extractor(ie)
272 def add_default_info_extractors(self):
274 Add the InfoExtractors returned by gen_extractors to the end of the list
276 for ie in gen_extractors():
277 self.add_info_extractor(ie)
    def add_post_processor(self, pp):
        """Add a PostProcessor object to the end of the chain."""
        # Give the PP a back-reference to this downloader ("mutual registration").
        pp.set_downloader(self)
284 def add_progress_hook(self, ph):
285 """Add the progress hook (currently only for the file downloader)"""
286 self._progress_hooks.append(ph)
    def _bidi_workaround(self, message):
        """Pass *message* through the external bidi filter process started in
        __init__ (bidiv/fribidi) and return the transformed text."""
        if not hasattr(self, '_output_channel'):
        assert hasattr(self, '_output_process')
        assert isinstance(message, compat_str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode('utf-8'))
        self._output_process.stdin.flush()
        # Read back exactly as many lines as were written.
        res = ''.join(self._output_channel.readline().decode('utf-8')
                      for _ in range(line_count))
        # Strip the trailing newline appended above.
        return res[:-len('\n')]
301 def to_screen(self, message, skip_eol=False):
302 """Print message to stdout if not in quiet mode."""
303 return self.to_stdout(message, skip_eol, check_quiet=True)
305 def _write_string(self, s, out=None):
306 write_string(s, out=out, encoding=self.params.get('encoding'))
    def to_stdout(self, message, skip_eol=False, check_quiet=False):
        """Print message to stdout if not in quiet mode.

        A configured logger takes precedence over direct screen output;
        check_quiet makes the 'quiet' option suppress the message.
        """
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        elif not check_quiet or not self.params.get('quiet', False):
            message = self._bidi_workaround(message)
            terminator = ['\n', ''][skip_eol]
            output = message + terminator
            self._write_string(output, self._screen_file)
    def to_stderr(self, message):
        """Print message to stderr (or route it to the configured logger)."""
        assert isinstance(message, compat_str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        message = self._bidi_workaround(message)
        output = message + '\n'
        self._write_string(output, self._err_file)
    def to_console_title(self, message):
        """Set the terminal/console window title to *message* (no-op unless
        the 'consoletitle' option is enabled)."""
        if not self.params.get('consoletitle', False):
        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            # xterm escape sequence: set window/icon title.
            self._write_string('\033]0;%s\007' % message, self._screen_file)
    def save_console_title(self):
        """Push the current terminal title onto the terminal's title stack."""
        if not self.params.get('consoletitle', False):
        if 'TERM' in os.environ:
            # Save the title on stack
            self._write_string('\033[22;0t', self._screen_file)
    def restore_console_title(self):
        """Pop the terminal title saved by save_console_title."""
        if not self.params.get('consoletitle', False):
        if 'TERM' in os.environ:
            # Restore the title from stack
            self._write_string('\033[23;0t', self._screen_file)
        self.save_console_title()

    def __exit__(self, *args):
        """Context-manager exit: restore the console title and persist cookies."""
        self.restore_console_title()
        if self.params.get('cookiefile') is not None:
            self.cookiejar.save()
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                # Prefer the wrapped exception's own traceback when available.
                if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                    tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                tb += compat_str(traceback.format_exc())
                tb_data = traceback.format_list(traceback.extract_stack())
                tb = ''.join(tb_data)
        if not self.params.get('ignoreerrors', False):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # ignoreerrors: record failure via the exit code instead of raising.
        self._download_retcode = 1
    def report_warning(self, message):
        """
        Print the message to stderr, it will be prefixed with 'WARNING:'
        If stderr is a tty file the 'WARNING:' will be colored
        """
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        if self.params.get('no_warnings'):
        if self._err_file.isatty() and os.name != 'nt':
            # ANSI yellow prefix on capable terminals.
            _msg_header = '\033[0;33mWARNING:\033[0m'
            _msg_header = 'WARNING:'
        warning_message = '%s %s' % (_msg_header, message)
        self.to_stderr(warning_message)
    def report_error(self, message, tb=None):
        """
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        """
        if self._err_file.isatty() and os.name != 'nt':
            # ANSI red prefix on capable terminals.
            _msg_header = '\033[0;31mERROR:\033[0m'
            _msg_header = 'ERROR:'
        error_message = '%s %s' % (_msg_header, message)
        self.trouble(error_message, tb)
    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            # Fall back to a filename-free message when the name can't be encoded.
            self.to_screen('[download] The file has already been downloaded')
    def prepare_filename(self, info_dict):
        """Generate the output filename by expanding the 'outtmpl' template
        with a sanitized copy of info_dict."""
        template_dict = dict(info_dict)
        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
        autonumber_templ = '%0' + str(autonumber_size) + 'd'
        template_dict['autonumber'] = autonumber_templ % self._num_downloads
        if template_dict.get('playlist_index') is not None:
            # Zero-pad the index to the width of the playlist length.
            template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
        if template_dict.get('resolution') is None:
            if template_dict.get('width') and template_dict.get('height'):
                template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
            elif template_dict.get('height'):
                template_dict['resolution'] = '%sp' % template_dict['height']
            elif template_dict.get('width'):
                # NOTE(review): width-only case renders as '?xWIDTH';
                # '%dx?' looks like the intended format — confirm.
                template_dict['resolution'] = '?x%d' % template_dict['width']
        sanitize = lambda k, v: sanitize_filename(
            restricted=self.params.get('restrictfilenames'),
        template_dict = dict((k, sanitize(k, v))
                             for k, v in template_dict.items()
        # Missing template keys render as 'NA' instead of raising KeyError.
        template_dict = collections.defaultdict(lambda: 'NA', template_dict)

        outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
        tmpl = compat_expanduser(outtmpl)
        filename = tmpl % template_dict
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
    def _match_entry(self, info_dict):
        """ Returns None iff the file should be downloaded """
        # Otherwise returns a human-readable string explaining the skip.
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if not re.search(matchtitle, title, re.IGNORECASE):
                return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if re.search(rejecttitle, title, re.IGNORECASE):
                return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date', None)
        dateRange = self.params.get('daterange', DateRange())
        if date not in dateRange:
            return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        view_count = info_dict.get('view_count', None)
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        age_limit = self.params.get('age_limit')
        if age_limit is not None:
            actual_age_limit = info_dict.get('age_limit')
            if actual_age_limit is None:
            if age_limit < actual_age_limit:
                return 'Skipping "' + title + '" because it is age restricted'
        if self.in_download_archive(info_dict):
            return '%s has already been recorded in archive' % video_title
507 def add_extra_info(info_dict, extra_info):
508 '''Set the keys from extra_info in info dict if they are missing'''
509 for key, value in extra_info.items():
510 info_dict.setdefault(key, value)
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
        """
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result
        """
        # NOTE(review): mutable default for extra_info is shared across calls;
        # safe only as long as it is never mutated — confirm.
        ies = [self.get_info_extractor(ie_key)]
        if not ie.suitable(url):
        self.report_warning('The program functionality for this site has been marked as broken, '
                            'and will probably not work.')
        ie_result = ie.extract(url)
        if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        if isinstance(ie_result, list):
            # Backwards compatibility: old IE result format
            '_type': 'compat_list',
            'entries': ie_result,
        self.add_default_extra_info(ie_result, ie, url)
        return self.process_ie_result(ie_result, download, extra_info)
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
        except MaxDownloadsReached:
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
        self.report_error('no suitable InfoExtractor for URL %s' % url)
    def add_default_extra_info(self, ie_result, ie, url):
        """Attach extractor identity and URL-derived defaults to ie_result
        (only keys not already present are set — see add_extra_info)."""
        self.add_extra_info(ie_result, {
            'extractor': ie.IE_NAME,
            'webpage_url_basename': url_basename(url),
            'extractor_key': ie.ie_key(),
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """
        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            extract_flat = self.params.get('extract_flat', False)
            # extract_flat: return the unresolved result as-is.
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                    extract_flat is True):
                if self.params.get('forcejson', False):
                    self.to_stdout(json.dumps(ie_result))

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            def make_result(embedded_info):
                # Start from the embedding-page result and override it with
                # selected fields from the embedded info.
                new_result = ie_result.copy()
                for f in ('_type', 'url', 'ext', 'player_url', 'formats',
                          'entries', 'ie_key', 'duration',
                          'subtitles', 'annotations', 'format',
                          'thumbnail', 'thumbnails'):
                    if f in embedded_info:
                        new_result[f] = embedded_info[f]
            new_result = make_result(info)

            assert new_result.get('_type') != 'url_transparent'
            if new_result.get('_type') == 'compat_list':
                new_result['entries'] = [
                    make_result(e) for e in new_result['entries']]

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # playliststart is 1-based in params, 0-based here.
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:

            if isinstance(ie_result['entries'], list):
                n_all_entries = len(ie_result['entries'])
                entries = ie_result['entries'][playliststart:playlistend]
                n_entries = len(entries)
                "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
                # Lazily-paged playlists are sliced via PagedList.getslice.
                assert isinstance(ie_result['entries'], PagedList)
                entries = ie_result['entries'].getslice(
                    playliststart, playlistend)
                n_entries = len(entries)
                "[%s] playlist %s: Downloading %d videos" %
                (ie_result['extractor'], playlist, n_entries))

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_index': i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
                reason = self._match_entry(entry)
                if reason is not None:
                    self.to_screen('[download] ' + reason)

                entry_result = self.process_ie_result(entry,
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
        elif result_type == 'compat_list':
            self.add_extra_info(r,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
        raise Exception('Invalid result type: %s' % result_type)
    def select_format(self, format_spec, available_formats):
        """Pick a single format dict out of available_formats for format_spec.

        available_formats is ordered worst-to-best (the 'best' branch takes
        the last element); specs may also be an extension or a format_id.
        """
        if format_spec == 'best' or format_spec is None:
            return available_formats[-1]
        elif format_spec == 'worst':
            return available_formats[0]
        elif format_spec == 'bestaudio':
                f for f in available_formats
                if f.get('vcodec') == 'none']
                return audio_formats[-1]
        elif format_spec == 'worstaudio':
                f for f in available_formats
                if f.get('vcodec') == 'none']
                return audio_formats[0]
        elif format_spec == 'bestvideo':
                f for f in available_formats
                if f.get('acodec') == 'none']
                return video_formats[-1]
        elif format_spec == 'worstvideo':
                f for f in available_formats
                if f.get('acodec') == 'none']
                return video_formats[0]
            # Otherwise match by extension, then by exact format_id.
            extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
            if format_spec in extensions:
                filter_f = lambda f: f['ext'] == format_spec
                filter_f = lambda f: f['format_id'] == format_spec
            matches = list(filter(filter_f, available_formats))
    def process_video_result(self, info_dict, download=True):
        """Fill in defaults for a single video result, resolve the requested
        format(s) and hand each selected format to process_info."""
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        thumbnails = info_dict.get('thumbnails')
        # Sort so that the largest thumbnail ends up last.
        thumbnails.sort(key=lambda t: (
            t.get('width'), t.get('height'), t.get('url')))
        if 'width' in t and 'height' in t:
            t['resolution'] = '%dx%d' % (t['width'], t['height'])

        if thumbnails and 'thumbnail' not in info_dict:
            # Default to the largest thumbnail (last after the sort above).
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Derive upload_date (YYYYMMDD, UTC) from the numeric timestamp.
            upload_date = datetime.datetime.utcfromtimestamp(
                info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

        # These extractors handle format selection themselves
        if info_dict['extractor'] in ['Youku']:
            self.process_info(info_dict)

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
            formats = info_dict['formats']
            raise ExtractorError('No video formats found!')

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)
            if format.get('format_id') is None:
                # Sanitize format_id from a naughty format
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url']).lower()

        # 'format_limit' caps quality: keep formats up to and including it.
        format_limit = self.params.get('format_limit', None)
        formats = list(takewhile_inclusive(
            lambda f: f['format_id'] != format_limit, formats

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats', None):
            self.list_formats(info_dict)

        req_format = self.params.get('format')
        if req_format is None:
        formats_to_download = []
        # The -1 is for supporting YoutubeIE
        if req_format in ('-1', 'all'):
            formats_to_download = formats
            for rfstr in req_format.split(','):
                # We can accept formats requested in the format: 34/5/best, we pick
                # the first that is available, starting from left
                req_formats = rfstr.split('/')
                for rf in req_formats:
                    if re.match(r'.+?\+.+?', rf) is not None:
                        # Two formats have been requested like '137+139'
                        format_1, format_2 = rf.split('+')
                        formats_info = (self.select_format(format_1, formats),
                                        self.select_format(format_2, formats))
                        if all(formats_info):
                            'requested_formats': formats_info,
                            'ext': formats_info[0]['ext'],
                        selected_format = None
                        selected_format = self.select_format(rf, formats)
                    if selected_format is not None:
                        formats_to_download.append(selected_format)
        if not formats_to_download:
            raise ExtractorError('requested format not available',

        if len(formats_to_download) > 1:
            self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for format in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(format)
            self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
864 def process_info(self, info_dict):
865 """Process a single resolved IE result."""
867 assert info_dict.get('_type', 'video') == 'video'
869 max_downloads = self.params.get('max_downloads')
870 if max_downloads is not None:
871 if self._num_downloads >= int(max_downloads):
872 raise MaxDownloadsReached()
874 info_dict['fulltitle'] = info_dict['title']
875 if len(info_dict['title']) > 200:
876 info_dict['title'] = info_dict['title'][:197] + '...'
878 # Keep for backwards compatibility
879 info_dict['stitle'] = info_dict['title']
881 if 'format' not in info_dict:
882 info_dict['format'] = info_dict['ext']
884 reason = self._match_entry(info_dict)
885 if reason is not None:
886 self.to_screen('[download] ' + reason)
889 self._num_downloads += 1
891 filename = self.prepare_filename(info_dict)
894 if self.params.get('forcetitle', False):
895 self.to_stdout(info_dict['fulltitle'])
896 if self.params.get('forceid', False):
897 self.to_stdout(info_dict['id'])
898 if self.params.get('forceurl', False):
899 # For RTMP URLs, also include the playpath
900 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
901 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
902 self.to_stdout(info_dict['thumbnail'])
903 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
904 self.to_stdout(info_dict['description'])
905 if self.params.get('forcefilename', False) and filename is not None:
906 self.to_stdout(filename)
907 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
908 self.to_stdout(formatSeconds(info_dict['duration']))
909 if self.params.get('forceformat', False):
910 self.to_stdout(info_dict['format'])
911 if self.params.get('forcejson', False):
912 info_dict['_filename'] = filename
913 self.to_stdout(json.dumps(info_dict))
914 if self.params.get('dump_single_json', False):
915 info_dict['_filename'] = filename
917 # Do nothing else if in simulate mode
918 if self.params.get('simulate', False):
925 dn = os.path.dirname(encodeFilename(filename))
926 if dn and not os.path.exists(dn):
928 except (OSError, IOError) as err:
929 self.report_error('unable to create directory ' + compat_str(err))
932 if self.params.get('writedescription', False):
933 descfn = filename + '.description'
934 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
935 self.to_screen('[info] Video description is already present')
938 self.to_screen('[info] Writing video description to: ' + descfn)
939 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
940 descfile.write(info_dict['description'])
941 except (KeyError, TypeError):
942 self.report_warning('There\'s no description to write.')
943 except (OSError, IOError):
944 self.report_error('Cannot write description file ' + descfn)
947 if self.params.get('writeannotations', False):
948 annofn = filename + '.annotations.xml'
949 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
950 self.to_screen('[info] Video annotations are already present')
953 self.to_screen('[info] Writing video annotations to: ' + annofn)
954 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
955 annofile.write(info_dict['annotations'])
956 except (KeyError, TypeError):
957 self.report_warning('There are no annotations to write.')
958 except (OSError, IOError):
959 self.report_error('Cannot write annotations file: ' + annofn)
962 subtitles_are_requested = any([self.params.get('writesubtitles', False),
963 self.params.get('writeautomaticsub')])
965 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
966 # subtitles download errors are already managed as troubles in relevant IE
967 # that way it will silently go on when used with unsupporting IE
968 subtitles = info_dict['subtitles']
969 sub_format = self.params.get('subtitlesformat', 'srt')
970 for sub_lang in subtitles.keys():
971 sub = subtitles[sub_lang]
975 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
976 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
977 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
979 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
980 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
982 except (OSError, IOError):
983 self.report_error('Cannot write subtitles file ' + sub_filename)
986 if self.params.get('writeinfojson', False):
987 infofn = os.path.splitext(filename)[0] + '.info.json'
988 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
989 self.to_screen('[info] Video description metadata is already present')
991 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
993 write_json_file(info_dict, encodeFilename(infofn))
994 except (OSError, IOError):
995 self.report_error('Cannot write metadata to JSON file ' + infofn)
998 if self.params.get('writethumbnail', False):
999 if info_dict.get('thumbnail') is not None:
1000 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
1001 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
1002 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1003 self.to_screen('[%s] %s: Thumbnail is already present' %
1004 (info_dict['extractor'], info_dict['id']))
1006 self.to_screen('[%s] %s: Downloading thumbnail ...' %
1007 (info_dict['extractor'], info_dict['id']))
1009 uf = self.urlopen(info_dict['thumbnail'])
1010 with open(thumb_filename, 'wb') as thumbf:
1011 shutil.copyfileobj(uf, thumbf)
1012 self.to_screen('[%s] %s: Writing thumbnail to: %s' %
1013 (info_dict['extractor'], info_dict['id'], thumb_filename))
1014 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1015 self.report_warning('Unable to download thumbnail "%s": %s' %
1016 (info_dict['thumbnail'], compat_str(err)))
1018 if not self.params.get('skip_download', False):
1019 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
1024 fd = get_suitable_downloader(info)(self, self.params)
1025 for ph in self._progress_hooks:
1026 fd.add_progress_hook(ph)
1027 if self.params.get('verbose'):
1028 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1029 return fd.download(name, info)
1030 if info_dict.get('requested_formats') is not None:
1033 merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1034 if not merger._executable:
1036 self.report_warning('You have requested multiple '
1037 'formats but ffmpeg or avconv are not installed.'
1038 ' The formats won\'t be merged')
1040 postprocessors = [merger]
1041 for f in info_dict['requested_formats']:
1042 new_info = dict(info_dict)
1044 fname = self.prepare_filename(new_info)
1045 fname = prepend_extension(fname, 'f%s' % f['format_id'])
1046 downloaded.append(fname)
1047 partial_success = dl(fname, new_info)
1048 success = success and partial_success
1049 info_dict['__postprocessors'] = postprocessors
1050 info_dict['__files_to_merge'] = downloaded
1052 # Just a single file
1053 success = dl(filename, info_dict)
1054 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1055 self.report_error('unable to download video data: %s' % str(err))
1057 except (OSError, IOError) as err:
1058 raise UnavailableVideoError(err)
1059 except (ContentTooShortError, ) as err:
1060 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1065 self.post_process(filename, info_dict)
1066 except (PostProcessingError) as err:
1067 self.report_error('postprocessing: %s' % str(err))
1070 self.record_download_archive(info_dict)
1072 def download(self, url_list):
1073 """Download a given list of URLs."""
# Output template decides whether several URLs would collide on one file.
1074 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
# Refuse to write multiple videos to a single fixed filename.
# NOTE(review): the condition looks truncated in this view — a clause
# (presumably testing outtmpl for template placeholders) appears to be
# missing between the next two lines; confirm against upstream.
1075 if (len(url_list) > 1 and
1077 and self.params.get('max_downloads') != 1):
1078 raise SameFileError(outtmpl)
1080 for url in url_list:
# NOTE(review): the enclosing "try:" line is not visible here.
1082 #It also downloads the videos
1083 res = self.extract_info(url)
1084 except UnavailableVideoError:
1085 self.report_error('unable to download video')
1086 except MaxDownloadsReached:
# --max-downloads limit hit: stop iterating over the remaining URLs.
1087 self.to_screen('[info] Maximum number of downloaded files reached.')
# --dump-single-json: emit the extracted info as one JSON document.
1090 if self.params.get('dump_single_json', False):
1091 self.to_stdout(json.dumps(res))
# Process-wide return code accumulated by report_error() et al.
1093 return self._download_retcode
1095 def download_with_info_file(self, info_filename):
# Re-run download/processing from a previously dumped .info.json file
# (--load-info), bypassing fresh extraction.
1096 with io.open(info_filename, 'r', encoding='utf-8') as f:
# NOTE(review): the "try:" and the json.load() producing `info`
# appear elided from this view.
1099 self.process_ie_result(info, download=True)
1100 except DownloadError:
# The stored info dict may be stale; fall back to re-extracting
# from the recorded webpage URL when one is available.
1101 webpage_url = info.get('webpage_url')
1102 if webpage_url is not None:
1103 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1104 return self.download([webpage_url])
1107 return self._download_retcode
1109 def post_process(self, filename, ie_info):
1110 """Run all the postprocessors on the given file."""
# Work on a copy so the caller's info dict is not mutated.
1111 info = dict(ie_info)
1112 info['filepath'] = filename
# Per-video postprocessors (e.g. FFmpegMergerPP stored under
# '__postprocessors') run before the globally registered ones.
# NOTE(review): the initialisation of pps_chain (and keep_video)
# appears elided from this view.
1115 if ie_info.get('__postprocessors') is not None:
1116 pps_chain.extend(ie_info['__postprocessors'])
1117 pps_chain.extend(self._pps)
1118 for pp in pps_chain:
# Each postprocessor reports whether the original file should be kept.
1120 keep_video_wish, new_info = pp.run(info)
1121 if keep_video_wish is not None:
# An explicit wish from a postprocessor wins.
1123 keep_video = keep_video_wish
1124 elif keep_video is None:
1125 # No clear decision yet, let IE decide
1126 keep_video = keep_video_wish
1127 except PostProcessingError as e:
1128 self.report_error(e.msg)
# Delete the source file unless a PP asked to keep it or -k was given.
1129 if keep_video is False and not self.params.get('keepvideo', False):
1131 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1132 os.remove(encodeFilename(filename))
1133 except (IOError, OSError):
# Best-effort removal: failing to delete is only worth a warning.
1134 self.report_warning('Unable to remove downloaded video file')
1136 def _make_archive_id(self, info_dict):
1137 # Future-proof against any change in case
1138 # and backwards compatibility with prior versions
1139 extractor = info_dict.get('extractor_key')
1140 if extractor is None:
1141 if 'id' in info_dict:
1142 extractor = info_dict.get('ie_key') # key in a playlist
1143 if extractor is None:
1144 return None # Incomplete video information
1145 return extractor.lower() + ' ' + info_dict['id']
1147 def in_download_archive(self, info_dict):
# Return whether this video is already recorded in the
# --download-archive file (one '<extractor> <id>' entry per line).
1148 fn = self.params.get('download_archive')
# NOTE(review): the early "fn is None -> False" guard and the "try:"
# appear elided from this view.
1152 vid_id = self._make_archive_id(info_dict)
1154 return False # Incomplete video information
# locked_file serialises access with concurrent writers of the archive.
1157 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1158 for line in archive_file:
1159 if line.strip() == vid_id:
1161 except IOError as ioe:
# A missing archive file just means nothing has been archived yet;
# any other I/O error should propagate.
1162 if ioe.errno != errno.ENOENT:
1166 def record_download_archive(self, info_dict):
# Append this video's archive id to the --download-archive file so it
# is skipped on subsequent runs.
1167 fn = self.params.get('download_archive')
# NOTE(review): the "fn is None -> return" guard appears elided here.
1170 vid_id = self._make_archive_id(info_dict)
# Append under a file lock so concurrent youtube-dl processes do not
# interleave writes.
1172 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1173 archive_file.write(vid_id + '\n')
1176 def format_resolution(format, default='unknown'):
# Return a human-readable resolution label for a format dict,
# e.g. '1280x720', '720p' or '?x640'; `default` when nothing is known.
# NOTE(review): several branch bodies and the final return appear
# elided from this view.
1177 if format.get('vcodec') == 'none':
# Prefer an explicit 'resolution' value supplied by the extractor.
1179 if format.get('resolution') is not None:
1180 return format['resolution']
1181 if format.get('height') is not None:
1182 if format.get('width') is not None:
1183 res = '%sx%s' % (format['width'], format['height'])
# Height only: conventional 'Np' notation.
1185 res = '%sp' % format['height']
1186 elif format.get('width') is not None:
# Width only: height is unknown.
1187 res = '?x%d' % format['width']
1192 def _format_note(self, fdict):
# Build a short human-readable annotation for one format dict:
# container, codecs, bitrates, fps, sample rate and (approximate) size.
# NOTE(review): some lines (e.g. the initial accumulator assignment and
# the final return) appear elided from this view.
1194 if fdict.get('ext') in ['f4f', 'f4m']:
1195 res += '(unsupported) '
1196 if fdict.get('format_note') is not None:
1197 res += fdict['format_note'] + ' '
1198 if fdict.get('tbr') is not None:
# Total bitrate (kbit/s), right-aligned to 4 columns.
1199 res += '%4dk ' % fdict['tbr']
1200 if fdict.get('container') is not None:
1203 res += '%s container' % fdict['container']
# Video codec info, only for formats that actually carry video.
1204 if (fdict.get('vcodec') is not None and
1205 fdict.get('vcodec') != 'none'):
1208 res += fdict['vcodec']
1209 if fdict.get('vbr') is not None:
1211 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1213 if fdict.get('vbr') is not None:
# Video bitrate (kbit/s).
1214 res += '%4dk' % fdict['vbr']
1215 if fdict.get('fps') is not None:
1216 res += ', %sfps' % fdict['fps']
1217 if fdict.get('acodec') is not None:
1220 if fdict['acodec'] == 'none':
1223 res += '%-5s' % fdict['acodec']
1224 elif fdict.get('abr') is not None:
# Audio bitrate (kbit/s).
1228 if fdict.get('abr') is not None:
1229 res += '@%3dk' % fdict['abr']
1230 if fdict.get('asr') is not None:
# Audio sample rate in Hz.
1231 res += ' (%5dHz)' % fdict['asr']
1232 if fdict.get('filesize') is not None:
1235 res += format_bytes(fdict['filesize'])
1236 elif fdict.get('filesize_approx') is not None:
# Approximate sizes are marked with a leading '~'.
1239 res += '~' + format_bytes(fdict['filesize_approx'])
1242 def list_formats(self, info_dict):
# Print the --list-formats table for one video.
1243 def line(format, idlen=20):
# One fixed-width table row: format id, extension, resolution, note.
# NOTE(review): an argument (presumably format['ext']) and the row's
# closing lines appear elided from this view.
1244 return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1245 format['format_id'],
1247 self.format_resolution(format),
1248 self._format_note(format),
# An info dict without a 'formats' list is treated as its own single format.
1251 formats = info_dict.get('formats', [info_dict])
# First column width: widest format_id, never narrower than the header.
1252 idlen = max(len('format code'),
1253 max(len(f['format_id']) for f in formats))
1254 formats_s = [line(f, idlen) for f in formats]
# Formats are sorted worst-first, so annotate the two extremes.
1255 if len(formats) > 1:
1256 formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1257 formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
# The header row reuses line() with literal column titles.
1259 header_line = line({
1260 'format_id': 'format code', 'ext': 'extension',
1261 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1262 self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1263 (info_dict['id'], header_line, '\n'.join(formats_s)))
1265 def urlopen(self, req):
1266 """ Start an HTTP download """
1268 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1269 # always respected by websites, some tend to give out URLs with non percent-encoded
1270 # non-ASCII characters (see telemb.py, ard.py [#3412])
1271 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1272 # To work around aforementioned issue we will replace request's original URL with
1273 # percent-encoded one
# `req` may be either a plain URL string or a Request object;
# basestring exists only on Python 2, hence the version check.
1274 req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
1275 url = req if req_is_string else req.get_full_url()
1276 url_escaped = escape_url(url)
1278 # Substitute URL if any change after escaping
1279 if url != url_escaped:
# NOTE(review): the string-request branch appears elided here; the
# visible code rebuilds a Request, copying data/headers/origin flags.
1283 req = compat_urllib_request.Request(
1284 url_escaped, data=req.data, headers=req.headers,
1285 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
# All network traffic goes through the opener built by _setup_opener().
1287 return self._opener.open(req, timeout=self._socket_timeout)
1289 def print_debug_header(self):
# Emit the '[debug] ...' banner (versions, encodings, proxies) shown
# at startup when --verbose is active.
1290 if not self.params.get('verbose'):
# Detect unicode-broken Python builds (str literal is not compat_str).
1293 if type('') is not compat_str:
1294 # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1295 self.report_warning(
1296 'Your Python is broken! Update to a newer and supported version')
# NOTE(review): the assignment of encoding_str appears elided here.
1299 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1300 locale.getpreferredencoding(),
1301 sys.getfilesystemencoding(),
1302 sys.stdout.encoding,
1303 self.get_encoding()))
1304 write_string(encoding_str, encoding=None)
1306 self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
# Best-effort: report the git revision when running from a checkout.
# NOTE(review): the surrounding try/except appears elided here.
1308 sp = subprocess.Popen(
1309 ['git', 'rev-parse', '--short', 'HEAD'],
1310 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1311 cwd=os.path.dirname(os.path.abspath(__file__)))
1312 out, err = sp.communicate()
1313 out = out.decode().strip()
1314 if re.match('[0-9a-f]+', out):
1315 self._write_string('[debug] Git HEAD: ' + out + '\n')
1321 self._write_string('[debug] Python version %s - %s\n' % (
1322 platform.python_version(), platform_name()))
# Versions of external helper binaries (ffmpeg/avconv/rtmpdump).
1324 exe_versions = FFmpegPostProcessor.get_versions()
1325 exe_versions['rtmpdump'] = rtmpdump_version()
1326 exe_str = ', '.join(
1328 for exe, v in sorted(exe_versions.items())
1333 self._write_string('[debug] exe versions: %s\n' % exe_str)
# Collect the effective proxy configuration from the opener's handlers.
# NOTE(review): the proxy_map initialisation appears elided here.
1336 for handler in self._opener.handlers:
1337 if hasattr(handler, 'proxies'):
1338 proxy_map.update(handler.proxies)
1339 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1341 def _setup_opener(self):
# Build the urllib opener (cookies, proxies, HTTPS handling) used by
# urlopen() for all network requests, and store it on self._opener.
1342 timeout_val = self.params.get('socket_timeout')
# Default socket timeout is 600 seconds when none was requested.
1343 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1345 opts_cookiefile = self.params.get('cookiefile')
1346 opts_proxy = self.params.get('proxy')
# In-memory cookies by default; a Mozilla-format jar when --cookies
# was given (loaded only if the file is readable).
1348 if opts_cookiefile is None:
1349 self.cookiejar = compat_cookiejar.CookieJar()
1351 self.cookiejar = compat_cookiejar.MozillaCookieJar(
1353 if os.access(opts_cookiefile, os.R_OK):
1354 self.cookiejar.load()
1356 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
# Explicit --proxy wins over environment proxies; an empty string
# disables proxying entirely.
1358 if opts_proxy is not None:
1359 if opts_proxy == '':
1362 proxies = {'http': opts_proxy, 'https': opts_proxy}
1364 proxies = compat_urllib_request.getproxies()
1365 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1366 if 'http' in proxies and 'https' not in proxies:
1367 proxies['https'] = proxies['http']
1368 proxy_handler = compat_urllib_request.ProxyHandler(proxies)
# --debug-printtraffic turns on urllib's wire-level logging.
1370 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1371 https_handler = make_HTTPS_handler(
1372 self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1373 ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1374 opener = compat_urllib_request.build_opener(
1375 https_handler, proxy_handler, cookie_processor, ydlh)
1376 # Delete the default user-agent header, which would otherwise apply in
1377 # cases where our custom HTTP handler doesn't come into play
1378 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1379 opener.addheaders = []
1380 self._opener = opener
1382 def encode(self, s):
# Encode a text string to bytes using the configured output encoding
# (see get_encoding()); bytes input is passed through untouched.
1383 if isinstance(s, bytes):
1384 return s # Already encoded
# NOTE(review): the surrounding "try:" and the re-raise after the
# reason is amended appear elided from this view.
1387 return s.encode(self.get_encoding())
1388 except UnicodeEncodeError as err:
# Enrich the exception with a user-facing hint before it propagates.
1389 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1392 def get_encoding(self):
1393 encoding = self.params.get('encoding')
1394 if encoding is None:
1395 encoding = preferredencoding()