_ Git - youtube-dl/blob - youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import datetime
   8 import errno
   9 import io
  10 import json
  11 import locale
  12 import os
  13 import platform
  14 import re
  15 import shutil
  16 import subprocess
  17 import socket
  18 import sys
  19 import time
  20 import traceback
  21
  22 if os.name == 'nt':
  23     import ctypes
  24
  25 from .utils import (
  26     compat_cookiejar,
  27     compat_http_client,
  28     compat_str,
  29     compat_urllib_error,
  30     compat_urllib_request,
  31     escape_url,
  32     ContentTooShortError,
  33     date_from_str,
  34     DateRange,
  35     DEFAULT_OUTTMPL,
  36     determine_ext,
  37     DownloadError,
  38     encodeFilename,
  39     ExtractorError,
  40     format_bytes,
  41     formatSeconds,
  42     get_term_width,
  43     locked_file,
  44     make_HTTPS_handler,
  45     MaxDownloadsReached,
  46     PagedList,
  47     PostProcessingError,
  48     platform_name,
  49     preferredencoding,
  50     SameFileError,
  51     sanitize_filename,
  52     subtitles_filename,
  53     takewhile_inclusive,
  54     UnavailableVideoError,
  55     url_basename,
  56     write_json_file,
  57     write_string,
  58     YoutubeDLHandler,
  59     prepend_extension,
  60 )
  61 from .cache import Cache
  62 from .extractor import get_info_extractor, gen_extractors
  63 from .downloader import get_suitable_downloader
  64 from .postprocessor import FFmpegMergerPP
  65 from .version import __version__
  66
  67
  68 class YoutubeDL(object):
  69     """YoutubeDL class.
  70
  71     YoutubeDL objects are the ones responsible for downloading the
  72     actual video file and writing it to disk if the user has requested
  73     it, among some other tasks. In most cases there should be one per
  74     program. As, given a video URL, the downloader doesn't know how to
  75     extract all the needed information, task that InfoExtractors do, it
  76     has to pass the URL to one of them.
  77
  78     For this, YoutubeDL objects have a method that allows
  79     InfoExtractors to be registered in a given order. When it is passed
  80     a URL, the YoutubeDL object hands it to the first InfoExtractor it
  81     finds that reports being able to handle it. The InfoExtractor extracts
  82     all the information about the video or videos the URL refers to, and
  83     YoutubeDL processes the extracted information, possibly using a File
  84     Downloader to download the video.
  85
  86     YoutubeDL objects accept a lot of parameters. In order not to saturate
  87     the object constructor with arguments, it receives a dictionary of
  88     options instead. These options are available through the params
  89     attribute for the InfoExtractors to use. The YoutubeDL also
  90     registers itself as the downloader in charge for the InfoExtractors
  91     that are added to it, so this is a "mutual registration".
  92
  93     Available options:
  94
  95     username:          Username for authentication purposes.
  96     password:          Password for authentication purposes.
  97     videopassword:     Password for accessing a video.
  98     usenetrc:          Use netrc for authentication instead.
  99     verbose:           Print additional info to stdout.
 100     quiet:             Do not print messages to stdout.
 101     no_warnings:       Do not print out anything for warnings.
 102     forceurl:          Force printing final URL.
 103     forcetitle:        Force printing title.
 104     forceid:           Force printing ID.
 105     forcethumbnail:    Force printing thumbnail URL.
 106     forcedescription:  Force printing description.
 107     forcefilename:     Force printing final filename.
 108     forceduration:     Force printing duration.
 109     forcejson:         Force printing info_dict as JSON.
 110     simulate:          Do not download the video files.
 111     format:            Video format code.
 112     format_limit:      Highest quality format to try.
 113     outtmpl:           Template for output names.
 114     restrictfilenames: Do not allow "&" and spaces in file names
 115     ignoreerrors:      Do not stop on download errors.
 116     nooverwrites:      Prevent overwriting files.
 117     playliststart:     Playlist item to start at.
 118     playlistend:       Playlist item to end at.
 119     matchtitle:        Download only matching titles.
 120     rejecttitle:       Reject downloads for matching titles.
 121     logger:            Log messages to a logging.Logger instance.
 122     logtostderr:       Log messages to stderr instead of stdout.
 123     writedescription:  Write the video description to a .description file
 124     writeinfojson:     Write the video description to a .info.json file
 125     writeannotations:  Write the video annotations to a .annotations.xml file
 126     writethumbnail:    Write the thumbnail image to a file
 127     writesubtitles:    Write the video subtitles to a file
 128     writeautomaticsub: Write the automatic subtitles to a file
 129     allsubtitles:      Downloads all the subtitles of the video
 130                        (requires writesubtitles or writeautomaticsub)
 131     listsubtitles:     Lists all available subtitles for the video
 132     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
 133     subtitleslangs:    List of languages of the subtitles to download
 134     keepvideo:         Keep the video file after post-processing
 135     daterange:         A DateRange object, download only if the upload_date is in the range.
 136     skip_download:     Skip the actual download of the video file
 137     cachedir:          Location of the cache files in the filesystem.
 138                        False to disable filesystem cache.
 139     noplaylist:        Download single video instead of a playlist if in doubt.
 140     age_limit:         An integer representing the user's age in years.
 141                        Unsuitable videos for the given age are skipped.
 142     min_views:         An integer representing the minimum view count the video
 143                        must have in order to not be skipped.
 144                        Videos without view count information are always
 145                        downloaded. None for no limit.
 146     max_views:         An integer representing the maximum view count.
 147                        Videos that are more popular than that are not
 148                        downloaded.
 149                        Videos without view count information are always
 150                        downloaded. None for no limit.
 151     download_archive:  File name of a file where all downloads are recorded.
 152                        Videos already present in the file are not downloaded
 153                        again.
 154     cookiefile:        File name where cookies should be read from and dumped to.
 155     nocheckcertificate:Do not verify SSL certificates
 156     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 157                        At the moment, this is only supported by YouTube.
 158     proxy:             URL of the proxy server to use
 159     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 160     bidi_workaround:   Work around buggy terminals without bidirectional text
 161                        support, using fribidi
 162     debug_printtraffic:Print out sent and received HTTP traffic
 163     include_ads:       Download ads as well
 164     default_search:    Prepend this string if an input url is not valid.
 165                        'auto' for elaborate guessing
 166     encoding:          Use this encoding instead of the system-specified.
 167     extract_flat:      Do not resolve URLs, return the immediate result.
 168                        Pass in 'in_playlist' to only show this behavior for
 169                        playlist items.
 170
 171     The following parameters are not used by YoutubeDL itself, they are used by
 172     the FileDownloader:
 173     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 174     noresizebuffer, retries, continuedl, noprogress, consoletitle
 175
 176     The following options are used by the post processors:
 177     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
 178                        otherwise prefer avconv.
 179     exec_cmd:          Arbitrary command to run after downloading
 180     """
 181
    # Class-level placeholders. __init__ rebinds each of these on the
    # instance, so the mutable lists below are not shared between objects
    # that were constructed through __init__.
    params = None  # options dictionary passed to the constructor
    _ies = []  # registered InfoExtractor objects, in registration order
    _pps = []  # registered PostProcessor objects, in registration order
    _download_retcode = None  # 0 after __init__; trouble() sets it to 1
    _num_downloads = None  # value used for the 'autonumber' template field
    _screen_file = None  # stream for regular output (stdout or stderr)
 188
 189     def __init__(self, params=None):
 190         """Create a FileDownloader object with the given options."""
 191         if params is None:
 192             params = {}
 193         self._ies = []
 194         self._ies_instances = {}
 195         self._pps = []
 196         self._progress_hooks = []
 197         self._download_retcode = 0
 198         self._num_downloads = 0
 199         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 200         self._err_file = sys.stderr
 201         self.params = params
 202         self.cache = Cache(self)
 203
 204         if params.get('bidi_workaround', False):
 205             try:
 206                 import pty
 207                 master, slave = pty.openpty()
 208                 width = get_term_width()
 209                 if width is None:
 210                     width_args = []
 211                 else:
 212                     width_args = ['-w', str(width)]
 213                 sp_kwargs = dict(
 214                     stdin=subprocess.PIPE,
 215                     stdout=slave,
 216                     stderr=self._err_file)
 217                 try:
 218                     self._output_process = subprocess.Popen(
 219                         ['bidiv'] + width_args, **sp_kwargs
 220                     )
 221                 except OSError:
 222                     self._output_process = subprocess.Popen(
 223                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 224                 self._output_channel = os.fdopen(master, 'rb')
 225             except OSError as ose:
 226                 if ose.errno == 2:
 227                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 228                 else:
 229                     raise
 230
 231         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 232                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 233                 and not params.get('restrictfilenames', False)):
 234             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 235             self.report_warning(
 236                 'Assuming --restrict-filenames since file system encoding '
 237                 'cannot encode all characters. '
 238                 'Set the LC_ALL environment variable to fix this.')
 239             self.params['restrictfilenames'] = True
 240
 241         if '%(stitle)s' in self.params.get('outtmpl', ''):
 242             self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 243
 244         self._setup_opener()
 245
 246     def add_info_extractor(self, ie):
 247         """Add an InfoExtractor object to the end of the list."""
 248         self._ies.append(ie)
 249         self._ies_instances[ie.ie_key()] = ie
 250         ie.set_downloader(self)
 251
 252     def get_info_extractor(self, ie_key):
 253         """
 254         Get an instance of an IE with name ie_key, it will try to get one from
 255         the _ies list, if there's no instance it will create a new one and add
 256         it to the extractor list.
 257         """
 258         ie = self._ies_instances.get(ie_key)
 259         if ie is None:
 260             ie = get_info_extractor(ie_key)()
 261             self.add_info_extractor(ie)
 262         return ie
 263
 264     def add_default_info_extractors(self):
 265         """
 266         Add the InfoExtractors returned by gen_extractors to the end of the list
 267         """
 268         for ie in gen_extractors():
 269             self.add_info_extractor(ie)
 270
 271     def add_post_processor(self, pp):
 272         """Add a PostProcessor object to the end of the chain."""
 273         self._pps.append(pp)
 274         pp.set_downloader(self)
 275
 276     def add_progress_hook(self, ph):
 277         """Add the progress hook (currently only for the file downloader)"""
 278         self._progress_hooks.append(ph)
 279
 280     def _bidi_workaround(self, message):
 281         if not hasattr(self, '_output_channel'):
 282             return message
 283
 284         assert hasattr(self, '_output_process')
 285         assert isinstance(message, compat_str)
 286         line_count = message.count('\n') + 1
 287         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 288         self._output_process.stdin.flush()
 289         res = ''.join(self._output_channel.readline().decode('utf-8')
 290                        for _ in range(line_count))
 291         return res[:-len('\n')]
 292
 293     def to_screen(self, message, skip_eol=False):
 294         """Print message to stdout if not in quiet mode."""
 295         return self.to_stdout(message, skip_eol, check_quiet=True)
 296
 297     def _write_string(self, s, out=None):
 298         write_string(s, out=out, encoding=self.params.get('encoding'))
 299
 300     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 301         """Print message to stdout if not in quiet mode."""
 302         if self.params.get('logger'):
 303             self.params['logger'].debug(message)
 304         elif not check_quiet or not self.params.get('quiet', False):
 305             message = self._bidi_workaround(message)
 306             terminator = ['\n', ''][skip_eol]
 307             output = message + terminator
 308
 309             self._write_string(output, self._screen_file)
 310
 311     def to_stderr(self, message):
 312         """Print message to stderr."""
 313         assert isinstance(message, compat_str)
 314         if self.params.get('logger'):
 315             self.params['logger'].error(message)
 316         else:
 317             message = self._bidi_workaround(message)
 318             output = message + '\n'
 319             self._write_string(output, self._err_file)
 320
 321     def to_console_title(self, message):
 322         if not self.params.get('consoletitle', False):
 323             return
 324         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 325             # c_wchar_p() might not be necessary if `message` is
 326             # already of type unicode()
 327             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 328         elif 'TERM' in os.environ:
 329             self._write_string('\033]0;%s\007' % message, self._screen_file)
 330
 331     def save_console_title(self):
 332         if not self.params.get('consoletitle', False):
 333             return
 334         if 'TERM' in os.environ:
 335             # Save the title on stack
 336             self._write_string('\033[22;0t', self._screen_file)
 337
 338     def restore_console_title(self):
 339         if not self.params.get('consoletitle', False):
 340             return
 341         if 'TERM' in os.environ:
 342             # Restore the title from stack
 343             self._write_string('\033[23;0t', self._screen_file)
 344
 345     def __enter__(self):
 346         self.save_console_title()
 347         return self
 348
 349     def __exit__(self, *args):
 350         self.restore_console_title()
 351
 352         if self.params.get('cookiefile') is not None:
 353             self.cookiejar.save()
 354
 355     def trouble(self, message=None, tb=None):
 356         """Determine action to take when a download problem appears.
 357
 358         Depending on if the downloader has been configured to ignore
 359         download errors or not, this method may throw an exception or
 360         not when errors are found, after printing the message.
 361
 362         tb, if given, is additional traceback information.
 363         """
 364         if message is not None:
 365             self.to_stderr(message)
 366         if self.params.get('verbose'):
 367             if tb is None:
 368                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 369                     tb = ''
 370                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 371                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 372                     tb += compat_str(traceback.format_exc())
 373                 else:
 374                     tb_data = traceback.format_list(traceback.extract_stack())
 375                     tb = ''.join(tb_data)
 376             self.to_stderr(tb)
 377         if not self.params.get('ignoreerrors', False):
 378             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 379                 exc_info = sys.exc_info()[1].exc_info
 380             else:
 381                 exc_info = sys.exc_info()
 382             raise DownloadError(message, exc_info)
 383         self._download_retcode = 1
 384
 385     def report_warning(self, message):
 386         '''
 387         Print the message to stderr, it will be prefixed with 'WARNING:'
 388         If stderr is a tty file the 'WARNING:' will be colored
 389         '''
 390         if self.params.get('logger') is not None:
 391             self.params['logger'].warning(message)
 392         else:
 393             if self.params.get('no_warnings'):
 394                 return
 395             if self._err_file.isatty() and os.name != 'nt':
 396                 _msg_header = '\033[0;33mWARNING:\033[0m'
 397             else:
 398                 _msg_header = 'WARNING:'
 399             warning_message = '%s %s' % (_msg_header, message)
 400             self.to_stderr(warning_message)
 401
 402     def report_error(self, message, tb=None):
 403         '''
 404         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 405         in red if stderr is a tty file.
 406         '''
 407         if self._err_file.isatty() and os.name != 'nt':
 408             _msg_header = '\033[0;31mERROR:\033[0m'
 409         else:
 410             _msg_header = 'ERROR:'
 411         error_message = '%s %s' % (_msg_header, message)
 412         self.trouble(error_message, tb)
 413
 414     def report_file_already_downloaded(self, file_name):
 415         """Report file has already been fully downloaded."""
 416         try:
 417             self.to_screen('[download] %s has already been downloaded' % file_name)
 418         except UnicodeEncodeError:
 419             self.to_screen('[download] The file has already been downloaded')
 420
 421     def prepare_filename(self, info_dict):
 422         """Generate the output filename."""
 423         try:
 424             template_dict = dict(info_dict)
 425
 426             template_dict['epoch'] = int(time.time())
 427             autonumber_size = self.params.get('autonumber_size')
 428             if autonumber_size is None:
 429                 autonumber_size = 5
 430             autonumber_templ = '%0' + str(autonumber_size) + 'd'
 431             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 432             if template_dict.get('playlist_index') is not None:
 433                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
 434             if template_dict.get('resolution') is None:
 435                 if template_dict.get('width') and template_dict.get('height'):
 436                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 437                 elif template_dict.get('height'):
 438                     template_dict['resolution'] = '%sp' % template_dict['height']
 439                 elif template_dict.get('width'):
 440                     template_dict['resolution'] = '?x%d' % template_dict['width']
 441
 442             sanitize = lambda k, v: sanitize_filename(
 443                 compat_str(v),
 444                 restricted=self.params.get('restrictfilenames'),
 445                 is_id=(k == 'id'))
 446             template_dict = dict((k, sanitize(k, v))
 447                                  for k, v in template_dict.items()
 448                                  if v is not None)
 449             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 450
 451             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 452             tmpl = os.path.expanduser(outtmpl)
 453             filename = tmpl % template_dict
 454             return filename
 455         except ValueError as err:
 456             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 457             return None
 458
 459     def _match_entry(self, info_dict):
 460         """ Returns None iff the file should be downloaded """
 461
 462         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 463         if 'title' in info_dict:
 464             # This can happen when we're just evaluating the playlist
 465             title = info_dict['title']
 466             matchtitle = self.params.get('matchtitle', False)
 467             if matchtitle:
 468                 if not re.search(matchtitle, title, re.IGNORECASE):
 469                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 470             rejecttitle = self.params.get('rejecttitle', False)
 471             if rejecttitle:
 472                 if re.search(rejecttitle, title, re.IGNORECASE):
 473                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 474         date = info_dict.get('upload_date', None)
 475         if date is not None:
 476             dateRange = self.params.get('daterange', DateRange())
 477             if date not in dateRange:
 478                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 479         view_count = info_dict.get('view_count', None)
 480         if view_count is not None:
 481             min_views = self.params.get('min_views')
 482             if min_views is not None and view_count < min_views:
 483                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 484             max_views = self.params.get('max_views')
 485             if max_views is not None and view_count > max_views:
 486                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 487         age_limit = self.params.get('age_limit')
 488         if age_limit is not None:
 489             actual_age_limit = info_dict.get('age_limit')
 490             if actual_age_limit is None:
 491                 actual_age_limit = 0
 492             if age_limit < actual_age_limit:
 493                 return 'Skipping "' + title + '" because it is age restricted'
 494         if self.in_download_archive(info_dict):
 495             return '%s has already been recorded in archive' % video_title
 496         return None
 497
 498     @staticmethod
 499     def add_extra_info(info_dict, extra_info):
 500         '''Set the keys from extra_info in info dict if they are missing'''
 501         for key, value in extra_info.items():
 502             info_dict.setdefault(key, value)
 503
 504     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 505                      process=True):
 506         '''
 507         Returns a list with a dictionary for each video we find.
 508         If 'download', also downloads the videos.
 509         extra_info is a dict containing the extra values to add to each result
 510          '''
 511
 512         if ie_key:
 513             ies = [self.get_info_extractor(ie_key)]
 514         else:
 515             ies = self._ies
 516
 517         for ie in ies:
 518             if not ie.suitable(url):
 519                 continue
 520
 521             if not ie.working():
 522                 self.report_warning('The program functionality for this site has been marked as broken, '
 523                                     'and will probably not work.')
 524
 525             try:
 526                 ie_result = ie.extract(url)
 527                 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
 528                     break
 529                 if isinstance(ie_result, list):
 530                     # Backwards compatibility: old IE result format
 531                     ie_result = {
 532                         '_type': 'compat_list',
 533                         'entries': ie_result,
 534                     }
 535                 self.add_default_extra_info(ie_result, ie, url)
 536                 if process:
 537                     return self.process_ie_result(ie_result, download, extra_info)
 538                 else:
 539                     return ie_result
 540             except ExtractorError as de: # An error we somewhat expected
 541                 self.report_error(compat_str(de), de.format_traceback())
 542                 break
 543             except MaxDownloadsReached:
 544                 raise
 545             except Exception as e:
 546                 if self.params.get('ignoreerrors', False):
 547                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 548                     break
 549                 else:
 550                     raise
 551         else:
 552             self.report_error('no suitable InfoExtractor for URL %s' % url)
 553
 554     def add_default_extra_info(self, ie_result, ie, url):
 555         self.add_extra_info(ie_result, {
 556             'extractor': ie.IE_NAME,
 557             'webpage_url': url,
 558             'webpage_url_basename': url_basename(url),
 559             'extractor_key': ie.ie_key(),
 560         })
 561
 562     def process_ie_result(self, ie_result, download=True, extra_info={}):
 563         """
 564         Take the result of the ie(may be modified) and resolve all unresolved
 565         references (URLs, playlist items).
 566
 567         It will also download the videos if 'download'.
 568         Returns the resolved ie_result.
 569         """
 570
 571         result_type = ie_result.get('_type', 'video')
 572
 573         if result_type in ('url', 'url_transparent'):
 574             extract_flat = self.params.get('extract_flat', False)
 575             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
 576                     extract_flat is True):
 577                 self.add_extra_info(ie_result, extra_info)
 578                 if self.params.get('forcejson', False):
 579                     self.to_stdout(json.dumps(ie_result))
 580                 return ie_result
 581
 582         if result_type == 'video':
 583             self.add_extra_info(ie_result, extra_info)
 584             return self.process_video_result(ie_result, download=download)
 585         elif result_type == 'url':
 586             # We have to add extra_info to the results because it may be
 587             # contained in a playlist
 588             return self.extract_info(ie_result['url'],
 589                                      download,
 590                                      ie_key=ie_result.get('ie_key'),
 591                                      extra_info=extra_info)
 592         elif result_type == 'url_transparent':
 593             # Use the information from the embedding page
 594             info = self.extract_info(
 595                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 596                 extra_info=extra_info, download=False, process=False)
 597
 598             def make_result(embedded_info):
 599                 new_result = ie_result.copy()
 600                 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
 601                           'entries', 'ie_key', 'duration',
 602                           'subtitles', 'annotations', 'format',
 603                           'thumbnail', 'thumbnails'):
 604                     if f in new_result:
 605                         del new_result[f]
 606                     if f in embedded_info:
 607                         new_result[f] = embedded_info[f]
 608                 return new_result
 609             new_result = make_result(info)
 610
 611             assert new_result.get('_type') != 'url_transparent'
 612             if new_result.get('_type') == 'compat_list':
 613                 new_result['entries'] = [
 614                     make_result(e) for e in new_result['entries']]
 615
 616             return self.process_ie_result(
 617                 new_result, download=download, extra_info=extra_info)
 618         elif result_type == 'playlist':
 619             # We process each entry in the playlist
 620             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 621             self.to_screen('[download] Downloading playlist: %s' % playlist)
 622
 623             playlist_results = []
 624
 625             playliststart = self.params.get('playliststart', 1) - 1
 626             playlistend = self.params.get('playlistend', None)
 627             # For backwards compatibility, interpret -1 as whole list
 628             if playlistend == -1:
 629                 playlistend = None
 630
 631             if isinstance(ie_result['entries'], list):
 632                 n_all_entries = len(ie_result['entries'])
 633                 entries = ie_result['entries'][playliststart:playlistend]
 634                 n_entries = len(entries)
 635                 self.to_screen(
 636                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
 637                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
 638             else:
 639                 assert isinstance(ie_result['entries'], PagedList)
 640                 entries = ie_result['entries'].getslice(
 641                     playliststart, playlistend)
 642                 n_entries = len(entries)
 643                 self.to_screen(
 644                     "[%s] playlist %s: Downloading %d videos" %
 645                     (ie_result['extractor'], playlist, n_entries))
 646
 647             for i, entry in enumerate(entries, 1):
 648                 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
 649                 extra = {
 650                     'n_entries': n_entries,
 651                     'playlist': playlist,
 652                     'playlist_index': i + playliststart,
 653                     'extractor': ie_result['extractor'],
 654                     'webpage_url': ie_result['webpage_url'],
 655                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
 656                     'extractor_key': ie_result['extractor_key'],
 657                 }
 658
 659                 reason = self._match_entry(entry)
 660                 if reason is not None:
 661                     self.to_screen('[download] ' + reason)
 662                     continue
 663
 664                 entry_result = self.process_ie_result(entry,
 665                                                       download=download,
 666                                                       extra_info=extra)
 667                 playlist_results.append(entry_result)
 668             ie_result['entries'] = playlist_results
 669             return ie_result
 670         elif result_type == 'compat_list':
 671             def _fixup(r):
 672                 self.add_extra_info(r,
 673                     {
 674                         'extractor': ie_result['extractor'],
 675                         'webpage_url': ie_result['webpage_url'],
 676                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
 677                         'extractor_key': ie_result['extractor_key'],
 678                     })
 679                 return r
 680             ie_result['entries'] = [
 681                 self.process_ie_result(_fixup(r), download, extra_info)
 682                 for r in ie_result['entries']
 683             ]
 684             return ie_result
 685         else:
 686             raise Exception('Invalid result type: %s' % result_type)
 687
 688     def select_format(self, format_spec, available_formats):
 689         if format_spec == 'best' or format_spec is None:
 690             return available_formats[-1]
 691         elif format_spec == 'worst':
 692             return available_formats[0]
 693         elif format_spec == 'bestaudio':
 694             audio_formats = [
 695                 f for f in available_formats
 696                 if f.get('vcodec') == 'none']
 697             if audio_formats:
 698                 return audio_formats[-1]
 699         elif format_spec == 'worstaudio':
 700             audio_formats = [
 701                 f for f in available_formats
 702                 if f.get('vcodec') == 'none']
 703             if audio_formats:
 704                 return audio_formats[0]
 705         elif format_spec == 'bestvideo':
 706             video_formats = [
 707                 f for f in available_formats
 708                 if f.get('acodec') == 'none']
 709             if video_formats:
 710                 return video_formats[-1]
 711         elif format_spec == 'worstvideo':
 712             video_formats = [
 713                 f for f in available_formats
 714                 if f.get('acodec') == 'none']
 715             if video_formats:
 716                 return video_formats[0]
 717         else:
 718             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
 719             if format_spec in extensions:
 720                 filter_f = lambda f: f['ext'] == format_spec
 721             else:
 722                 filter_f = lambda f: f['format_id'] == format_spec
 723             matches = list(filter(filter_f, available_formats))
 724             if matches:
 725                 return matches[-1]
 726         return None
 727
 728     def process_video_result(self, info_dict, download=True):
 729         assert info_dict.get('_type', 'video') == 'video'
 730
 731         if 'id' not in info_dict:
 732             raise ExtractorError('Missing "id" field in extractor result')
 733         if 'title' not in info_dict:
 734             raise ExtractorError('Missing "title" field in extractor result')
 735
 736         if 'playlist' not in info_dict:
 737             # It isn't part of a playlist
 738             info_dict['playlist'] = None
 739             info_dict['playlist_index'] = None
 740
 741         thumbnails = info_dict.get('thumbnails')
 742         if thumbnails:
 743             thumbnails.sort(key=lambda t: (
 744                 t.get('width'), t.get('height'), t.get('url')))
 745             for t in thumbnails:
 746                 if 'width' in t and 'height' in t:
 747                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
 748
 749         if thumbnails and 'thumbnail' not in info_dict:
 750             info_dict['thumbnail'] = thumbnails[-1]['url']
 751
 752         if 'display_id' not in info_dict and 'id' in info_dict:
 753             info_dict['display_id'] = info_dict['id']
 754
 755         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
 756             upload_date = datetime.datetime.utcfromtimestamp(
 757                 info_dict['timestamp'])
 758             info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
 759
 760         # This extractors handle format selection themselves
 761         if info_dict['extractor'] in ['Youku']:
 762             if download:
 763                 self.process_info(info_dict)
 764             return info_dict
 765
 766         # We now pick which formats have to be downloaded
 767         if info_dict.get('formats') is None:
 768             # There's only one format available
 769             formats = [info_dict]
 770         else:
 771             formats = info_dict['formats']
 772
 773         if not formats:
 774             raise ExtractorError('No video formats found!')
 775
 776         # We check that all the formats have the format and format_id fields
 777         for i, format in enumerate(formats):
 778             if 'url' not in format:
 779                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
 780
 781             if format.get('format_id') is None:
 782                 format['format_id'] = compat_str(i)
 783             if format.get('format') is None:
 784                 format['format'] = '{id} - {res}{note}'.format(
 785                     id=format['format_id'],
 786                     res=self.format_resolution(format),
 787                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
 788                 )
 789             # Automatically determine file extension if missing
 790             if 'ext' not in format:
 791                 format['ext'] = determine_ext(format['url']).lower()
 792
 793         format_limit = self.params.get('format_limit', None)
 794         if format_limit:
 795             formats = list(takewhile_inclusive(
 796                 lambda f: f['format_id'] != format_limit, formats
 797             ))
 798
 799         # TODO Central sorting goes here
 800
 801         if formats[0] is not info_dict:
 802             # only set the 'formats' fields if the original info_dict list them
 803             # otherwise we end up with a circular reference, the first (and unique)
 804             # element in the 'formats' field in info_dict is info_dict itself,
 805             # wich can't be exported to json
 806             info_dict['formats'] = formats
 807         if self.params.get('listformats', None):
 808             self.list_formats(info_dict)
 809             return
 810
 811         req_format = self.params.get('format')
 812         if req_format is None:
 813             req_format = 'best'
 814         formats_to_download = []
 815         # The -1 is for supporting YoutubeIE
 816         if req_format in ('-1', 'all'):
 817             formats_to_download = formats
 818         else:
 819             for rfstr in req_format.split(','):
 820                 # We can accept formats requested in the format: 34/5/best, we pick
 821                 # the first that is available, starting from left
 822                 req_formats = rfstr.split('/')
 823                 for rf in req_formats:
 824                     if re.match(r'.+?\+.+?', rf) is not None:
 825                         # Two formats have been requested like '137+139'
 826                         format_1, format_2 = rf.split('+')
 827                         formats_info = (self.select_format(format_1, formats),
 828                             self.select_format(format_2, formats))
 829                         if all(formats_info):
 830                             selected_format = {
 831                                 'requested_formats': formats_info,
 832                                 'format': rf,
 833                                 'ext': formats_info[0]['ext'],
 834                             }
 835                         else:
 836                             selected_format = None
 837                     else:
 838                         selected_format = self.select_format(rf, formats)
 839                     if selected_format is not None:
 840                         formats_to_download.append(selected_format)
 841                         break
 842         if not formats_to_download:
 843             raise ExtractorError('requested format not available',
 844                                  expected=True)
 845
 846         if download:
 847             if len(formats_to_download) > 1:
 848                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
 849             for format in formats_to_download:
 850                 new_info = dict(info_dict)
 851                 new_info.update(format)
 852                 self.process_info(new_info)
 853         # We update the info dict with the best quality format (backwards compatibility)
 854         info_dict.update(formats_to_download[-1])
 855         return info_dict
 856
 857     def process_info(self, info_dict):
 858         """Process a single resolved IE result."""
 859
 860         assert info_dict.get('_type', 'video') == 'video'
 861
 862         max_downloads = self.params.get('max_downloads')
 863         if max_downloads is not None:
 864             if self._num_downloads >= int(max_downloads):
 865                 raise MaxDownloadsReached()
 866
 867         info_dict['fulltitle'] = info_dict['title']
 868         if len(info_dict['title']) > 200:
 869             info_dict['title'] = info_dict['title'][:197] + '...'
 870
 871         # Keep for backwards compatibility
 872         info_dict['stitle'] = info_dict['title']
 873
 874         if 'format' not in info_dict:
 875             info_dict['format'] = info_dict['ext']
 876
 877         reason = self._match_entry(info_dict)
 878         if reason is not None:
 879             self.to_screen('[download] ' + reason)
 880             return
 881
 882         self._num_downloads += 1
 883
 884         filename = self.prepare_filename(info_dict)
 885
 886         # Forced printings
 887         if self.params.get('forcetitle', False):
 888             self.to_stdout(info_dict['fulltitle'])
 889         if self.params.get('forceid', False):
 890             self.to_stdout(info_dict['id'])
 891         if self.params.get('forceurl', False):
 892             # For RTMP URLs, also include the playpath
 893             self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
 894         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
 895             self.to_stdout(info_dict['thumbnail'])
 896         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
 897             self.to_stdout(info_dict['description'])
 898         if self.params.get('forcefilename', False) and filename is not None:
 899             self.to_stdout(filename)
 900         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
 901             self.to_stdout(formatSeconds(info_dict['duration']))
 902         if self.params.get('forceformat', False):
 903             self.to_stdout(info_dict['format'])
 904         if self.params.get('forcejson', False):
 905             info_dict['_filename'] = filename
 906             self.to_stdout(json.dumps(info_dict))
 907
 908         # Do nothing else if in simulate mode
 909         if self.params.get('simulate', False):
 910             return
 911
 912         if filename is None:
 913             return
 914
 915         try:
 916             dn = os.path.dirname(encodeFilename(filename))
 917             if dn and not os.path.exists(dn):
 918                 os.makedirs(dn)
 919         except (OSError, IOError) as err:
 920             self.report_error('unable to create directory ' + compat_str(err))
 921             return
 922
 923         if self.params.get('writedescription', False):
 924             descfn = filename + '.description'
 925             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
 926                 self.to_screen('[info] Video description is already present')
 927             else:
 928                 try:
 929                     self.to_screen('[info] Writing video description to: ' + descfn)
 930                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
 931                         descfile.write(info_dict['description'])
 932                 except (KeyError, TypeError):
 933                     self.report_warning('There\'s no description to write.')
 934                 except (OSError, IOError):
 935                     self.report_error('Cannot write description file ' + descfn)
 936                     return
 937
 938         if self.params.get('writeannotations', False):
 939             annofn = filename + '.annotations.xml'
 940             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
 941                 self.to_screen('[info] Video annotations are already present')
 942             else:
 943                 try:
 944                     self.to_screen('[info] Writing video annotations to: ' + annofn)
 945                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
 946                         annofile.write(info_dict['annotations'])
 947                 except (KeyError, TypeError):
 948                     self.report_warning('There are no annotations to write.')
 949                 except (OSError, IOError):
 950                     self.report_error('Cannot write annotations file: ' + annofn)
 951                     return
 952
 953         subtitles_are_requested = any([self.params.get('writesubtitles', False),
 954                                        self.params.get('writeautomaticsub')])
 955
 956         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
 957             # subtitles download errors are already managed as troubles in relevant IE
 958             # that way it will silently go on when used with unsupporting IE
 959             subtitles = info_dict['subtitles']
 960             sub_format = self.params.get('subtitlesformat', 'srt')
 961             for sub_lang in subtitles.keys():
 962                 sub = subtitles[sub_lang]
 963                 if sub is None:
 964                     continue
 965                 try:
 966                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
 967                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
 968                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
 969                     else:
 970                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
 971                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
 972                                 subfile.write(sub)
 973                 except (OSError, IOError):
 974                     self.report_error('Cannot write subtitles file ' + sub_filename)
 975                     return
 976
 977         if self.params.get('writeinfojson', False):
 978             infofn = os.path.splitext(filename)[0] + '.info.json'
 979             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
 980                 self.to_screen('[info] Video description metadata is already present')
 981             else:
 982                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
 983                 try:
 984                     write_json_file(info_dict, encodeFilename(infofn))
 985                 except (OSError, IOError):
 986                     self.report_error('Cannot write metadata to JSON file ' + infofn)
 987                     return
 988
 989         if self.params.get('writethumbnail', False):
 990             if info_dict.get('thumbnail') is not None:
 991                 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
 992                 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
 993                 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
 994                     self.to_screen('[%s] %s: Thumbnail is already present' %
 995                                    (info_dict['extractor'], info_dict['id']))
 996                 else:
 997                     self.to_screen('[%s] %s: Downloading thumbnail ...' %
 998                                    (info_dict['extractor'], info_dict['id']))
 999                     try:
1000                         uf = self.urlopen(info_dict['thumbnail'])
1001                         with open(thumb_filename, 'wb') as thumbf:
1002                             shutil.copyfileobj(uf, thumbf)
1003                         self.to_screen('[%s] %s: Writing thumbnail to: %s' %
1004                             (info_dict['extractor'], info_dict['id'], thumb_filename))
1005                     except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1006                         self.report_warning('Unable to download thumbnail "%s": %s' %
1007                             (info_dict['thumbnail'], compat_str(err)))
1008
1009         if not self.params.get('skip_download', False):
1010             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
1011                 success = True
1012             else:
1013                 try:
1014                     def dl(name, info):
1015                         fd = get_suitable_downloader(info)(self, self.params)
1016                         for ph in self._progress_hooks:
1017                             fd.add_progress_hook(ph)
1018                         if self.params.get('verbose'):
1019                             self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1020                         return fd.download(name, info)
1021                     if info_dict.get('requested_formats') is not None:
1022                         downloaded = []
1023                         success = True
1024                         merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1025                         if not merger._get_executable():
1026                             postprocessors = []
1027                             self.report_warning('You have requested multiple '
1028                                 'formats but ffmpeg or avconv are not installed.'
1029                                 ' The formats won\'t be merged')
1030                         else:
1031                             postprocessors = [merger]
1032                         for f in info_dict['requested_formats']:
1033                             new_info = dict(info_dict)
1034                             new_info.update(f)
1035                             fname = self.prepare_filename(new_info)
1036                             fname = prepend_extension(fname, 'f%s' % f['format_id'])
1037                             downloaded.append(fname)
1038                             partial_success = dl(fname, new_info)
1039                             success = success and partial_success
1040                         info_dict['__postprocessors'] = postprocessors
1041                         info_dict['__files_to_merge'] = downloaded
1042                     else:
1043                         # Just a single file
1044                         success = dl(filename, info_dict)
1045                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1046                     self.report_error('unable to download video data: %s' % str(err))
1047                     return
1048                 except (OSError, IOError) as err:
1049                     raise UnavailableVideoError(err)
1050                 except (ContentTooShortError, ) as err:
1051                     self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1052                     return
1053
1054             if success:
1055                 try:
1056                     self.post_process(filename, info_dict)
1057                 except (PostProcessingError) as err:
1058                     self.report_error('postprocessing: %s' % str(err))
1059                     return
1060
1061         self.record_download_archive(info_dict)
1062
1063     def download(self, url_list):
1064         """Download a given list of URLs."""
1065         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1066         if (len(url_list) > 1 and
1067                 '%' not in outtmpl
1068                 and self.params.get('max_downloads') != 1):
1069             raise SameFileError(outtmpl)
1070
1071         for url in url_list:
1072             try:
1073                 #It also downloads the videos
1074                 self.extract_info(url)
1075             except UnavailableVideoError:
1076                 self.report_error('unable to download video')
1077             except MaxDownloadsReached:
1078                 self.to_screen('[info] Maximum number of downloaded files reached.')
1079                 raise
1080
1081         return self._download_retcode
1082
1083     def download_with_info_file(self, info_filename):
1084         with io.open(info_filename, 'r', encoding='utf-8') as f:
1085             info = json.load(f)
1086         try:
1087             self.process_ie_result(info, download=True)
1088         except DownloadError:
1089             webpage_url = info.get('webpage_url')
1090             if webpage_url is not None:
1091                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1092                 return self.download([webpage_url])
1093             else:
1094                 raise
1095         return self._download_retcode
1096
1097     def post_process(self, filename, ie_info):
1098         """Run all the postprocessors on the given file."""
1099         info = dict(ie_info)
1100         info['filepath'] = filename
1101         keep_video = None
1102         pps_chain = []
1103         if ie_info.get('__postprocessors') is not None:
1104             pps_chain.extend(ie_info['__postprocessors'])
1105         pps_chain.extend(self._pps)
1106         for pp in pps_chain:
1107             try:
1108                 keep_video_wish, new_info = pp.run(info)
1109                 if keep_video_wish is not None:
1110                     if keep_video_wish:
1111                         keep_video = keep_video_wish
1112                     elif keep_video is None:
1113                         # No clear decision yet, let IE decide
1114                         keep_video = keep_video_wish
1115             except PostProcessingError as e:
1116                 self.report_error(e.msg)
1117         if keep_video is False and not self.params.get('keepvideo', False):
1118             try:
1119                 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1120                 os.remove(encodeFilename(filename))
1121             except (IOError, OSError):
1122                 self.report_warning('Unable to remove downloaded video file')
1123
1124     def _make_archive_id(self, info_dict):
1125         # Future-proof against any change in case
1126         # and backwards compatibility with prior versions
1127         extractor = info_dict.get('extractor_key')
1128         if extractor is None:
1129             if 'id' in info_dict:
1130                 extractor = info_dict.get('ie_key')  # key in a playlist
1131         if extractor is None:
1132             return None  # Incomplete video information
1133         return extractor.lower() + ' ' + info_dict['id']
1134
1135     def in_download_archive(self, info_dict):
1136         fn = self.params.get('download_archive')
1137         if fn is None:
1138             return False
1139
1140         vid_id = self._make_archive_id(info_dict)
1141         if vid_id is None:
1142             return False  # Incomplete video information
1143
1144         try:
1145             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1146                 for line in archive_file:
1147                     if line.strip() == vid_id:
1148                         return True
1149         except IOError as ioe:
1150             if ioe.errno != errno.ENOENT:
1151                 raise
1152         return False
1153
1154     def record_download_archive(self, info_dict):
1155         fn = self.params.get('download_archive')
1156         if fn is None:
1157             return
1158         vid_id = self._make_archive_id(info_dict)
1159         assert vid_id
1160         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1161             archive_file.write(vid_id + '\n')
1162
1163     @staticmethod
1164     def format_resolution(format, default='unknown'):
1165         if format.get('vcodec') == 'none':
1166             return 'audio only'
1167         if format.get('resolution') is not None:
1168             return format['resolution']
1169         if format.get('height') is not None:
1170             if format.get('width') is not None:
1171                 res = '%sx%s' % (format['width'], format['height'])
1172             else:
1173                 res = '%sp' % format['height']
1174         elif format.get('width') is not None:
1175             res = '?x%d' % format['width']
1176         else:
1177             res = default
1178         return res
1179
1180     def _format_note(self, fdict):
1181         res = ''
1182         if fdict.get('ext') in ['f4f', 'f4m']:
1183             res += '(unsupported) '
1184         if fdict.get('format_note') is not None:
1185             res += fdict['format_note'] + ' '
1186         if fdict.get('tbr') is not None:
1187             res += '%4dk ' % fdict['tbr']
1188         if fdict.get('container') is not None:
1189             if res:
1190                 res += ', '
1191             res += '%s container' % fdict['container']
1192         if (fdict.get('vcodec') is not None and
1193                 fdict.get('vcodec') != 'none'):
1194             if res:
1195                 res += ', '
1196             res += fdict['vcodec']
1197             if fdict.get('vbr') is not None:
1198                 res += '@'
1199         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1200             res += 'video@'
1201         if fdict.get('vbr') is not None:
1202             res += '%4dk' % fdict['vbr']
1203         if fdict.get('acodec') is not None:
1204             if res:
1205                 res += ', '
1206             if fdict['acodec'] == 'none':
1207                 res += 'video only'
1208             else:
1209                 res += '%-5s' % fdict['acodec']
1210         elif fdict.get('abr') is not None:
1211             if res:
1212                 res += ', '
1213             res += 'audio'
1214         if fdict.get('abr') is not None:
1215             res += '@%3dk' % fdict['abr']
1216         if fdict.get('asr') is not None:
1217             res += ' (%5dHz)' % fdict['asr']
1218         if fdict.get('filesize') is not None:
1219             if res:
1220                 res += ', '
1221             res += format_bytes(fdict['filesize'])
1222         elif fdict.get('filesize_approx') is not None:
1223             if res:
1224                 res += ', '
1225             res += '~' + format_bytes(fdict['filesize_approx'])
1226         return res
1227
1228     def list_formats(self, info_dict):
1229         def line(format, idlen=20):
1230             return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1231                 format['format_id'],
1232                 format['ext'],
1233                 self.format_resolution(format),
1234                 self._format_note(format),
1235             ))
1236
1237         formats = info_dict.get('formats', [info_dict])
1238         idlen = max(len('format code'),
1239                     max(len(f['format_id']) for f in formats))
1240         formats_s = [line(f, idlen) for f in formats]
1241         if len(formats) > 1:
1242             formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1243             formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1244
1245         header_line = line({
1246             'format_id': 'format code', 'ext': 'extension',
1247             'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1248         self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1249                        (info_dict['id'], header_line, '\n'.join(formats_s)))
1250
1251     def urlopen(self, req):
1252         """ Start an HTTP download """
1253
1254         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1255         # always respected by websites, some tend to give out URLs with non percent-encoded
1256         # non-ASCII characters (see telemb.py, ard.py [#3412])
1257         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1258         # To work around aforementioned issue we will replace request's original URL with
1259         # percent-encoded one
1260         req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
1261         url = req if req_is_string else req.get_full_url()
1262         url_escaped = escape_url(url)
1263
1264         # Substitute URL if any change after escaping
1265         if url != url_escaped:
1266             if req_is_string:
1267                 req = url_escaped
1268             else:
1269                 req = compat_urllib_request.Request(
1270                     url_escaped, data=req.data, headers=req.headers,
1271                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1272
1273         return self._opener.open(req, timeout=self._socket_timeout)
1274
1275     def print_debug_header(self):
1276         if not self.params.get('verbose'):
1277             return
1278
1279         if type('') is not compat_str:
1280             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1281             self.report_warning(
1282                 'Your Python is broken! Update to a newer and supported version')
1283
1284         encoding_str = (
1285             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1286                 locale.getpreferredencoding(),
1287                 sys.getfilesystemencoding(),
1288                 sys.stdout.encoding,
1289                 self.get_encoding()))
1290         write_string(encoding_str, encoding=None)
1291
1292         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1293         try:
1294             sp = subprocess.Popen(
1295                 ['git', 'rev-parse', '--short', 'HEAD'],
1296                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1297                 cwd=os.path.dirname(os.path.abspath(__file__)))
1298             out, err = sp.communicate()
1299             out = out.decode().strip()
1300             if re.match('[0-9a-f]+', out):
1301                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1302         except:
1303             try:
1304                 sys.exc_clear()
1305             except:
1306                 pass
1307         self._write_string('[debug] Python version %s - %s' %
1308                      (platform.python_version(), platform_name()) + '\n')
1309
1310         proxy_map = {}
1311         for handler in self._opener.handlers:
1312             if hasattr(handler, 'proxies'):
1313                 proxy_map.update(handler.proxies)
1314         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1315
1316     def _setup_opener(self):
1317         timeout_val = self.params.get('socket_timeout')
1318         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1319
1320         opts_cookiefile = self.params.get('cookiefile')
1321         opts_proxy = self.params.get('proxy')
1322
1323         if opts_cookiefile is None:
1324             self.cookiejar = compat_cookiejar.CookieJar()
1325         else:
1326             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1327                 opts_cookiefile)
1328             if os.access(opts_cookiefile, os.R_OK):
1329                 self.cookiejar.load()
1330
1331         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1332             self.cookiejar)
1333         if opts_proxy is not None:
1334             if opts_proxy == '':
1335                 proxies = {}
1336             else:
1337                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1338         else:
1339             proxies = compat_urllib_request.getproxies()
1340             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1341             if 'http' in proxies and 'https' not in proxies:
1342                 proxies['https'] = proxies['http']
1343         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1344
1345         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1346         https_handler = make_HTTPS_handler(
1347             self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1348         ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1349         opener = compat_urllib_request.build_opener(
1350             https_handler, proxy_handler, cookie_processor, ydlh)
1351         # Delete the default user-agent header, which would otherwise apply in
1352         # cases where our custom HTTP handler doesn't come into play
1353         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1354         opener.addheaders = []
1355         self._opener = opener
1356
1357     def encode(self, s):
1358         if isinstance(s, bytes):
1359             return s  # Already encoded
1360
1361         try:
1362             return s.encode(self.get_encoding())
1363         except UnicodeEncodeError as err:
1364             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1365             raise
1366
1367     def get_encoding(self):
1368         encoding = self.params.get('encoding')
1369         if encoding is None:
1370             encoding = preferredencoding()
1371         return encoding