_ Git - youtube-dl/blob - youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import datetime
   8 import errno
   9 import io
  10 import itertools
  11 import json
  12 import locale
  13 import os
  14 import platform
  15 import re
  16 import shutil
  17 import subprocess
  18 import socket
  19 import sys
  20 import time
  21 import traceback
  22
  23 if os.name == 'nt':
  24     import ctypes
  25
  26 from .compat import (
  27     compat_cookiejar,
  28     compat_expanduser,
  29     compat_http_client,
  30     compat_str,
  31     compat_urllib_error,
  32     compat_urllib_request,
  33 )
  34 from .utils import (
  35     escape_url,
  36     ContentTooShortError,
  37     date_from_str,
  38     DateRange,
  39     DEFAULT_OUTTMPL,
  40     determine_ext,
  41     DownloadError,
  42     encodeFilename,
  43     ExtractorError,
  44     format_bytes,
  45     formatSeconds,
  46     get_term_width,
  47     locked_file,
  48     make_HTTPS_handler,
  49     MaxDownloadsReached,
  50     PagedList,
  51     PostProcessingError,
  52     platform_name,
  53     preferredencoding,
  54     SameFileError,
  55     sanitize_filename,
  56     subtitles_filename,
  57     takewhile_inclusive,
  58     UnavailableVideoError,
  59     url_basename,
  60     write_json_file,
  61     write_string,
  62     YoutubeDLHandler,
  63     prepend_extension,
  64     args_to_str,
  65 )
  66 from .cache import Cache
  67 from .extractor import get_info_extractor, gen_extractors
  68 from .downloader import get_suitable_downloader
  69 from .downloader.rtmp import rtmpdump_version
  70 from .postprocessor import FFmpegMergerPP, FFmpegPostProcessor
  71 from .version import __version__
  72
  73
  74 class YoutubeDL(object):
  75     """YoutubeDL class.
  76
   77     YoutubeDL objects are the ones responsible for downloading the
  78     actual video file and writing it to disk if the user has requested
  79     it, among some other tasks. In most cases there should be one per
  80     program. As, given a video URL, the downloader doesn't know how to
  81     extract all the needed information, task that InfoExtractors do, it
  82     has to pass the URL to one of them.
  83
  84     For this, YoutubeDL objects have a method that allows
  85     InfoExtractors to be registered in a given order. When it is passed
   86     a URL, the YoutubeDL object hands it to the first InfoExtractor it
  87     finds that reports being able to handle it. The InfoExtractor extracts
  88     all the information about the video or videos the URL refers to, and
   89     YoutubeDL processes the extracted information, possibly using a File
  90     Downloader to download the video.
  91
  92     YoutubeDL objects accept a lot of parameters. In order not to saturate
  93     the object constructor with arguments, it receives a dictionary of
  94     options instead. These options are available through the params
  95     attribute for the InfoExtractors to use. The YoutubeDL also
  96     registers itself as the downloader in charge for the InfoExtractors
  97     that are added to it, so this is a "mutual registration".
  98
  99     Available options:
 100
 101     username:          Username for authentication purposes.
 102     password:          Password for authentication purposes.
  103     videopassword:     Password for accessing a video.
 104     usenetrc:          Use netrc for authentication instead.
 105     verbose:           Print additional info to stdout.
 106     quiet:             Do not print messages to stdout.
 107     no_warnings:       Do not print out anything for warnings.
 108     forceurl:          Force printing final URL.
 109     forcetitle:        Force printing title.
 110     forceid:           Force printing ID.
 111     forcethumbnail:    Force printing thumbnail URL.
 112     forcedescription:  Force printing description.
 113     forcefilename:     Force printing final filename.
 114     forceduration:     Force printing duration.
 115     forcejson:         Force printing info_dict as JSON.
 116     dump_single_json:  Force printing the info_dict of the whole playlist
 117                        (or video) as a single JSON line.
 118     simulate:          Do not download the video files.
 119     format:            Video format code.
 120     format_limit:      Highest quality format to try.
 121     outtmpl:           Template for output names.
 122     restrictfilenames: Do not allow "&" and spaces in file names
 123     ignoreerrors:      Do not stop on download errors.
 124     nooverwrites:      Prevent overwriting files.
 125     playliststart:     Playlist item to start at.
 126     playlistend:       Playlist item to end at.
 127     playlistreverse:   Download playlist items in reverse order.
 128     matchtitle:        Download only matching titles.
 129     rejecttitle:       Reject downloads for matching titles.
 130     logger:            Log messages to a logging.Logger instance.
 131     logtostderr:       Log messages to stderr instead of stdout.
 132     writedescription:  Write the video description to a .description file
 133     writeinfojson:     Write the video description to a .info.json file
 134     writeannotations:  Write the video annotations to a .annotations.xml file
 135     writethumbnail:    Write the thumbnail image to a file
 136     writesubtitles:    Write the video subtitles to a file
 137     writeautomaticsub: Write the automatic subtitles to a file
 138     allsubtitles:      Downloads all the subtitles of the video
 139                        (requires writesubtitles or writeautomaticsub)
 140     listsubtitles:     Lists all available subtitles for the video
 141     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
 142     subtitleslangs:    List of languages of the subtitles to download
 143     keepvideo:         Keep the video file after post-processing
 144     daterange:         A DateRange object, download only if the upload_date is in the range.
 145     skip_download:     Skip the actual download of the video file
 146     cachedir:          Location of the cache files in the filesystem.
 147                        False to disable filesystem cache.
 148     noplaylist:        Download single video instead of a playlist if in doubt.
 149     age_limit:         An integer representing the user's age in years.
 150                        Unsuitable videos for the given age are skipped.
 151     min_views:         An integer representing the minimum view count the video
 152                        must have in order to not be skipped.
 153                        Videos without view count information are always
 154                        downloaded. None for no limit.
 155     max_views:         An integer representing the maximum view count.
 156                        Videos that are more popular than that are not
 157                        downloaded.
 158                        Videos without view count information are always
 159                        downloaded. None for no limit.
 160     download_archive:  File name of a file where all downloads are recorded.
 161                        Videos already present in the file are not downloaded
 162                        again.
 163     cookiefile:        File name where cookies should be read from and dumped to.
 164     nocheckcertificate:Do not verify SSL certificates
 165     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 166                        At the moment, this is only supported by YouTube.
 167     proxy:             URL of the proxy server to use
 168     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 169     bidi_workaround:   Work around buggy terminals without bidirectional text
  170                        support, using fribidi
 171     debug_printtraffic:Print out sent and received HTTP traffic
 172     include_ads:       Download ads as well
 173     default_search:    Prepend this string if an input url is not valid.
 174                        'auto' for elaborate guessing
 175     encoding:          Use this encoding instead of the system-specified.
 176     extract_flat:      Do not resolve URLs, return the immediate result.
 177                        Pass in 'in_playlist' to only show this behavior for
 178                        playlist items.
 179
 180     The following parameters are not used by YoutubeDL itself, they are used by
 181     the FileDownloader:
 182     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 183     noresizebuffer, retries, continuedl, noprogress, consoletitle
 184
 185     The following options are used by the post processors:
 186     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
 187                        otherwise prefer avconv.
 188     exec_cmd:          Arbitrary command to run after downloading
 189     """
 190
 191     params = None
 192     _ies = []
 193     _pps = []
 194     _download_retcode = None
 195     _num_downloads = None
 196     _screen_file = None
 197
 198     def __init__(self, params=None, auto_init=True):
 199         """Create a FileDownloader object with the given options."""
 200         if params is None:
 201             params = {}
 202         self._ies = []
 203         self._ies_instances = {}
 204         self._pps = []
 205         self._progress_hooks = []
 206         self._download_retcode = 0
 207         self._num_downloads = 0
 208         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 209         self._err_file = sys.stderr
 210         self.params = params
 211         self.cache = Cache(self)
 212
 213         if params.get('bidi_workaround', False):
 214             try:
 215                 import pty
 216                 master, slave = pty.openpty()
 217                 width = get_term_width()
 218                 if width is None:
 219                     width_args = []
 220                 else:
 221                     width_args = ['-w', str(width)]
 222                 sp_kwargs = dict(
 223                     stdin=subprocess.PIPE,
 224                     stdout=slave,
 225                     stderr=self._err_file)
 226                 try:
 227                     self._output_process = subprocess.Popen(
 228                         ['bidiv'] + width_args, **sp_kwargs
 229                     )
 230                 except OSError:
 231                     self._output_process = subprocess.Popen(
 232                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 233                 self._output_channel = os.fdopen(master, 'rb')
 234             except OSError as ose:
 235                 if ose.errno == 2:
 236                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 237                 else:
 238                     raise
 239
 240         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 241                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 242                 and not params.get('restrictfilenames', False)):
 243             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 244             self.report_warning(
 245                 'Assuming --restrict-filenames since file system encoding '
 246                 'cannot encode all characters. '
 247                 'Set the LC_ALL environment variable to fix this.')
 248             self.params['restrictfilenames'] = True
 249
 250         if '%(stitle)s' in self.params.get('outtmpl', ''):
 251             self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 252
 253         self._setup_opener()
 254
 255         if auto_init:
 256             self.print_debug_header()
 257             self.add_default_info_extractors()
 258
 259     def warn_if_short_id(self, argv):
 260         # short YouTube ID starting with dash?
 261         idxs = [
 262             i for i, a in enumerate(argv)
 263             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 264         if idxs:
 265             correct_argv = (
 266                 ['youtube-dl'] +
 267                 [a for i, a in enumerate(argv) if i not in idxs] +
 268                 ['--'] + [argv[i] for i in idxs]
 269             )
 270             self.report_warning(
 271                 'Long argument string detected. '
 272                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 273                 args_to_str(correct_argv))
 274
 275     def add_info_extractor(self, ie):
 276         """Add an InfoExtractor object to the end of the list."""
 277         self._ies.append(ie)
 278         self._ies_instances[ie.ie_key()] = ie
 279         ie.set_downloader(self)
 280
 281     def get_info_extractor(self, ie_key):
 282         """
 283         Get an instance of an IE with name ie_key, it will try to get one from
 284         the _ies list, if there's no instance it will create a new one and add
 285         it to the extractor list.
 286         """
 287         ie = self._ies_instances.get(ie_key)
 288         if ie is None:
 289             ie = get_info_extractor(ie_key)()
 290             self.add_info_extractor(ie)
 291         return ie
 292
 293     def add_default_info_extractors(self):
 294         """
 295         Add the InfoExtractors returned by gen_extractors to the end of the list
 296         """
 297         for ie in gen_extractors():
 298             self.add_info_extractor(ie)
 299
 300     def add_post_processor(self, pp):
 301         """Add a PostProcessor object to the end of the chain."""
 302         self._pps.append(pp)
 303         pp.set_downloader(self)
 304
 305     def add_progress_hook(self, ph):
 306         """Add the progress hook (currently only for the file downloader)"""
 307         self._progress_hooks.append(ph)
 308
 309     def _bidi_workaround(self, message):
 310         if not hasattr(self, '_output_channel'):
 311             return message
 312
 313         assert hasattr(self, '_output_process')
 314         assert isinstance(message, compat_str)
 315         line_count = message.count('\n') + 1
 316         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 317         self._output_process.stdin.flush()
 318         res = ''.join(self._output_channel.readline().decode('utf-8')
 319                       for _ in range(line_count))
 320         return res[:-len('\n')]
 321
 322     def to_screen(self, message, skip_eol=False):
 323         """Print message to stdout if not in quiet mode."""
 324         return self.to_stdout(message, skip_eol, check_quiet=True)
 325
 326     def _write_string(self, s, out=None):
 327         write_string(s, out=out, encoding=self.params.get('encoding'))
 328
 329     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 330         """Print message to stdout if not in quiet mode."""
 331         if self.params.get('logger'):
 332             self.params['logger'].debug(message)
 333         elif not check_quiet or not self.params.get('quiet', False):
 334             message = self._bidi_workaround(message)
 335             terminator = ['\n', ''][skip_eol]
 336             output = message + terminator
 337
 338             self._write_string(output, self._screen_file)
 339
 340     def to_stderr(self, message):
 341         """Print message to stderr."""
 342         assert isinstance(message, compat_str)
 343         if self.params.get('logger'):
 344             self.params['logger'].error(message)
 345         else:
 346             message = self._bidi_workaround(message)
 347             output = message + '\n'
 348             self._write_string(output, self._err_file)
 349
 350     def to_console_title(self, message):
 351         if not self.params.get('consoletitle', False):
 352             return
 353         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 354             # c_wchar_p() might not be necessary if `message` is
 355             # already of type unicode()
 356             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 357         elif 'TERM' in os.environ:
 358             self._write_string('\033]0;%s\007' % message, self._screen_file)
 359
 360     def save_console_title(self):
 361         if not self.params.get('consoletitle', False):
 362             return
 363         if 'TERM' in os.environ:
 364             # Save the title on stack
 365             self._write_string('\033[22;0t', self._screen_file)
 366
 367     def restore_console_title(self):
 368         if not self.params.get('consoletitle', False):
 369             return
 370         if 'TERM' in os.environ:
 371             # Restore the title from stack
 372             self._write_string('\033[23;0t', self._screen_file)
 373
 374     def __enter__(self):
 375         self.save_console_title()
 376         return self
 377
 378     def __exit__(self, *args):
 379         self.restore_console_title()
 380
 381         if self.params.get('cookiefile') is not None:
 382             self.cookiejar.save()
 383
 384     def trouble(self, message=None, tb=None):
 385         """Determine action to take when a download problem appears.
 386
 387         Depending on if the downloader has been configured to ignore
 388         download errors or not, this method may throw an exception or
 389         not when errors are found, after printing the message.
 390
 391         tb, if given, is additional traceback information.
 392         """
 393         if message is not None:
 394             self.to_stderr(message)
 395         if self.params.get('verbose'):
 396             if tb is None:
 397                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 398                     tb = ''
 399                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 400                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 401                     tb += compat_str(traceback.format_exc())
 402                 else:
 403                     tb_data = traceback.format_list(traceback.extract_stack())
 404                     tb = ''.join(tb_data)
 405             self.to_stderr(tb)
 406         if not self.params.get('ignoreerrors', False):
 407             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 408                 exc_info = sys.exc_info()[1].exc_info
 409             else:
 410                 exc_info = sys.exc_info()
 411             raise DownloadError(message, exc_info)
 412         self._download_retcode = 1
 413
 414     def report_warning(self, message):
 415         '''
 416         Print the message to stderr, it will be prefixed with 'WARNING:'
 417         If stderr is a tty file the 'WARNING:' will be colored
 418         '''
 419         if self.params.get('logger') is not None:
 420             self.params['logger'].warning(message)
 421         else:
 422             if self.params.get('no_warnings'):
 423                 return
 424             if self._err_file.isatty() and os.name != 'nt':
 425                 _msg_header = '\033[0;33mWARNING:\033[0m'
 426             else:
 427                 _msg_header = 'WARNING:'
 428             warning_message = '%s %s' % (_msg_header, message)
 429             self.to_stderr(warning_message)
 430
 431     def report_error(self, message, tb=None):
 432         '''
 433         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 434         in red if stderr is a tty file.
 435         '''
 436         if self._err_file.isatty() and os.name != 'nt':
 437             _msg_header = '\033[0;31mERROR:\033[0m'
 438         else:
 439             _msg_header = 'ERROR:'
 440         error_message = '%s %s' % (_msg_header, message)
 441         self.trouble(error_message, tb)
 442
 443     def report_file_already_downloaded(self, file_name):
 444         """Report file has already been fully downloaded."""
 445         try:
 446             self.to_screen('[download] %s has already been downloaded' % file_name)
 447         except UnicodeEncodeError:
 448             self.to_screen('[download] The file has already been downloaded')
 449
 450     def prepare_filename(self, info_dict):
 451         """Generate the output filename."""
 452         try:
 453             template_dict = dict(info_dict)
 454
 455             template_dict['epoch'] = int(time.time())
 456             autonumber_size = self.params.get('autonumber_size')
 457             if autonumber_size is None:
 458                 autonumber_size = 5
 459             autonumber_templ = '%0' + str(autonumber_size) + 'd'
 460             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 461             if template_dict.get('playlist_index') is not None:
 462                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
 463             if template_dict.get('resolution') is None:
 464                 if template_dict.get('width') and template_dict.get('height'):
 465                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 466                 elif template_dict.get('height'):
 467                     template_dict['resolution'] = '%sp' % template_dict['height']
 468                 elif template_dict.get('width'):
 469                     template_dict['resolution'] = '?x%d' % template_dict['width']
 470
 471             sanitize = lambda k, v: sanitize_filename(
 472                 compat_str(v),
 473                 restricted=self.params.get('restrictfilenames'),
 474                 is_id=(k == 'id'))
 475             template_dict = dict((k, sanitize(k, v))
 476                                  for k, v in template_dict.items()
 477                                  if v is not None)
 478             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 479
 480             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 481             tmpl = compat_expanduser(outtmpl)
 482             filename = tmpl % template_dict
 483             return filename
 484         except ValueError as err:
 485             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 486             return None
 487
 488     def _match_entry(self, info_dict):
 489         """ Returns None iff the file should be downloaded """
 490
 491         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 492         if 'title' in info_dict:
 493             # This can happen when we're just evaluating the playlist
 494             title = info_dict['title']
 495             matchtitle = self.params.get('matchtitle', False)
 496             if matchtitle:
 497                 if not re.search(matchtitle, title, re.IGNORECASE):
 498                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 499             rejecttitle = self.params.get('rejecttitle', False)
 500             if rejecttitle:
 501                 if re.search(rejecttitle, title, re.IGNORECASE):
 502                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 503         date = info_dict.get('upload_date', None)
 504         if date is not None:
 505             dateRange = self.params.get('daterange', DateRange())
 506             if date not in dateRange:
 507                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 508         view_count = info_dict.get('view_count', None)
 509         if view_count is not None:
 510             min_views = self.params.get('min_views')
 511             if min_views is not None and view_count < min_views:
 512                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 513             max_views = self.params.get('max_views')
 514             if max_views is not None and view_count > max_views:
 515                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 516         age_limit = self.params.get('age_limit')
 517         if age_limit is not None:
 518             actual_age_limit = info_dict.get('age_limit')
 519             if actual_age_limit is None:
 520                 actual_age_limit = 0
 521             if age_limit < actual_age_limit:
 522                 return 'Skipping "' + title + '" because it is age restricted'
 523         if self.in_download_archive(info_dict):
 524             return '%s has already been recorded in archive' % video_title
 525         return None
 526
 527     @staticmethod
 528     def add_extra_info(info_dict, extra_info):
 529         '''Set the keys from extra_info in info dict if they are missing'''
 530         for key, value in extra_info.items():
 531             info_dict.setdefault(key, value)
 532
 533     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 534                      process=True):
 535         '''
 536         Returns a list with a dictionary for each video we find.
 537         If 'download', also downloads the videos.
 538         extra_info is a dict containing the extra values to add to each result
 539          '''
 540
 541         if ie_key:
 542             ies = [self.get_info_extractor(ie_key)]
 543         else:
 544             ies = self._ies
 545
 546         for ie in ies:
 547             if not ie.suitable(url):
 548                 continue
 549
 550             if not ie.working():
 551                 self.report_warning('The program functionality for this site has been marked as broken, '
 552                                     'and will probably not work.')
 553
 554             try:
 555                 ie_result = ie.extract(url)
 556                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
 557                     break
 558                 if isinstance(ie_result, list):
 559                     # Backwards compatibility: old IE result format
 560                     ie_result = {
 561                         '_type': 'compat_list',
 562                         'entries': ie_result,
 563                     }
 564                 self.add_default_extra_info(ie_result, ie, url)
 565                 if process:
 566                     return self.process_ie_result(ie_result, download, extra_info)
 567                 else:
 568                     return ie_result
 569             except ExtractorError as de:  # An error we somewhat expected
 570                 self.report_error(compat_str(de), de.format_traceback())
 571                 break
 572             except MaxDownloadsReached:
 573                 raise
 574             except Exception as e:
 575                 if self.params.get('ignoreerrors', False):
 576                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 577                     break
 578                 else:
 579                     raise
 580         else:
 581             self.report_error('no suitable InfoExtractor for URL %s' % url)
 582
 583     def add_default_extra_info(self, ie_result, ie, url):
 584         self.add_extra_info(ie_result, {
 585             'extractor': ie.IE_NAME,
 586             'webpage_url': url,
 587             'webpage_url_basename': url_basename(url),
 588             'extractor_key': ie.ie_key(),
 589         })
 590
 591     def process_ie_result(self, ie_result, download=True, extra_info={}):
 592         """
 593         Take the result of the ie(may be modified) and resolve all unresolved
 594         references (URLs, playlist items).
 595
 596         It will also download the videos if 'download'.
 597         Returns the resolved ie_result.
 598         """
 599
 600         result_type = ie_result.get('_type', 'video')
 601
 602         if result_type in ('url', 'url_transparent'):
 603             extract_flat = self.params.get('extract_flat', False)
 604             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
 605                     extract_flat is True):
 606                 if self.params.get('forcejson', False):
 607                     self.to_stdout(json.dumps(ie_result))
 608                 return ie_result
 609
 610         if result_type == 'video':
 611             self.add_extra_info(ie_result, extra_info)
 612             return self.process_video_result(ie_result, download=download)
 613         elif result_type == 'url':
 614             # We have to add extra_info to the results because it may be
 615             # contained in a playlist
 616             return self.extract_info(ie_result['url'],
 617                                      download,
 618                                      ie_key=ie_result.get('ie_key'),
 619                                      extra_info=extra_info)
 620         elif result_type == 'url_transparent':
 621             # Use the information from the embedding page
 622             info = self.extract_info(
 623                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 624                 extra_info=extra_info, download=False, process=False)
 625
 626             force_properties = dict(
 627                 (k, v) for k, v in ie_result.items() if v is not None)
 628             for f in ('_type', 'url'):
 629                 if f in force_properties:
 630                     del force_properties[f]
 631             new_result = info.copy()
 632             new_result.update(force_properties)
 633
 634             assert new_result.get('_type') != 'url_transparent'
 635
 636             return self.process_ie_result(
 637                 new_result, download=download, extra_info=extra_info)
 638         elif result_type == 'playlist' or result_type == 'multi_video':
 639             # We process each entry in the playlist
 640             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 641             self.to_screen('[download] Downloading playlist: %s' % playlist)
 642
 643             playlist_results = []
 644
 645             playliststart = self.params.get('playliststart', 1) - 1
 646             playlistend = self.params.get('playlistend', None)
 647             # For backwards compatibility, interpret -1 as whole list
 648             if playlistend == -1:
 649                 playlistend = None
 650
 651             ie_entries = ie_result['entries']
 652             if isinstance(ie_entries, list):
 653                 n_all_entries = len(ie_entries)
 654                 entries = ie_entries[playliststart:playlistend]
 655                 n_entries = len(entries)
 656                 self.to_screen(
 657                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
 658                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
 659             elif isinstance(ie_entries, PagedList):
 660                 entries = ie_entries.getslice(
 661                     playliststart, playlistend)
 662                 n_entries = len(entries)
 663                 self.to_screen(
 664                     "[%s] playlist %s: Downloading %d videos" %
 665                     (ie_result['extractor'], playlist, n_entries))
 666             else:  # iterable
 667                 entries = list(itertools.islice(
 668                     ie_entries, playliststart, playlistend))
 669                 n_entries = len(entries)
 670                 self.to_screen(
 671                     "[%s] playlist %s: Downloading %d videos" %
 672                     (ie_result['extractor'], playlist, n_entries))
 673
 674             if self.params.get('playlistreverse', False):
 675                 entries = entries[::-1]
 676
 677             for i, entry in enumerate(entries, 1):
 678                 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
 679                 extra = {
 680                     'n_entries': n_entries,
 681                     'playlist': playlist,
 682                     'playlist_id': ie_result.get('id'),
 683                     'playlist_title': ie_result.get('title'),
 684                     'playlist_index': i + playliststart,
 685                     'extractor': ie_result['extractor'],
 686                     'webpage_url': ie_result['webpage_url'],
 687                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
 688                     'extractor_key': ie_result['extractor_key'],
 689                 }
 690
 691                 reason = self._match_entry(entry)
 692                 if reason is not None:
 693                     self.to_screen('[download] ' + reason)
 694                     continue
 695
 696                 entry_result = self.process_ie_result(entry,
 697                                                       download=download,
 698                                                       extra_info=extra)
 699                 playlist_results.append(entry_result)
 700             ie_result['entries'] = playlist_results
 701             return ie_result
 702         elif result_type == 'compat_list':
 703             self.report_warning(
 704                 'Extractor %s returned a compat_list result. '
 705                 'It needs to be updated.' % ie_result.get('extractor'))
 706
 707             def _fixup(r):
 708                 self.add_extra_info(
 709                     r,
 710                     {
 711                         'extractor': ie_result['extractor'],
 712                         'webpage_url': ie_result['webpage_url'],
 713                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
 714                         'extractor_key': ie_result['extractor_key'],
 715                     }
 716                 )
 717                 return r
 718             ie_result['entries'] = [
 719                 self.process_ie_result(_fixup(r), download, extra_info)
 720                 for r in ie_result['entries']
 721             ]
 722             return ie_result
 723         else:
 724             raise Exception('Invalid result type: %s' % result_type)
 725
 726     def select_format(self, format_spec, available_formats):
 727         if format_spec == 'best' or format_spec is None:
 728             return available_formats[-1]
 729         elif format_spec == 'worst':
 730             return available_formats[0]
 731         elif format_spec == 'bestaudio':
 732             audio_formats = [
 733                 f for f in available_formats
 734                 if f.get('vcodec') == 'none']
 735             if audio_formats:
 736                 return audio_formats[-1]
 737         elif format_spec == 'worstaudio':
 738             audio_formats = [
 739                 f for f in available_formats
 740                 if f.get('vcodec') == 'none']
 741             if audio_formats:
 742                 return audio_formats[0]
 743         elif format_spec == 'bestvideo':
 744             video_formats = [
 745                 f for f in available_formats
 746                 if f.get('acodec') == 'none']
 747             if video_formats:
 748                 return video_formats[-1]
 749         elif format_spec == 'worstvideo':
 750             video_formats = [
 751                 f for f in available_formats
 752                 if f.get('acodec') == 'none']
 753             if video_formats:
 754                 return video_formats[0]
 755         else:
 756             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
 757             if format_spec in extensions:
 758                 filter_f = lambda f: f['ext'] == format_spec
 759             else:
 760                 filter_f = lambda f: f['format_id'] == format_spec
 761             matches = list(filter(filter_f, available_formats))
 762             if matches:
 763                 return matches[-1]
 764         return None
 765
 766     def process_video_result(self, info_dict, download=True):
 767         assert info_dict.get('_type', 'video') == 'video'
 768
 769         if 'id' not in info_dict:
 770             raise ExtractorError('Missing "id" field in extractor result')
 771         if 'title' not in info_dict:
 772             raise ExtractorError('Missing "title" field in extractor result')
 773
 774         if 'playlist' not in info_dict:
 775             # It isn't part of a playlist
 776             info_dict['playlist'] = None
 777             info_dict['playlist_index'] = None
 778
 779         thumbnails = info_dict.get('thumbnails')
 780         if thumbnails:
 781             thumbnails.sort(key=lambda t: (
 782                 t.get('width'), t.get('height'), t.get('url')))
 783             for t in thumbnails:
 784                 if 'width' in t and 'height' in t:
 785                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
 786
 787         if thumbnails and 'thumbnail' not in info_dict:
 788             info_dict['thumbnail'] = thumbnails[-1]['url']
 789
 790         if 'display_id' not in info_dict and 'id' in info_dict:
 791             info_dict['display_id'] = info_dict['id']
 792
 793         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
 794             # Working around negative timestamps in Windows
 795             # (see http://bugs.python.org/issue1646728)
 796             if info_dict['timestamp'] < 0 and os.name == 'nt':
 797                 info_dict['timestamp'] = 0
 798             upload_date = datetime.datetime.utcfromtimestamp(
 799                 info_dict['timestamp'])
 800             info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
 801
 802         # This extractors handle format selection themselves
 803         if info_dict['extractor'] in ['Youku']:
 804             if download:
 805                 self.process_info(info_dict)
 806             return info_dict
 807
 808         # We now pick which formats have to be downloaded
 809         if info_dict.get('formats') is None:
 810             # There's only one format available
 811             formats = [info_dict]
 812         else:
 813             formats = info_dict['formats']
 814
 815         if not formats:
 816             raise ExtractorError('No video formats found!')
 817
 818         # We check that all the formats have the format and format_id fields
 819         for i, format in enumerate(formats):
 820             if 'url' not in format:
 821                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
 822
 823             if format.get('format_id') is None:
 824                 format['format_id'] = compat_str(i)
 825             if format.get('format') is None:
 826                 format['format'] = '{id} - {res}{note}'.format(
 827                     id=format['format_id'],
 828                     res=self.format_resolution(format),
 829                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
 830                 )
 831             # Automatically determine file extension if missing
 832             if 'ext' not in format:
 833                 format['ext'] = determine_ext(format['url']).lower()
 834
 835         format_limit = self.params.get('format_limit', None)
 836         if format_limit:
 837             formats = list(takewhile_inclusive(
 838                 lambda f: f['format_id'] != format_limit, formats
 839             ))
 840
 841         # TODO Central sorting goes here
 842
 843         if formats[0] is not info_dict:
 844             # only set the 'formats' fields if the original info_dict list them
 845             # otherwise we end up with a circular reference, the first (and unique)
 846             # element in the 'formats' field in info_dict is info_dict itself,
 847             # wich can't be exported to json
 848             info_dict['formats'] = formats
 849         if self.params.get('listformats', None):
 850             self.list_formats(info_dict)
 851             return
 852
 853         req_format = self.params.get('format')
 854         if req_format is None:
 855             req_format = 'best'
 856         formats_to_download = []
 857         # The -1 is for supporting YoutubeIE
 858         if req_format in ('-1', 'all'):
 859             formats_to_download = formats
 860         else:
 861             for rfstr in req_format.split(','):
 862                 # We can accept formats requested in the format: 34/5/best, we pick
 863                 # the first that is available, starting from left
 864                 req_formats = rfstr.split('/')
 865                 for rf in req_formats:
 866                     if re.match(r'.+?\+.+?', rf) is not None:
 867                         # Two formats have been requested like '137+139'
 868                         format_1, format_2 = rf.split('+')
 869                         formats_info = (self.select_format(format_1, formats),
 870                                         self.select_format(format_2, formats))
 871                         if all(formats_info):
 872                             # The first format must contain the video and the
 873                             # second the audio
 874                             if formats_info[0].get('vcodec') == 'none':
 875                                 self.report_error('The first format must '
 876                                                   'contain the video, try using '
 877                                                   '"-f %s+%s"' % (format_2, format_1))
 878                                 return
 879                             selected_format = {
 880                                 'requested_formats': formats_info,
 881                                 'format': rf,
 882                                 'ext': formats_info[0]['ext'],
 883                             }
 884                         else:
 885                             selected_format = None
 886                     else:
 887                         selected_format = self.select_format(rf, formats)
 888                     if selected_format is not None:
 889                         formats_to_download.append(selected_format)
 890                         break
 891         if not formats_to_download:
 892             raise ExtractorError('requested format not available',
 893                                  expected=True)
 894
 895         if download:
 896             if len(formats_to_download) > 1:
 897                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
 898             for format in formats_to_download:
 899                 new_info = dict(info_dict)
 900                 new_info.update(format)
 901                 self.process_info(new_info)
 902         # We update the info dict with the best quality format (backwards compatibility)
 903         info_dict.update(formats_to_download[-1])
 904         return info_dict
 905
 906     def process_info(self, info_dict):
 907         """Process a single resolved IE result."""
 908
 909         assert info_dict.get('_type', 'video') == 'video'
 910
 911         max_downloads = self.params.get('max_downloads')
 912         if max_downloads is not None:
 913             if self._num_downloads >= int(max_downloads):
 914                 raise MaxDownloadsReached()
 915
 916         info_dict['fulltitle'] = info_dict['title']
 917         if len(info_dict['title']) > 200:
 918             info_dict['title'] = info_dict['title'][:197] + '...'
 919
 920         # Keep for backwards compatibility
 921         info_dict['stitle'] = info_dict['title']
 922
 923         if 'format' not in info_dict:
 924             info_dict['format'] = info_dict['ext']
 925
 926         reason = self._match_entry(info_dict)
 927         if reason is not None:
 928             self.to_screen('[download] ' + reason)
 929             return
 930
 931         self._num_downloads += 1
 932
 933         filename = self.prepare_filename(info_dict)
 934
 935         # Forced printings
 936         if self.params.get('forcetitle', False):
 937             self.to_stdout(info_dict['fulltitle'])
 938         if self.params.get('forceid', False):
 939             self.to_stdout(info_dict['id'])
 940         if self.params.get('forceurl', False):
 941             if info_dict.get('requested_formats') is not None:
 942                 for f in info_dict['requested_formats']:
 943                     self.to_stdout(f['url'] + f.get('play_path', ''))
 944             else:
 945                 # For RTMP URLs, also include the playpath
 946                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
 947         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
 948             self.to_stdout(info_dict['thumbnail'])
 949         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
 950             self.to_stdout(info_dict['description'])
 951         if self.params.get('forcefilename', False) and filename is not None:
 952             self.to_stdout(filename)
 953         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
 954             self.to_stdout(formatSeconds(info_dict['duration']))
 955         if self.params.get('forceformat', False):
 956             self.to_stdout(info_dict['format'])
 957         if self.params.get('forcejson', False):
 958             info_dict['_filename'] = filename
 959             self.to_stdout(json.dumps(info_dict))
 960         if self.params.get('dump_single_json', False):
 961             info_dict['_filename'] = filename
 962
 963         # Do nothing else if in simulate mode
 964         if self.params.get('simulate', False):
 965             return
 966
 967         if filename is None:
 968             return
 969
 970         try:
 971             dn = os.path.dirname(encodeFilename(filename))
 972             if dn and not os.path.exists(dn):
 973                 os.makedirs(dn)
 974         except (OSError, IOError) as err:
 975             self.report_error('unable to create directory ' + compat_str(err))
 976             return
 977
 978         if self.params.get('writedescription', False):
 979             descfn = filename + '.description'
 980             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
 981                 self.to_screen('[info] Video description is already present')
 982             else:
 983                 try:
 984                     self.to_screen('[info] Writing video description to: ' + descfn)
 985                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
 986                         descfile.write(info_dict['description'])
 987                 except (KeyError, TypeError):
 988                     self.report_warning('There\'s no description to write.')
 989                 except (OSError, IOError):
 990                     self.report_error('Cannot write description file ' + descfn)
 991                     return
 992
 993         if self.params.get('writeannotations', False):
 994             annofn = filename + '.annotations.xml'
 995             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
 996                 self.to_screen('[info] Video annotations are already present')
 997             else:
 998                 try:
 999                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1000                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1001                         annofile.write(info_dict['annotations'])
1002                 except (KeyError, TypeError):
1003                     self.report_warning('There are no annotations to write.')
1004                 except (OSError, IOError):
1005                     self.report_error('Cannot write annotations file: ' + annofn)
1006                     return
1007
1008         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1009                                        self.params.get('writeautomaticsub')])
1010
1011         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
1012             # subtitles download errors are already managed as troubles in relevant IE
1013             # that way it will silently go on when used with unsupporting IE
1014             subtitles = info_dict['subtitles']
1015             sub_format = self.params.get('subtitlesformat', 'srt')
1016             for sub_lang in subtitles.keys():
1017                 sub = subtitles[sub_lang]
1018                 if sub is None:
1019                     continue
1020                 try:
1021                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1022                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1023                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1024                     else:
1025                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1026                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1027                             subfile.write(sub)
1028                 except (OSError, IOError):
1029                     self.report_error('Cannot write subtitles file ' + sub_filename)
1030                     return
1031
1032         if self.params.get('writeinfojson', False):
1033             infofn = os.path.splitext(filename)[0] + '.info.json'
1034             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1035                 self.to_screen('[info] Video description metadata is already present')
1036             else:
1037                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1038                 try:
1039                     write_json_file(info_dict, infofn)
1040                 except (OSError, IOError):
1041                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1042                     return
1043
1044         if self.params.get('writethumbnail', False):
1045             if info_dict.get('thumbnail') is not None:
1046                 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
1047                 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
1048                 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1049                     self.to_screen('[%s] %s: Thumbnail is already present' %
1050                                    (info_dict['extractor'], info_dict['id']))
1051                 else:
1052                     self.to_screen('[%s] %s: Downloading thumbnail ...' %
1053                                    (info_dict['extractor'], info_dict['id']))
1054                     try:
1055                         uf = self.urlopen(info_dict['thumbnail'])
1056                         with open(thumb_filename, 'wb') as thumbf:
1057                             shutil.copyfileobj(uf, thumbf)
1058                         self.to_screen('[%s] %s: Writing thumbnail to: %s' %
1059                                        (info_dict['extractor'], info_dict['id'], thumb_filename))
1060                     except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1061                         self.report_warning('Unable to download thumbnail "%s": %s' %
1062                                             (info_dict['thumbnail'], compat_str(err)))
1063
1064         if not self.params.get('skip_download', False):
1065             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
1066                 success = True
1067             else:
1068                 try:
1069                     def dl(name, info):
1070                         fd = get_suitable_downloader(info)(self, self.params)
1071                         for ph in self._progress_hooks:
1072                             fd.add_progress_hook(ph)
1073                         if self.params.get('verbose'):
1074                             self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1075                         return fd.download(name, info)
1076                     if info_dict.get('requested_formats') is not None:
1077                         downloaded = []
1078                         success = True
1079                         merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1080                         if not merger._executable:
1081                             postprocessors = []
1082                             self.report_warning('You have requested multiple '
1083                                                 'formats but ffmpeg or avconv are not installed.'
1084                                                 ' The formats won\'t be merged')
1085                         else:
1086                             postprocessors = [merger]
1087                         for f in info_dict['requested_formats']:
1088                             new_info = dict(info_dict)
1089                             new_info.update(f)
1090                             fname = self.prepare_filename(new_info)
1091                             fname = prepend_extension(fname, 'f%s' % f['format_id'])
1092                             downloaded.append(fname)
1093                             partial_success = dl(fname, new_info)
1094                             success = success and partial_success
1095                         info_dict['__postprocessors'] = postprocessors
1096                         info_dict['__files_to_merge'] = downloaded
1097                     else:
1098                         # Just a single file
1099                         success = dl(filename, info_dict)
1100                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1101                     self.report_error('unable to download video data: %s' % str(err))
1102                     return
1103                 except (OSError, IOError) as err:
1104                     raise UnavailableVideoError(err)
1105                 except (ContentTooShortError, ) as err:
1106                     self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1107                     return
1108
1109             if success:
1110                 try:
1111                     self.post_process(filename, info_dict)
1112                 except (PostProcessingError) as err:
1113                     self.report_error('postprocessing: %s' % str(err))
1114                     return
1115
1116         self.record_download_archive(info_dict)
1117
1118     def download(self, url_list):
1119         """Download a given list of URLs."""
1120         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1121         if (len(url_list) > 1 and
1122                 '%' not in outtmpl
1123                 and self.params.get('max_downloads') != 1):
1124             raise SameFileError(outtmpl)
1125
1126         for url in url_list:
1127             try:
1128                 # It also downloads the videos
1129                 res = self.extract_info(url)
1130             except UnavailableVideoError:
1131                 self.report_error('unable to download video')
1132             except MaxDownloadsReached:
1133                 self.to_screen('[info] Maximum number of downloaded files reached.')
1134                 raise
1135             else:
1136                 if self.params.get('dump_single_json', False):
1137                     self.to_stdout(json.dumps(res))
1138
1139         return self._download_retcode
1140
1141     def download_with_info_file(self, info_filename):
1142         with io.open(info_filename, 'r', encoding='utf-8') as f:
1143             info = json.load(f)
1144         try:
1145             self.process_ie_result(info, download=True)
1146         except DownloadError:
1147             webpage_url = info.get('webpage_url')
1148             if webpage_url is not None:
1149                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1150                 return self.download([webpage_url])
1151             else:
1152                 raise
1153         return self._download_retcode
1154
1155     def post_process(self, filename, ie_info):
1156         """Run all the postprocessors on the given file."""
1157         info = dict(ie_info)
1158         info['filepath'] = filename
1159         keep_video = None
1160         pps_chain = []
1161         if ie_info.get('__postprocessors') is not None:
1162             pps_chain.extend(ie_info['__postprocessors'])
1163         pps_chain.extend(self._pps)
1164         for pp in pps_chain:
1165             try:
1166                 keep_video_wish, new_info = pp.run(info)
1167                 if keep_video_wish is not None:
1168                     if keep_video_wish:
1169                         keep_video = keep_video_wish
1170                     elif keep_video is None:
1171                         # No clear decision yet, let IE decide
1172                         keep_video = keep_video_wish
1173             except PostProcessingError as e:
1174                 self.report_error(e.msg)
1175         if keep_video is False and not self.params.get('keepvideo', False):
1176             try:
1177                 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1178                 os.remove(encodeFilename(filename))
1179             except (IOError, OSError):
1180                 self.report_warning('Unable to remove downloaded video file')
1181
1182     def _make_archive_id(self, info_dict):
1183         # Future-proof against any change in case
1184         # and backwards compatibility with prior versions
1185         extractor = info_dict.get('extractor_key')
1186         if extractor is None:
1187             if 'id' in info_dict:
1188                 extractor = info_dict.get('ie_key')  # key in a playlist
1189         if extractor is None:
1190             return None  # Incomplete video information
1191         return extractor.lower() + ' ' + info_dict['id']
1192
1193     def in_download_archive(self, info_dict):
1194         fn = self.params.get('download_archive')
1195         if fn is None:
1196             return False
1197
1198         vid_id = self._make_archive_id(info_dict)
1199         if vid_id is None:
1200             return False  # Incomplete video information
1201
1202         try:
1203             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1204                 for line in archive_file:
1205                     if line.strip() == vid_id:
1206                         return True
1207         except IOError as ioe:
1208             if ioe.errno != errno.ENOENT:
1209                 raise
1210         return False
1211
1212     def record_download_archive(self, info_dict):
1213         fn = self.params.get('download_archive')
1214         if fn is None:
1215             return
1216         vid_id = self._make_archive_id(info_dict)
1217         assert vid_id
1218         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1219             archive_file.write(vid_id + '\n')
1220
1221     @staticmethod
1222     def format_resolution(format, default='unknown'):
1223         if format.get('vcodec') == 'none':
1224             return 'audio only'
1225         if format.get('resolution') is not None:
1226             return format['resolution']
1227         if format.get('height') is not None:
1228             if format.get('width') is not None:
1229                 res = '%sx%s' % (format['width'], format['height'])
1230             else:
1231                 res = '%sp' % format['height']
1232         elif format.get('width') is not None:
1233             res = '?x%d' % format['width']
1234         else:
1235             res = default
1236         return res
1237
1238     def _format_note(self, fdict):
1239         res = ''
1240         if fdict.get('ext') in ['f4f', 'f4m']:
1241             res += '(unsupported) '
1242         if fdict.get('format_note') is not None:
1243             res += fdict['format_note'] + ' '
1244         if fdict.get('tbr') is not None:
1245             res += '%4dk ' % fdict['tbr']
1246         if fdict.get('container') is not None:
1247             if res:
1248                 res += ', '
1249             res += '%s container' % fdict['container']
1250         if (fdict.get('vcodec') is not None and
1251                 fdict.get('vcodec') != 'none'):
1252             if res:
1253                 res += ', '
1254             res += fdict['vcodec']
1255             if fdict.get('vbr') is not None:
1256                 res += '@'
1257         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1258             res += 'video@'
1259         if fdict.get('vbr') is not None:
1260             res += '%4dk' % fdict['vbr']
1261         if fdict.get('fps') is not None:
1262             res += ', %sfps' % fdict['fps']
1263         if fdict.get('acodec') is not None:
1264             if res:
1265                 res += ', '
1266             if fdict['acodec'] == 'none':
1267                 res += 'video only'
1268             else:
1269                 res += '%-5s' % fdict['acodec']
1270         elif fdict.get('abr') is not None:
1271             if res:
1272                 res += ', '
1273             res += 'audio'
1274         if fdict.get('abr') is not None:
1275             res += '@%3dk' % fdict['abr']
1276         if fdict.get('asr') is not None:
1277             res += ' (%5dHz)' % fdict['asr']
1278         if fdict.get('filesize') is not None:
1279             if res:
1280                 res += ', '
1281             res += format_bytes(fdict['filesize'])
1282         elif fdict.get('filesize_approx') is not None:
1283             if res:
1284                 res += ', '
1285             res += '~' + format_bytes(fdict['filesize_approx'])
1286         return res
1287
1288     def list_formats(self, info_dict):
1289         def line(format, idlen=20):
1290             return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1291                 format['format_id'],
1292                 format['ext'],
1293                 self.format_resolution(format),
1294                 self._format_note(format),
1295             ))
1296
1297         formats = info_dict.get('formats', [info_dict])
1298         idlen = max(len('format code'),
1299                     max(len(f['format_id']) for f in formats))
1300         formats_s = [line(f, idlen) for f in formats]
1301         if len(formats) > 1:
1302             formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1303             formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1304
1305         header_line = line({
1306             'format_id': 'format code', 'ext': 'extension',
1307             'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1308         self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1309                        (info_dict['id'], header_line, '\n'.join(formats_s)))
1310
1311     def urlopen(self, req):
1312         """ Start an HTTP download """
1313
1314         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1315         # always respected by websites, some tend to give out URLs with non percent-encoded
1316         # non-ASCII characters (see telemb.py, ard.py [#3412])
1317         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1318         # To work around aforementioned issue we will replace request's original URL with
1319         # percent-encoded one
1320         req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
1321         url = req if req_is_string else req.get_full_url()
1322         url_escaped = escape_url(url)
1323
1324         # Substitute URL if any change after escaping
1325         if url != url_escaped:
1326             if req_is_string:
1327                 req = url_escaped
1328             else:
1329                 req = compat_urllib_request.Request(
1330                     url_escaped, data=req.data, headers=req.headers,
1331                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1332
1333         return self._opener.open(req, timeout=self._socket_timeout)
1334
1335     def print_debug_header(self):
1336         if not self.params.get('verbose'):
1337             return
1338
1339         if type('') is not compat_str:
1340             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1341             self.report_warning(
1342                 'Your Python is broken! Update to a newer and supported version')
1343
1344         stdout_encoding = getattr(
1345             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1346         encoding_str = (
1347             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1348                 locale.getpreferredencoding(),
1349                 sys.getfilesystemencoding(),
1350                 stdout_encoding,
1351                 self.get_encoding()))
1352         write_string(encoding_str, encoding=None)
1353
1354         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1355         try:
1356             sp = subprocess.Popen(
1357                 ['git', 'rev-parse', '--short', 'HEAD'],
1358                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1359                 cwd=os.path.dirname(os.path.abspath(__file__)))
1360             out, err = sp.communicate()
1361             out = out.decode().strip()
1362             if re.match('[0-9a-f]+', out):
1363                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1364         except:
1365             try:
1366                 sys.exc_clear()
1367             except:
1368                 pass
1369         self._write_string('[debug] Python version %s - %s\n' % (
1370             platform.python_version(), platform_name()))
1371
1372         exe_versions = FFmpegPostProcessor.get_versions()
1373         exe_versions['rtmpdump'] = rtmpdump_version()
1374         exe_str = ', '.join(
1375             '%s %s' % (exe, v)
1376             for exe, v in sorted(exe_versions.items())
1377             if v
1378         )
1379         if not exe_str:
1380             exe_str = 'none'
1381         self._write_string('[debug] exe versions: %s\n' % exe_str)
1382
1383         proxy_map = {}
1384         for handler in self._opener.handlers:
1385             if hasattr(handler, 'proxies'):
1386                 proxy_map.update(handler.proxies)
1387         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1388
1389     def _setup_opener(self):
1390         timeout_val = self.params.get('socket_timeout')
1391         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1392
1393         opts_cookiefile = self.params.get('cookiefile')
1394         opts_proxy = self.params.get('proxy')
1395
1396         if opts_cookiefile is None:
1397             self.cookiejar = compat_cookiejar.CookieJar()
1398         else:
1399             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1400                 opts_cookiefile)
1401             if os.access(opts_cookiefile, os.R_OK):
1402                 self.cookiejar.load()
1403
1404         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1405             self.cookiejar)
1406         if opts_proxy is not None:
1407             if opts_proxy == '':
1408                 proxies = {}
1409             else:
1410                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1411         else:
1412             proxies = compat_urllib_request.getproxies()
1413             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1414             if 'http' in proxies and 'https' not in proxies:
1415                 proxies['https'] = proxies['http']
1416         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1417
1418         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1419         https_handler = make_HTTPS_handler(
1420             self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1421         ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1422         opener = compat_urllib_request.build_opener(
1423             https_handler, proxy_handler, cookie_processor, ydlh)
1424         # Delete the default user-agent header, which would otherwise apply in
1425         # cases where our custom HTTP handler doesn't come into play
1426         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1427         opener.addheaders = []
1428         self._opener = opener
1429
1430     def encode(self, s):
1431         if isinstance(s, bytes):
1432             return s  # Already encoded
1433
1434         try:
1435             return s.encode(self.get_encoding())
1436         except UnicodeEncodeError as err:
1437             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1438             raise
1439
1440     def get_encoding(self):
1441         encoding = self.params.get('encoding')
1442         if encoding is None:
1443             encoding = preferredencoding()
1444         return encoding