2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
30 compat_urllib_request,
53 UnavailableVideoError,
60 from .extractor import get_info_extractor, gen_extractors
61 from .downloader import get_suitable_downloader
62 from .postprocessor import FFmpegMergerPP
63 from .version import __version__
66 class YoutubeDL(object):
69 YoutubeDL objects are the ones responsible of downloading the
70 actual video file and writing it to disk if the user has requested
71 it, among some other tasks. In most cases there should be one per
72 program. As, given a video URL, the downloader doesn't know how to
73 extract all the needed information, task that InfoExtractors do, it
74 has to pass the URL to one of them.
76 For this, YoutubeDL objects have a method that allows
77 InfoExtractors to be registered in a given order. When it is passed
78 a URL, the YoutubeDL object handles it to the first InfoExtractor it
79 finds that reports being able to handle it. The InfoExtractor extracts
80 all the information about the video or videos the URL refers to, and
81 YoutubeDL processes the extracted information, possibly using a File
82 Downloader to download the video.
84 YoutubeDL objects accept a lot of parameters. In order not to saturate
85 the object constructor with arguments, it receives a dictionary of
86 options instead. These options are available through the params
87 attribute for the InfoExtractors to use. The YoutubeDL also
88 registers itself as the downloader in charge for the InfoExtractors
89 that are added to it, so this is a "mutual registration".
93 username: Username for authentication purposes.
94 password: Password for authentication purposes.
95 videopassword: Password for access a video.
96 usenetrc: Use netrc for authentication instead.
97 verbose: Print additional info to stdout.
98 quiet: Do not print messages to stdout.
99 no_warnings: Do not print out anything for warnings.
100 forceurl: Force printing final URL.
101 forcetitle: Force printing title.
102 forceid: Force printing ID.
103 forcethumbnail: Force printing thumbnail URL.
104 forcedescription: Force printing description.
105 forcefilename: Force printing final filename.
106 forceduration: Force printing duration.
107 forcejson: Force printing info_dict as JSON.
108 simulate: Do not download the video files.
109 format: Video format code.
110 format_limit: Highest quality format to try.
111 outtmpl: Template for output names.
112 restrictfilenames: Do not allow "&" and spaces in file names
113 ignoreerrors: Do not stop on download errors.
114 nooverwrites: Prevent overwriting files.
115 playliststart: Playlist item to start at.
116 playlistend: Playlist item to end at.
117 matchtitle: Download only matching titles.
118 rejecttitle: Reject downloads for matching titles.
119 logger: Log messages to a logging.Logger instance.
120 logtostderr: Log messages to stderr instead of stdout.
121 writedescription: Write the video description to a .description file
122 writeinfojson: Write the video description to a .info.json file
123 writeannotations: Write the video annotations to a .annotations.xml file
124 writethumbnail: Write the thumbnail image to a file
125 writesubtitles: Write the video subtitles to a file
126 writeautomaticsub: Write the automatic subtitles to a file
127 allsubtitles: Downloads all the subtitles of the video
128 (requires writesubtitles or writeautomaticsub)
129 listsubtitles: Lists all available subtitles for the video
130 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
131 subtitleslangs: List of languages of the subtitles to download
132 keepvideo: Keep the video file after post-processing
133 daterange: A DateRange object, download only if the upload_date is in the range.
134 skip_download: Skip the actual download of the video file
135 cachedir: Location of the cache files in the filesystem.
136 None to disable filesystem cache.
137 noplaylist: Download single video instead of a playlist if in doubt.
138 age_limit: An integer representing the user's age in years.
139 Unsuitable videos for the given age are skipped.
140 min_views: An integer representing the minimum view count the video
141 must have in order to not be skipped.
142 Videos without view count information are always
143 downloaded. None for no limit.
144 max_views: An integer representing the maximum view count.
145 Videos that are more popular than that are not
147 Videos without view count information are always
148 downloaded. None for no limit.
149 download_archive: File name of a file where all downloads are recorded.
150 Videos already present in the file are not downloaded
152 cookiefile: File name where cookies should be read from and dumped to.
153 nocheckcertificate:Do not verify SSL certificates
154 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
155 At the moment, this is only supported by YouTube.
156 proxy: URL of the proxy server to use
157 socket_timeout: Time to wait for unresponsive hosts, in seconds
158 bidi_workaround: Work around buggy terminals without bidirectional text
159 support, using fribidi
160 debug_printtraffic:Print out sent and received HTTP traffic
161 include_ads: Download ads as well
162 default_search: Prepend this string if an input url is not valid.
163 'auto' for elaborate guessing
164 encoding: Use this encoding instead of the system-specified.
166 The following parameters are not used by YoutubeDL itself, they are used by
168 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
169 noresizebuffer, retries, continuedl, noprogress, consoletitle
171 The following options are used by the post processors:
172 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
173 otherwise prefer avconv.
179 _download_retcode = None
180 _num_downloads = None
183 def __init__(self, params=None):
184 """Create a FileDownloader object with the given options."""
188 self._ies_instances = {}
190 self._progress_hooks = []
191 self._download_retcode = 0
192 self._num_downloads = 0
193 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
194 self._err_file = sys.stderr
197 if params.get('bidi_workaround', False):
200 master, slave = pty.openpty()
201 width = get_term_width()
205 width_args = ['-w', str(width)]
207 stdin=subprocess.PIPE,
209 stderr=self._err_file)
211 self._output_process = subprocess.Popen(
212 ['bidiv'] + width_args, **sp_kwargs
215 self._output_process = subprocess.Popen(
216 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
217 self._output_channel = os.fdopen(master, 'rb')
218 except OSError as ose:
220 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
224 if (sys.version_info >= (3,) and sys.platform != 'win32' and
225 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
226 and not params['restrictfilenames']):
227 # On Python 3, the Unicode filesystem API will throw errors (#1474)
229 'Assuming --restrict-filenames since file system encoding '
230 'cannot encode all charactes. '
231 'Set the LC_ALL environment variable to fix this.')
232 self.params['restrictfilenames'] = True
234 if '%(stitle)s' in self.params.get('outtmpl', ''):
235 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
239 def add_info_extractor(self, ie):
240 """Add an InfoExtractor object to the end of the list."""
242 self._ies_instances[ie.ie_key()] = ie
243 ie.set_downloader(self)
245 def get_info_extractor(self, ie_key):
247 Get an instance of an IE with name ie_key, it will try to get one from
248 the _ies list, if there's no instance it will create a new one and add
249 it to the extractor list.
251 ie = self._ies_instances.get(ie_key)
253 ie = get_info_extractor(ie_key)()
254 self.add_info_extractor(ie)
257 def add_default_info_extractors(self):
259 Add the InfoExtractors returned by gen_extractors to the end of the list
261 for ie in gen_extractors():
262 self.add_info_extractor(ie)
264 def add_post_processor(self, pp):
265 """Add a PostProcessor object to the end of the chain."""
267 pp.set_downloader(self)
269 def add_progress_hook(self, ph):
270 """Add the progress hook (currently only for the file downloader)"""
271 self._progress_hooks.append(ph)
273 def _bidi_workaround(self, message):
274 if not hasattr(self, '_output_channel'):
277 assert hasattr(self, '_output_process')
278 assert type(message) == type('')
279 line_count = message.count('\n') + 1
280 self._output_process.stdin.write((message + '\n').encode('utf-8'))
281 self._output_process.stdin.flush()
282 res = ''.join(self._output_channel.readline().decode('utf-8')
283 for _ in range(line_count))
284 return res[:-len('\n')]
286 def to_screen(self, message, skip_eol=False):
287 """Print message to stdout if not in quiet mode."""
288 return self.to_stdout(message, skip_eol, check_quiet=True)
290 def _write_string(self, s, out=None):
291 write_string(s, out=out, encoding=self.params.get('encoding'))
293 def to_stdout(self, message, skip_eol=False, check_quiet=False):
294 """Print message to stdout if not in quiet mode."""
295 if self.params.get('logger'):
296 self.params['logger'].debug(message)
297 elif not check_quiet or not self.params.get('quiet', False):
298 message = self._bidi_workaround(message)
299 terminator = ['\n', ''][skip_eol]
300 output = message + terminator
302 self._write_string(output, self._screen_file)
304 def to_stderr(self, message):
305 """Print message to stderr."""
306 assert type(message) == type('')
307 if self.params.get('logger'):
308 self.params['logger'].error(message)
310 message = self._bidi_workaround(message)
311 output = message + '\n'
312 self._write_string(output, self._err_file)
314 def to_console_title(self, message):
315 if not self.params.get('consoletitle', False):
317 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
318 # c_wchar_p() might not be necessary if `message` is
319 # already of type unicode()
320 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
321 elif 'TERM' in os.environ:
322 self._write_string('\033]0;%s\007' % message, self._screen_file)
324 def save_console_title(self):
325 if not self.params.get('consoletitle', False):
327 if 'TERM' in os.environ:
328 # Save the title on stack
329 self._write_string('\033[22;0t', self._screen_file)
331 def restore_console_title(self):
332 if not self.params.get('consoletitle', False):
334 if 'TERM' in os.environ:
335 # Restore the title from stack
336 self._write_string('\033[23;0t', self._screen_file)
339 self.save_console_title()
342 def __exit__(self, *args):
343 self.restore_console_title()
345 if self.params.get('cookiefile') is not None:
346 self.cookiejar.save()
348 def trouble(self, message=None, tb=None):
349 """Determine action to take when a download problem appears.
351 Depending on if the downloader has been configured to ignore
352 download errors or not, this method may throw an exception or
353 not when errors are found, after printing the message.
355 tb, if given, is additional traceback information.
357 if message is not None:
358 self.to_stderr(message)
359 if self.params.get('verbose'):
361 if sys.exc_info()[0]: # if .trouble has been called from an except block
363 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
364 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
365 tb += compat_str(traceback.format_exc())
367 tb_data = traceback.format_list(traceback.extract_stack())
368 tb = ''.join(tb_data)
370 if not self.params.get('ignoreerrors', False):
371 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
372 exc_info = sys.exc_info()[1].exc_info
374 exc_info = sys.exc_info()
375 raise DownloadError(message, exc_info)
376 self._download_retcode = 1
378 def report_warning(self, message):
380 Print the message to stderr, it will be prefixed with 'WARNING:'
381 If stderr is a tty file the 'WARNING:' will be colored
383 if self.params.get('logger') is not None:
384 self.params['logger'].warning(message)
386 if self.params.get('no_warnings'):
388 if self._err_file.isatty() and os.name != 'nt':
389 _msg_header = '\033[0;33mWARNING:\033[0m'
391 _msg_header = 'WARNING:'
392 warning_message = '%s %s' % (_msg_header, message)
393 self.to_stderr(warning_message)
395 def report_error(self, message, tb=None):
397 Do the same as trouble, but prefixes the message with 'ERROR:', colored
398 in red if stderr is a tty file.
400 if self._err_file.isatty() and os.name != 'nt':
401 _msg_header = '\033[0;31mERROR:\033[0m'
403 _msg_header = 'ERROR:'
404 error_message = '%s %s' % (_msg_header, message)
405 self.trouble(error_message, tb)
407 def report_file_already_downloaded(self, file_name):
408 """Report file has already been fully downloaded."""
410 self.to_screen('[download] %s has already been downloaded' % file_name)
411 except UnicodeEncodeError:
412 self.to_screen('[download] The file has already been downloaded')
414 def prepare_filename(self, info_dict):
415 """Generate the output filename."""
417 template_dict = dict(info_dict)
419 template_dict['epoch'] = int(time.time())
420 autonumber_size = self.params.get('autonumber_size')
421 if autonumber_size is None:
423 autonumber_templ = '%0' + str(autonumber_size) + 'd'
424 template_dict['autonumber'] = autonumber_templ % self._num_downloads
425 if template_dict.get('playlist_index') is not None:
426 template_dict['playlist_index'] = '%05d' % template_dict['playlist_index']
427 if template_dict.get('resolution') is None:
428 if template_dict.get('width') and template_dict.get('height'):
429 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
430 elif template_dict.get('height'):
431 template_dict['resolution'] = '%sp' % template_dict['height']
432 elif template_dict.get('width'):
433 template_dict['resolution'] = '?x%d' % template_dict['width']
435 sanitize = lambda k, v: sanitize_filename(
437 restricted=self.params.get('restrictfilenames'),
439 template_dict = dict((k, sanitize(k, v))
440 for k, v in template_dict.items()
442 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
444 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
445 tmpl = os.path.expanduser(outtmpl)
446 filename = tmpl % template_dict
448 except ValueError as err:
449 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
452 def _match_entry(self, info_dict):
453 """ Returns None iff the file should be downloaded """
455 video_title = info_dict.get('title', info_dict.get('id', 'video'))
456 if 'title' in info_dict:
457 # This can happen when we're just evaluating the playlist
458 title = info_dict['title']
459 matchtitle = self.params.get('matchtitle', False)
461 if not re.search(matchtitle, title, re.IGNORECASE):
462 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
463 rejecttitle = self.params.get('rejecttitle', False)
465 if re.search(rejecttitle, title, re.IGNORECASE):
466 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
467 date = info_dict.get('upload_date', None)
469 dateRange = self.params.get('daterange', DateRange())
470 if date not in dateRange:
471 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
472 view_count = info_dict.get('view_count', None)
473 if view_count is not None:
474 min_views = self.params.get('min_views')
475 if min_views is not None and view_count < min_views:
476 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
477 max_views = self.params.get('max_views')
478 if max_views is not None and view_count > max_views:
479 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
480 age_limit = self.params.get('age_limit')
481 if age_limit is not None:
482 if age_limit < info_dict.get('age_limit', 0):
483 return 'Skipping "' + title + '" because it is age restricted'
484 if self.in_download_archive(info_dict):
485 return '%s has already been recorded in archive' % video_title
489 def add_extra_info(info_dict, extra_info):
490 '''Set the keys from extra_info in info dict if they are missing'''
491 for key, value in extra_info.items():
492 info_dict.setdefault(key, value)
494 def extract_info(self, url, download=True, ie_key=None, extra_info={},
497 Returns a list with a dictionary for each video we find.
498 If 'download', also downloads the videos.
499 extra_info is a dict containing the extra values to add to each result
503 ies = [self.get_info_extractor(ie_key)]
508 if not ie.suitable(url):
512 self.report_warning('The program functionality for this site has been marked as broken, '
513 'and will probably not work.')
516 ie_result = ie.extract(url)
517 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
519 if isinstance(ie_result, list):
520 # Backwards compatibility: old IE result format
522 '_type': 'compat_list',
523 'entries': ie_result,
525 self.add_default_extra_info(ie_result, ie, url)
527 return self.process_ie_result(ie_result, download, extra_info)
530 except ExtractorError as de: # An error we somewhat expected
531 self.report_error(compat_str(de), de.format_traceback())
533 except MaxDownloadsReached:
535 except Exception as e:
536 if self.params.get('ignoreerrors', False):
537 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
542 self.report_error('no suitable InfoExtractor for URL %s' % url)
544 def add_default_extra_info(self, ie_result, ie, url):
545 self.add_extra_info(ie_result, {
546 'extractor': ie.IE_NAME,
548 'webpage_url_basename': url_basename(url),
549 'extractor_key': ie.ie_key(),
552 def process_ie_result(self, ie_result, download=True, extra_info={}):
554 Take the result of the ie(may be modified) and resolve all unresolved
555 references (URLs, playlist items).
557 It will also download the videos if 'download'.
558 Returns the resolved ie_result.
561 result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
562 if result_type == 'video':
563 self.add_extra_info(ie_result, extra_info)
564 return self.process_video_result(ie_result, download=download)
565 elif result_type == 'url':
566 # We have to add extra_info to the results because it may be
567 # contained in a playlist
568 return self.extract_info(ie_result['url'],
570 ie_key=ie_result.get('ie_key'),
571 extra_info=extra_info)
572 elif result_type == 'url_transparent':
573 # Use the information from the embedding page
574 info = self.extract_info(
575 ie_result['url'], ie_key=ie_result.get('ie_key'),
576 extra_info=extra_info, download=False, process=False)
578 def make_result(embedded_info):
579 new_result = ie_result.copy()
580 for f in ('_type', 'url', 'ext', 'player_url', 'formats',
581 'entries', 'ie_key', 'duration',
582 'subtitles', 'annotations', 'format',
583 'thumbnail', 'thumbnails'):
586 if f in embedded_info:
587 new_result[f] = embedded_info[f]
589 new_result = make_result(info)
591 assert new_result.get('_type') != 'url_transparent'
592 if new_result.get('_type') == 'compat_list':
593 new_result['entries'] = [
594 make_result(e) for e in new_result['entries']]
596 return self.process_ie_result(
597 new_result, download=download, extra_info=extra_info)
598 elif result_type == 'playlist':
599 # We process each entry in the playlist
600 playlist = ie_result.get('title', None) or ie_result.get('id', None)
601 self.to_screen('[download] Downloading playlist: %s' % playlist)
603 playlist_results = []
605 playliststart = self.params.get('playliststart', 1) - 1
606 playlistend = self.params.get('playlistend', None)
607 # For backwards compatibility, interpret -1 as whole list
608 if playlistend == -1:
611 if isinstance(ie_result['entries'], list):
612 n_all_entries = len(ie_result['entries'])
613 entries = ie_result['entries'][playliststart:playlistend]
614 n_entries = len(entries)
616 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
617 (ie_result['extractor'], playlist, n_all_entries, n_entries))
619 assert isinstance(ie_result['entries'], PagedList)
620 entries = ie_result['entries'].getslice(
621 playliststart, playlistend)
622 n_entries = len(entries)
624 "[%s] playlist %s: Downloading %d videos" %
625 (ie_result['extractor'], playlist, n_entries))
627 for i, entry in enumerate(entries, 1):
628 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
630 'playlist': playlist,
631 'playlist_index': i + playliststart,
632 'extractor': ie_result['extractor'],
633 'webpage_url': ie_result['webpage_url'],
634 'webpage_url_basename': url_basename(ie_result['webpage_url']),
635 'extractor_key': ie_result['extractor_key'],
638 reason = self._match_entry(entry)
639 if reason is not None:
640 self.to_screen('[download] ' + reason)
643 entry_result = self.process_ie_result(entry,
646 playlist_results.append(entry_result)
647 ie_result['entries'] = playlist_results
649 elif result_type == 'compat_list':
651 self.add_extra_info(r,
653 'extractor': ie_result['extractor'],
654 'webpage_url': ie_result['webpage_url'],
655 'webpage_url_basename': url_basename(ie_result['webpage_url']),
656 'extractor_key': ie_result['extractor_key'],
659 ie_result['entries'] = [
660 self.process_ie_result(_fixup(r), download, extra_info)
661 for r in ie_result['entries']
665 raise Exception('Invalid result type: %s' % result_type)
667 def select_format(self, format_spec, available_formats):
668 if format_spec == 'best' or format_spec is None:
669 return available_formats[-1]
670 elif format_spec == 'worst':
671 return available_formats[0]
672 elif format_spec == 'bestaudio':
674 f for f in available_formats
675 if f.get('vcodec') == 'none']
677 return audio_formats[-1]
678 elif format_spec == 'worstaudio':
680 f for f in available_formats
681 if f.get('vcodec') == 'none']
683 return audio_formats[0]
684 elif format_spec == 'bestvideo':
686 f for f in available_formats
687 if f.get('acodec') == 'none']
689 return video_formats[-1]
690 elif format_spec == 'worstvideo':
692 f for f in available_formats
693 if f.get('acodec') == 'none']
695 return video_formats[0]
697 extensions = ['mp4', 'flv', 'webm', '3gp']
698 if format_spec in extensions:
699 filter_f = lambda f: f['ext'] == format_spec
701 filter_f = lambda f: f['format_id'] == format_spec
702 matches = list(filter(filter_f, available_formats))
707 def process_video_result(self, info_dict, download=True):
708 assert info_dict.get('_type', 'video') == 'video'
710 if 'id' not in info_dict:
711 raise ExtractorError('Missing "id" field in extractor result')
712 if 'title' not in info_dict:
713 raise ExtractorError('Missing "title" field in extractor result')
715 if 'playlist' not in info_dict:
716 # It isn't part of a playlist
717 info_dict['playlist'] = None
718 info_dict['playlist_index'] = None
720 thumbnails = info_dict.get('thumbnails')
723 if 'width' in t and 'height' in t:
724 t['resolution'] = '%dx%d' % (t['width'], t['height'])
726 if thumbnails and 'thumbnail' not in info_dict:
727 info_dict['thumbnail'] = thumbnails[-1]['url']
729 if 'display_id' not in info_dict and 'id' in info_dict:
730 info_dict['display_id'] = info_dict['id']
732 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
733 upload_date = datetime.datetime.utcfromtimestamp(
734 info_dict['timestamp'])
735 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
737 # This extractors handle format selection themselves
738 if info_dict['extractor'] in ['Youku']:
740 self.process_info(info_dict)
743 # We now pick which formats have to be downloaded
744 if info_dict.get('formats') is None:
745 # There's only one format available
746 formats = [info_dict]
748 formats = info_dict['formats']
751 raise ExtractorError('No video formats found!')
753 # We check that all the formats have the format and format_id fields
754 for i, format in enumerate(formats):
755 if 'url' not in format:
756 raise ExtractorError('Missing "url" key in result (index %d)' % i)
758 if format.get('format_id') is None:
759 format['format_id'] = compat_str(i)
760 if format.get('format') is None:
761 format['format'] = '{id} - {res}{note}'.format(
762 id=format['format_id'],
763 res=self.format_resolution(format),
764 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
766 # Automatically determine file extension if missing
767 if 'ext' not in format:
768 format['ext'] = determine_ext(format['url']).lower()
770 format_limit = self.params.get('format_limit', None)
772 formats = list(takewhile_inclusive(
773 lambda f: f['format_id'] != format_limit, formats
776 # TODO Central sorting goes here
778 if formats[0] is not info_dict:
779 # only set the 'formats' fields if the original info_dict list them
780 # otherwise we end up with a circular reference, the first (and unique)
781 # element in the 'formats' field in info_dict is info_dict itself,
782 # wich can't be exported to json
783 info_dict['formats'] = formats
784 if self.params.get('listformats', None):
785 self.list_formats(info_dict)
788 req_format = self.params.get('format')
789 if req_format is None:
791 formats_to_download = []
792 # The -1 is for supporting YoutubeIE
793 if req_format in ('-1', 'all'):
794 formats_to_download = formats
796 # We can accept formats requested in the format: 34/5/best, we pick
797 # the first that is available, starting from left
798 req_formats = req_format.split('/')
799 for rf in req_formats:
800 if re.match(r'.+?\+.+?', rf) is not None:
801 # Two formats have been requested like '137+139'
802 format_1, format_2 = rf.split('+')
803 formats_info = (self.select_format(format_1, formats),
804 self.select_format(format_2, formats))
805 if all(formats_info):
807 'requested_formats': formats_info,
809 'ext': formats_info[0]['ext'],
812 selected_format = None
814 selected_format = self.select_format(rf, formats)
815 if selected_format is not None:
816 formats_to_download = [selected_format]
818 if not formats_to_download:
819 raise ExtractorError('requested format not available',
823 if len(formats_to_download) > 1:
824 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
825 for format in formats_to_download:
826 new_info = dict(info_dict)
827 new_info.update(format)
828 self.process_info(new_info)
829 # We update the info dict with the best quality format (backwards compatibility)
830 info_dict.update(formats_to_download[-1])
833 def process_info(self, info_dict):
834 """Process a single resolved IE result."""
836 assert info_dict.get('_type', 'video') == 'video'
838 max_downloads = self.params.get('max_downloads')
839 if max_downloads is not None:
840 if self._num_downloads >= int(max_downloads):
841 raise MaxDownloadsReached()
843 info_dict['fulltitle'] = info_dict['title']
844 if len(info_dict['title']) > 200:
845 info_dict['title'] = info_dict['title'][:197] + '...'
847 # Keep for backwards compatibility
848 info_dict['stitle'] = info_dict['title']
850 if not 'format' in info_dict:
851 info_dict['format'] = info_dict['ext']
853 reason = self._match_entry(info_dict)
854 if reason is not None:
855 self.to_screen('[download] ' + reason)
858 self._num_downloads += 1
860 filename = self.prepare_filename(info_dict)
863 if self.params.get('forcetitle', False):
864 self.to_stdout(info_dict['fulltitle'])
865 if self.params.get('forceid', False):
866 self.to_stdout(info_dict['id'])
867 if self.params.get('forceurl', False):
868 # For RTMP URLs, also include the playpath
869 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
870 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
871 self.to_stdout(info_dict['thumbnail'])
872 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
873 self.to_stdout(info_dict['description'])
874 if self.params.get('forcefilename', False) and filename is not None:
875 self.to_stdout(filename)
876 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
877 self.to_stdout(formatSeconds(info_dict['duration']))
878 if self.params.get('forceformat', False):
879 self.to_stdout(info_dict['format'])
880 if self.params.get('forcejson', False):
881 info_dict['_filename'] = filename
882 self.to_stdout(json.dumps(info_dict))
884 # Do nothing else if in simulate mode
885 if self.params.get('simulate', False):
892 dn = os.path.dirname(encodeFilename(filename))
893 if dn and not os.path.exists(dn):
895 except (OSError, IOError) as err:
896 self.report_error('unable to create directory ' + compat_str(err))
899 if self.params.get('writedescription', False):
900 descfn = filename + '.description'
901 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
902 self.to_screen('[info] Video description is already present')
905 self.to_screen('[info] Writing video description to: ' + descfn)
906 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
907 descfile.write(info_dict['description'])
908 except (KeyError, TypeError):
909 self.report_warning('There\'s no description to write.')
910 except (OSError, IOError):
911 self.report_error('Cannot write description file ' + descfn)
914 if self.params.get('writeannotations', False):
915 annofn = filename + '.annotations.xml'
916 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
917 self.to_screen('[info] Video annotations are already present')
920 self.to_screen('[info] Writing video annotations to: ' + annofn)
921 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
922 annofile.write(info_dict['annotations'])
923 except (KeyError, TypeError):
924 self.report_warning('There are no annotations to write.')
925 except (OSError, IOError):
926 self.report_error('Cannot write annotations file: ' + annofn)
929 subtitles_are_requested = any([self.params.get('writesubtitles', False),
930 self.params.get('writeautomaticsub')])
932 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
933 # subtitles download errors are already managed as troubles in relevant IE
934 # that way it will silently go on when used with unsupporting IE
935 subtitles = info_dict['subtitles']
936 sub_format = self.params.get('subtitlesformat', 'srt')
937 for sub_lang in subtitles.keys():
938 sub = subtitles[sub_lang]
942 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
943 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
944 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
946 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
947 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
949 except (OSError, IOError):
950 self.report_error('Cannot write subtitles file ' + sub_filename)
953 if self.params.get('writeinfojson', False):
954 infofn = os.path.splitext(filename)[0] + '.info.json'
955 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
956 self.to_screen('[info] Video description metadata is already present')
958 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
960 write_json_file(info_dict, encodeFilename(infofn))
961 except (OSError, IOError):
962 self.report_error('Cannot write metadata to JSON file ' + infofn)
965 if self.params.get('writethumbnail', False):
966 if info_dict.get('thumbnail') is not None:
967 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
968 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
969 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
970 self.to_screen('[%s] %s: Thumbnail is already present' %
971 (info_dict['extractor'], info_dict['id']))
973 self.to_screen('[%s] %s: Downloading thumbnail ...' %
974 (info_dict['extractor'], info_dict['id']))
976 uf = self.urlopen(info_dict['thumbnail'])
977 with open(thumb_filename, 'wb') as thumbf:
978 shutil.copyfileobj(uf, thumbf)
979 self.to_screen('[%s] %s: Writing thumbnail to: %s' %
980 (info_dict['extractor'], info_dict['id'], thumb_filename))
981 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
982 self.report_warning('Unable to download thumbnail "%s": %s' %
983 (info_dict['thumbnail'], compat_str(err)))
985 if not self.params.get('skip_download', False):
986 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
991 fd = get_suitable_downloader(info)(self, self.params)
992 for ph in self._progress_hooks:
993 fd.add_progress_hook(ph)
994 return fd.download(name, info)
995 if info_dict.get('requested_formats') is not None:
998 merger = FFmpegMergerPP(self)
999 if not merger._get_executable():
1001 self.report_warning('You have requested multiple '
1002 'formats but ffmpeg or avconv are not installed.'
1003 ' The formats won\'t be merged')
1005 postprocessors = [merger]
1006 for f in info_dict['requested_formats']:
1007 new_info = dict(info_dict)
1009 fname = self.prepare_filename(new_info)
1010 fname = prepend_extension(fname, 'f%s' % f['format_id'])
1011 downloaded.append(fname)
1012 partial_success = dl(fname, new_info)
1013 success = success and partial_success
1014 info_dict['__postprocessors'] = postprocessors
1015 info_dict['__files_to_merge'] = downloaded
1017 # Just a single file
1018 success = dl(filename, info_dict)
1019 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1020 self.report_error('unable to download video data: %s' % str(err))
1022 except (OSError, IOError) as err:
1023 raise UnavailableVideoError(err)
1024 except (ContentTooShortError, ) as err:
1025 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1030 self.post_process(filename, info_dict)
1031 except (PostProcessingError) as err:
1032 self.report_error('postprocessing: %s' % str(err))
1035 self.record_download_archive(info_dict)
# Public entry point: extract info for, and download, every URL in url_list;
# returns the accumulated process return code.
# NOTE(review): this excerpt has gaps — original lines 1041 (presumably the
# "'%' not in outtmpl"-style check), 1044/1046 (a 'try:' opener for the loop
# body) and 1053-1054 are missing.  Confirm against the full file.
1037 def download(self, url_list):
1038 """Download a given list of URLs."""
1039 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
# Guard: refuse to write several videos to one fixed output template.
1040 if (len(url_list) > 1 and
1042 and self.params.get('max_downloads') != 1):
1043 raise SameFileError(outtmpl)
1045 for url in url_list:
1047 #It also downloads the videos
1048 self.extract_info(url)
# A failure on one URL is reported and the loop continues with the next.
1049 except UnavailableVideoError:
1050 self.report_error('unable to download video')
# --max-downloads limit hit; presumably re-raised/aborted on the missing
# line 1053 — TODO confirm.
1051 except MaxDownloadsReached:
1052 self.to_screen('[info] Maximum number of downloaded files reached.')
1055 return self._download_retcode
# Re-run a download from a previously written .info.json file instead of
# extracting the URL again.
# NOTE(review): lines 1059-1060 (the json.load of f and a 'try:') and
# 1067-1068 (presumably an 'else: raise' branch) are missing from this
# excerpt — confirm against the full file.
1057 def download_with_info_file(self, info_filename):
1058 with io.open(info_filename, 'r', encoding='utf-8') as f:
1061 self.process_ie_result(info, download=True)
1062 except DownloadError:
# Fallback: if processing the saved info fails, retry from the original
# webpage URL when the info dict recorded one.
1063 webpage_url = info.get('webpage_url')
1064 if webpage_url is not None:
1065 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1066 return self.download([webpage_url])
1069 return self._download_retcode
# NOTE(review): lines 1075-1076 (initialization of keep_video/pps_chain),
# 1081 ('try:') and 1092 ('try:') are missing from this excerpt — confirm
# against the full file before editing.
1071 def post_process(self, filename, ie_info):
1072 """Run all the postprocessors on the given file."""
# Work on a copy so the caller's info dict is not mutated.
1073 info = dict(ie_info)
1074 info['filepath'] = filename
# Per-video postprocessors (e.g. the ffmpeg merger) run before the
# globally registered ones.
1077 if ie_info.get('__postprocessors') is not None:
1078 pps_chain.extend(ie_info['__postprocessors'])
1079 pps_chain.extend(self._pps)
1080 for pp in pps_chain:
1082 keep_video_wish, new_info = pp.run(info)
1083 if keep_video_wish is not None:
1085 keep_video = keep_video_wish
1086 elif keep_video is None:
1087 # No clear decision yet, let IE decide
1088 keep_video = keep_video_wish
1089 except PostProcessingError as e:
1090 self.report_error(e.msg)
# Delete the original download only when a PP asked for it and the user
# did not pass -k/--keepvideo.
1091 if keep_video is False and not self.params.get('keepvideo', False):
1093 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1094 os.remove(encodeFilename(filename))
1095 except (IOError, OSError):
# Best-effort removal: failure to delete is only a warning.
1096 self.report_warning('Unable to remove downloaded video file')
1098 def _make_archive_id(self, info_dict):
1099 # Future-proof against any change in case
1100 # and backwards compatibility with prior versions
1101 extractor = info_dict.get('extractor_key')
1102 if extractor is None:
1103 if 'id' in info_dict:
1104 extractor = info_dict.get('ie_key') # key in a playlist
1105 if extractor is None:
1106 return None # Incomplete video information
1107 return extractor.lower() + ' ' + info_dict['id']
# Return True when this video's archive id is already recorded in the
# --download-archive file (i.e. it was downloaded before).
# NOTE(review): lines 1111-1113 (early return when fn is None), 1115,
# 1117-1118 ('try:'), 1122 ('return True') and 1125-1127 (re-raise /
# 'return False') are missing from this excerpt — confirm against the
# full file.
1109 def in_download_archive(self, info_dict):
1110 fn = self.params.get('download_archive')
1114 vid_id = self._make_archive_id(info_dict)
1116 return False # Incomplete video information
# locked_file guards against concurrent youtube-dl processes touching
# the archive at the same time.
1119 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1120 for line in archive_file:
1121 if line.strip() == vid_id:
1123 except IOError as ioe:
# A missing archive file (ENOENT) is normal on first run; anything else
# is presumably re-raised on the missing line 1125 — TODO confirm.
1124 if ioe.errno != errno.ENOENT:
# Append this video's archive id to the --download-archive file so future
# runs can skip it.
# NOTE(review): lines 1130-1131 (early return when fn is None) and 1133
# (presumably an assertion that vid_id is not None) are missing from this
# excerpt — confirm against the full file.
1128 def record_download_archive(self, info_dict):
1129 fn = self.params.get('download_archive')
1132 vid_id = self._make_archive_id(info_dict)
# Append mode plus file locking: safe against concurrent writers.
1134 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1135 archive_file.write(vid_id + '\n')
# Render a format dict's resolution as a short display string
# ('WxH', 'Hp', '?xW', an explicit 'resolution' value, ...).
# NOTE(review): the decorator line (1137, presumably @staticmethod), line
# 1140 (the return for the vcodec == 'none' audio-only case), 1146
# ('else:') and 1150-1153 (final 'else: res = default' and 'return res')
# are missing from this excerpt — confirm against the full file.
1138 def format_resolution(format, default='unknown'):
1139 if format.get('vcodec') == 'none':
# An explicit 'resolution' field wins over width/height.
1141 if format.get('resolution') is not None:
1142 return format['resolution']
1143 if format.get('height') is not None:
1144 if format.get('width') is not None:
1145 res = '%sx%s' % (format['width'], format['height'])
1147 res = '%sp' % format['height']
1148 elif format.get('width') is not None:
1149 res = '?x%d' % format['width']
# Build the human-readable "note" column for a format: format_note, total
# bitrate, container, codecs, audio bitrate/sample rate and filesize.
# NOTE(review): many lines are missing from this excerpt (1155 'res = ""',
# 1163-1164, 1168-1169, 1172, 1174, 1178-1179, 1181-1182, 1185-1187,
# 1193-1194, 1196 'return res') — confirm against the full file before
# editing; the visible branches depend on the missing ones.
1154 def _format_note(self, fdict):
# f4f/f4m (Adobe HDS) downloads are not supported by this version.
1156 if fdict.get('ext') in ['f4f', 'f4m']:
1157 res += '(unsupported) '
1158 if fdict.get('format_note') is not None:
1159 res += fdict['format_note'] + ' '
1160 if fdict.get('tbr') is not None:
1161 res += '%4dk ' % fdict['tbr']
1162 if fdict.get('container') is not None:
1165 res += '%s container' % fdict['container']
1166 if (fdict.get('vcodec') is not None and
1167 fdict.get('vcodec') != 'none'):
1170 res += fdict['vcodec']
1171 if fdict.get('vbr') is not None:
1173 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1175 if fdict.get('vbr') is not None:
1176 res += '%4dk' % fdict['vbr']
1177 if fdict.get('acodec') is not None:
1180 if fdict['acodec'] == 'none':
1183 res += '%-5s' % fdict['acodec']
1184 elif fdict.get('abr') is not None:
1188 if fdict.get('abr') is not None:
1189 res += '@%3dk' % fdict['abr']
1190 if fdict.get('asr') is not None:
1191 res += ' (%5dHz)' % fdict['asr']
1192 if fdict.get('filesize') is not None:
1195 res += format_bytes(fdict['filesize'])
# Print a table of all available formats for a video (--list-formats).
# NOTE(review): lines 1202 (presumably the 'ext' column in line()),
# 1205-1206 and 1214 are missing from this excerpt — confirm against the
# full file.
1198 def list_formats(self, info_dict):
# Render a single table row; idlen sizes the format-code column.
1199 def line(format, idlen=20):
1200 return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1201 format['format_id'],
1203 self.format_resolution(format),
1204 self._format_note(format),
# Videos without a 'formats' list are treated as a single format.
1207 formats = info_dict.get('formats', [info_dict])
1208 idlen = max(len('format code'),
1209 max(len(f['format_id']) for f in formats))
1210 formats_s = [line(f, idlen) for f in formats]
# Formats are assumed sorted worst-to-best: annotate the two extremes.
1211 if len(formats) > 1:
1212 formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1213 formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
# The header row is rendered through line() itself so columns line up.
1215 header_line = line({
1216 'format_id': 'format code', 'ext': 'extension',
1217 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1218 self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1219 (info_dict['id'], header_line, '\n'.join(formats_s)))
def urlopen(self, req):
    """ Start an HTTP download """
    # Route every request through the shared opener so cookies, proxy
    # settings and the configured socket timeout apply uniformly.
    opener = self._opener
    timeout = self._socket_timeout
    return opener.open(req, timeout=timeout)
# Emit the [debug] header (encodings, version, git commit, Python/platform,
# proxy map) when --verbose is active; a no-op otherwise.
# NOTE(review): lines 1227-1229 (the opening of the encodings write call),
# 1235-1237, 1239 ('try:'), and 1248-1252 (the except around the git
# subprocess) are missing from this excerpt — confirm against the full
# file.  In particular the subprocess.Popen call at 1240 appears to be
# wrapped in a try/except that is not visible here.
1225 def print_debug_header(self):
1226 if not self.params.get('verbose'):
1230 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1231 locale.getpreferredencoding(),
1232 sys.getfilesystemencoding(),
1233 sys.stdout.encoding,
1234 self.get_encoding()),
1238 self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
# Best-effort: report the git HEAD when running from a checkout.
1240 sp = subprocess.Popen(
1241 ['git', 'rev-parse', '--short', 'HEAD'],
1242 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1243 cwd=os.path.dirname(os.path.abspath(__file__)))
1244 out, err = sp.communicate()
1245 out = out.decode().strip()
1246 if re.match('[0-9a-f]+', out):
1247 self._write_string('[debug] Git HEAD: ' + out + '\n')
1253 self._write_string('[debug] Python version %s - %s' %
1254 (platform.python_version(), platform_name()) + '\n')
# Collect the effective proxy settings from every installed handler.
1257 for handler in self._opener.handlers:
1258 if hasattr(handler, 'proxies'):
1259 proxy_map.update(handler.proxies)
1260 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
# Build self._opener: cookie jar, proxy handling, HTTPS certificate
# checking and the custom YoutubeDLHandler, driven by self.params.
# NOTE(review): lines 1265, 1268, 1271 ('else:'), 1273 (the cookie-file
# argument), 1276, 1278 (the cookiejar argument), 1281-1282 (the empty-
# proxy branch), 1284 ('else:'), 1290 and 1302 are missing from this
# excerpt — confirm against the full file.
1262 def _setup_opener(self):
1263 timeout_val = self.params.get('socket_timeout')
# Default socket timeout is 600 seconds when --socket-timeout is unset.
1264 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1266 opts_cookiefile = self.params.get('cookiefile')
1267 opts_proxy = self.params.get('proxy')
# Without --cookies use an in-memory jar; with it, a Mozilla-format jar
# loaded from disk when the file is readable.
1269 if opts_cookiefile is None:
1270 self.cookiejar = compat_cookiejar.CookieJar()
1272 self.cookiejar = compat_cookiejar.MozillaCookieJar(
1274 if os.access(opts_cookiefile, os.R_OK):
1275 self.cookiejar.load()
1277 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
# An explicit --proxy overrides the environment's proxy settings.
1279 if opts_proxy is not None:
1280 if opts_proxy == '':
1283 proxies = {'http': opts_proxy, 'https': opts_proxy}
1285 proxies = compat_urllib_request.getproxies()
1286 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1287 if 'http' in proxies and 'https' not in proxies:
1288 proxies['https'] = proxies['http']
1289 proxy_handler = compat_urllib_request.ProxyHandler(proxies)
# --dump-intermediate-pages style traffic debugging for both handlers.
1291 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1292 https_handler = make_HTTPS_handler(
1293 self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1294 ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1295 opener = compat_urllib_request.build_opener(
1296 https_handler, proxy_handler, cookie_processor, ydlh)
1297 # Delete the default user-agent header, which would otherwise apply in
1298 # cases where our custom HTTP handler doesn't come into play
1299 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1300 opener.addheaders = []
1301 self._opener = opener
# Encode str s to bytes using the configured output encoding; bytes pass
# through unchanged.
# NOTE(review): lines 1306-1307 (the 'try:' opener) and 1311-1312
# (presumably the 'raise' re-raising the annotated error) are missing
# from this excerpt — confirm against the full file.
1303 def encode(self, s):
1304 if isinstance(s, bytes):
1305 return s # Already encoded
1308 return s.encode(self.get_encoding())
1309 except UnicodeEncodeError as err:
# Augment the error with a hint before it propagates to the user.
1310 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1313 def get_encoding(self):
1314 encoding = self.params.get('encoding')
1315 if encoding is None:
1316 encoding = preferredencoding()