2 # -*- coding: utf-8 -*-
4 from __future__ import absolute_import, unicode_literals
33 compat_urllib_request,
59 UnavailableVideoError,
67 from .cache import Cache
68 from .extractor import get_info_extractor, gen_extractors
69 from .downloader import get_suitable_downloader
70 from .downloader.rtmp import rtmpdump_version
71 from .postprocessor import (
76 from .version import __version__
79 class YoutubeDL(object):
82 YoutubeDL objects are the ones responsible for downloading the
83 actual video file and writing it to disk if the user has requested
84 it, among some other tasks. In most cases there should be one per
85 program. As, given a video URL, the downloader doesn't know how to
86 extract all the needed information, a task that InfoExtractors do, it
87 has to pass the URL to one of them.
89 For this, YoutubeDL objects have a method that allows
90 InfoExtractors to be registered in a given order. When it is passed
91 a URL, the YoutubeDL object hands it to the first InfoExtractor it
92 finds that reports being able to handle it. The InfoExtractor extracts
93 all the information about the video or videos the URL refers to, and
94 YoutubeDL processes the extracted information, possibly using a File
95 Downloader to download the video.
97 YoutubeDL objects accept a lot of parameters. In order not to saturate
98 the object constructor with arguments, it receives a dictionary of
99 options instead. These options are available through the params
100 attribute for the InfoExtractors to use. The YoutubeDL also
101 registers itself as the downloader in charge for the InfoExtractors
102 that are added to it, so this is a "mutual registration".
106 username: Username for authentication purposes.
107 password: Password for authentication purposes.
108 videopassword: Password for accessing a video.
109 usenetrc: Use netrc for authentication instead.
110 verbose: Print additional info to stdout.
111 quiet: Do not print messages to stdout.
112 no_warnings: Do not print out anything for warnings.
113 forceurl: Force printing final URL.
114 forcetitle: Force printing title.
115 forceid: Force printing ID.
116 forcethumbnail: Force printing thumbnail URL.
117 forcedescription: Force printing description.
118 forcefilename: Force printing final filename.
119 forceduration: Force printing duration.
120 forcejson: Force printing info_dict as JSON.
121 dump_single_json: Force printing the info_dict of the whole playlist
122 (or video) as a single JSON line.
123 simulate: Do not download the video files.
124 format: Video format code.
125 format_limit: Highest quality format to try.
126 outtmpl: Template for output names.
127 restrictfilenames: Do not allow "&" and spaces in file names
128 ignoreerrors: Do not stop on download errors.
129 nooverwrites: Prevent overwriting files.
130 playliststart: Playlist item to start at.
131 playlistend: Playlist item to end at.
132 playlistreverse: Download playlist items in reverse order.
133 matchtitle: Download only matching titles.
134 rejecttitle: Reject downloads for matching titles.
135 logger: Log messages to a logging.Logger instance.
136 logtostderr: Log messages to stderr instead of stdout.
137 writedescription: Write the video description to a .description file
138 writeinfojson: Write the video description to a .info.json file
139 writeannotations: Write the video annotations to a .annotations.xml file
140 writethumbnail: Write the thumbnail image to a file
141 writesubtitles: Write the video subtitles to a file
142 writeautomaticsub: Write the automatic subtitles to a file
143 allsubtitles: Downloads all the subtitles of the video
144 (requires writesubtitles or writeautomaticsub)
145 listsubtitles: Lists all available subtitles for the video
146 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
147 subtitleslangs: List of languages of the subtitles to download
148 keepvideo: Keep the video file after post-processing
149 daterange: A DateRange object, download only if the upload_date is in the range.
150 skip_download: Skip the actual download of the video file
151 cachedir: Location of the cache files in the filesystem.
152 False to disable filesystem cache.
153 noplaylist: Download single video instead of a playlist if in doubt.
154 age_limit: An integer representing the user's age in years.
155 Unsuitable videos for the given age are skipped.
156 min_views: An integer representing the minimum view count the video
157 must have in order to not be skipped.
158 Videos without view count information are always
159 downloaded. None for no limit.
160 max_views: An integer representing the maximum view count.
161 Videos that are more popular than that are not
163 Videos without view count information are always
164 downloaded. None for no limit.
165 download_archive: File name of a file where all downloads are recorded.
166 Videos already present in the file are not downloaded
168 cookiefile: File name where cookies should be read from and dumped to.
169 nocheckcertificate:Do not verify SSL certificates
170 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
171 At the moment, this is only supported by YouTube.
172 proxy: URL of the proxy server to use
173 socket_timeout: Time to wait for unresponsive hosts, in seconds
174 bidi_workaround: Work around buggy terminals without bidirectional text
175 support, using fribidi
176 debug_printtraffic:Print out sent and received HTTP traffic
177 include_ads: Download ads as well
178 default_search: Prepend this string if an input url is not valid.
179 'auto' for elaborate guessing
180 encoding: Use this encoding instead of the system-specified.
181 extract_flat: Do not resolve URLs, return the immediate result.
182 Pass in 'in_playlist' to only show this behavior for
184 postprocessors: A list of dictionaries, each with an entry
185 key: The name of the postprocessor. See
186 youtube_dl/postprocessor/__init__.py for a list.
187 as well as any further keyword arguments for the
190 The following parameters are not used by YoutubeDL itself, they are used by
192 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
193 noresizebuffer, retries, continuedl, noprogress, consoletitle
195 The following options are used by the post processors:
196 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
197 otherwise prefer avconv.
198 exec_cmd: Arbitrary command to run after downloading
204 _download_retcode = None
205 _num_downloads = None
208 def __init__(self, params=None, auto_init=True):
209 """Create a FileDownloader object with the given options."""
213 self._ies_instances = {}
215 self._progress_hooks = []
216 self._download_retcode = 0
217 self._num_downloads = 0
218 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
219 self._err_file = sys.stderr
221 self.cache = Cache(self)
223 if params.get('bidi_workaround', False):
226 master, slave = pty.openpty()
227 width = get_term_width()
231 width_args = ['-w', str(width)]
233 stdin=subprocess.PIPE,
235 stderr=self._err_file)
237 self._output_process = subprocess.Popen(
238 ['bidiv'] + width_args, **sp_kwargs
241 self._output_process = subprocess.Popen(
242 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
243 self._output_channel = os.fdopen(master, 'rb')
244 except OSError as ose:
246 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
250 if (sys.version_info >= (3,) and sys.platform != 'win32' and
251 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
252 and not params.get('restrictfilenames', False)):
253 # On Python 3, the Unicode filesystem API will throw errors (#1474)
255 'Assuming --restrict-filenames since file system encoding '
256 'cannot encode all characters. '
257 'Set the LC_ALL environment variable to fix this.')
258 self.params['restrictfilenames'] = True
260 if '%(stitle)s' in self.params.get('outtmpl', ''):
261 self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
266 self.print_debug_header()
267 self.add_default_info_extractors()
269 for pp_def_raw in self.params.get('postprocessors', []):
270 pp_class = get_postprocessor(pp_def_raw['key'])
271 pp_def = dict(pp_def_raw)
273 pp = pp_class(self, **compat_kwargs(pp_def))
274 self.add_post_processor(pp)
276 def warn_if_short_id(self, argv):
277 # short YouTube ID starting with dash?
279 i for i, a in enumerate(argv)
280 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
284 [a for i, a in enumerate(argv) if i not in idxs] +
285 ['--'] + [argv[i] for i in idxs]
288 'Long argument string detected. '
289 'Use -- to separate parameters and URLs, like this:\n%s\n' %
290 args_to_str(correct_argv))
292 def add_info_extractor(self, ie):
293 """Add an InfoExtractor object to the end of the list."""
295 self._ies_instances[ie.ie_key()] = ie
296 ie.set_downloader(self)
298 def get_info_extractor(self, ie_key):
300 Get an instance of an IE with name ie_key, it will try to get one from
301 the _ies list, if there's no instance it will create a new one and add
302 it to the extractor list.
304 ie = self._ies_instances.get(ie_key)
306 ie = get_info_extractor(ie_key)()
307 self.add_info_extractor(ie)
310 def add_default_info_extractors(self):
312 Add the InfoExtractors returned by gen_extractors to the end of the list
314 for ie in gen_extractors():
315 self.add_info_extractor(ie)
317 def add_post_processor(self, pp):
318 """Add a PostProcessor object to the end of the chain."""
320 pp.set_downloader(self)
322 def add_progress_hook(self, ph):
323 """Add the progress hook (currently only for the file downloader)"""
324 self._progress_hooks.append(ph)
326 def _bidi_workaround(self, message):
327 if not hasattr(self, '_output_channel'):
330 assert hasattr(self, '_output_process')
331 assert isinstance(message, compat_str)
332 line_count = message.count('\n') + 1
333 self._output_process.stdin.write((message + '\n').encode('utf-8'))
334 self._output_process.stdin.flush()
335 res = ''.join(self._output_channel.readline().decode('utf-8')
336 for _ in range(line_count))
337 return res[:-len('\n')]
339 def to_screen(self, message, skip_eol=False):
340 """Print message to stdout if not in quiet mode."""
341 return self.to_stdout(message, skip_eol, check_quiet=True)
343 def _write_string(self, s, out=None):
344 write_string(s, out=out, encoding=self.params.get('encoding'))
346 def to_stdout(self, message, skip_eol=False, check_quiet=False):
347 """Print message to stdout if not in quiet mode."""
348 if self.params.get('logger'):
349 self.params['logger'].debug(message)
350 elif not check_quiet or not self.params.get('quiet', False):
351 message = self._bidi_workaround(message)
352 terminator = ['\n', ''][skip_eol]
353 output = message + terminator
355 self._write_string(output, self._screen_file)
357 def to_stderr(self, message):
358 """Print message to stderr."""
359 assert isinstance(message, compat_str)
360 if self.params.get('logger'):
361 self.params['logger'].error(message)
363 message = self._bidi_workaround(message)
364 output = message + '\n'
365 self._write_string(output, self._err_file)
367 def to_console_title(self, message):
368 if not self.params.get('consoletitle', False):
370 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
371 # c_wchar_p() might not be necessary if `message` is
372 # already of type unicode()
373 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
374 elif 'TERM' in os.environ:
375 self._write_string('\033]0;%s\007' % message, self._screen_file)
377 def save_console_title(self):
378 if not self.params.get('consoletitle', False):
380 if 'TERM' in os.environ:
381 # Save the title on stack
382 self._write_string('\033[22;0t', self._screen_file)
384 def restore_console_title(self):
385 if not self.params.get('consoletitle', False):
387 if 'TERM' in os.environ:
388 # Restore the title from stack
389 self._write_string('\033[23;0t', self._screen_file)
392 self.save_console_title()
395 def __exit__(self, *args):
396 self.restore_console_title()
398 if self.params.get('cookiefile') is not None:
399 self.cookiejar.save()
401 def trouble(self, message=None, tb=None):
402 """Determine action to take when a download problem appears.
404 Depending on if the downloader has been configured to ignore
405 download errors or not, this method may throw an exception or
406 not when errors are found, after printing the message.
408 tb, if given, is additional traceback information.
410 if message is not None:
411 self.to_stderr(message)
412 if self.params.get('verbose'):
414 if sys.exc_info()[0]: # if .trouble has been called from an except block
416 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
417 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
418 tb += compat_str(traceback.format_exc())
420 tb_data = traceback.format_list(traceback.extract_stack())
421 tb = ''.join(tb_data)
423 if not self.params.get('ignoreerrors', False):
424 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
425 exc_info = sys.exc_info()[1].exc_info
427 exc_info = sys.exc_info()
428 raise DownloadError(message, exc_info)
429 self._download_retcode = 1
431 def report_warning(self, message):
433 Print the message to stderr, it will be prefixed with 'WARNING:'
434 If stderr is a tty file the 'WARNING:' will be colored
436 if self.params.get('logger') is not None:
437 self.params['logger'].warning(message)
439 if self.params.get('no_warnings'):
441 if self._err_file.isatty() and os.name != 'nt':
442 _msg_header = '\033[0;33mWARNING:\033[0m'
444 _msg_header = 'WARNING:'
445 warning_message = '%s %s' % (_msg_header, message)
446 self.to_stderr(warning_message)
448 def report_error(self, message, tb=None):
450 Do the same as trouble, but prefixes the message with 'ERROR:', colored
451 in red if stderr is a tty file.
453 if self._err_file.isatty() and os.name != 'nt':
454 _msg_header = '\033[0;31mERROR:\033[0m'
456 _msg_header = 'ERROR:'
457 error_message = '%s %s' % (_msg_header, message)
458 self.trouble(error_message, tb)
460 def report_file_already_downloaded(self, file_name):
461 """Report file has already been fully downloaded."""
463 self.to_screen('[download] %s has already been downloaded' % file_name)
464 except UnicodeEncodeError:
465 self.to_screen('[download] The file has already been downloaded')
467 def prepare_filename(self, info_dict):
468 """Generate the output filename."""
470 template_dict = dict(info_dict)
472 template_dict['epoch'] = int(time.time())
473 autonumber_size = self.params.get('autonumber_size')
474 if autonumber_size is None:
476 autonumber_templ = '%0' + str(autonumber_size) + 'd'
477 template_dict['autonumber'] = autonumber_templ % self._num_downloads
478 if template_dict.get('playlist_index') is not None:
479 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
480 if template_dict.get('resolution') is None:
481 if template_dict.get('width') and template_dict.get('height'):
482 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
483 elif template_dict.get('height'):
484 template_dict['resolution'] = '%sp' % template_dict['height']
485 elif template_dict.get('width'):
486 template_dict['resolution'] = '?x%d' % template_dict['width']
488 sanitize = lambda k, v: sanitize_filename(
490 restricted=self.params.get('restrictfilenames'),
492 template_dict = dict((k, sanitize(k, v))
493 for k, v in template_dict.items()
495 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
497 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
498 tmpl = compat_expanduser(outtmpl)
499 filename = tmpl % template_dict
501 except ValueError as err:
502 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
505 def _match_entry(self, info_dict):
506 """ Returns None iff the file should be downloaded """
508 video_title = info_dict.get('title', info_dict.get('id', 'video'))
509 if 'title' in info_dict:
510 # This can happen when we're just evaluating the playlist
511 title = info_dict['title']
512 matchtitle = self.params.get('matchtitle', False)
514 if not re.search(matchtitle, title, re.IGNORECASE):
515 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
516 rejecttitle = self.params.get('rejecttitle', False)
518 if re.search(rejecttitle, title, re.IGNORECASE):
519 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
520 date = info_dict.get('upload_date', None)
522 dateRange = self.params.get('daterange', DateRange())
523 if date not in dateRange:
524 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
525 view_count = info_dict.get('view_count', None)
526 if view_count is not None:
527 min_views = self.params.get('min_views')
528 if min_views is not None and view_count < min_views:
529 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
530 max_views = self.params.get('max_views')
531 if max_views is not None and view_count > max_views:
532 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
533 age_limit = self.params.get('age_limit')
534 if age_limit is not None:
535 actual_age_limit = info_dict.get('age_limit')
536 if actual_age_limit is None:
538 if age_limit < actual_age_limit:
539 return 'Skipping "' + title + '" because it is age restricted'
540 if self.in_download_archive(info_dict):
541 return '%s has already been recorded in archive' % video_title
545 def add_extra_info(info_dict, extra_info):
546 '''Set the keys from extra_info in info dict if they are missing'''
547 for key, value in extra_info.items():
548 info_dict.setdefault(key, value)
550 def extract_info(self, url, download=True, ie_key=None, extra_info={},
553 Returns a list with a dictionary for each video we find.
554 If 'download', also downloads the videos.
555 extra_info is a dict containing the extra values to add to each result
559 ies = [self.get_info_extractor(ie_key)]
564 if not ie.suitable(url):
568 self.report_warning('The program functionality for this site has been marked as broken, '
569 'and will probably not work.')
572 ie_result = ie.extract(url)
573 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
575 if isinstance(ie_result, list):
576 # Backwards compatibility: old IE result format
578 '_type': 'compat_list',
579 'entries': ie_result,
581 self.add_default_extra_info(ie_result, ie, url)
583 return self.process_ie_result(ie_result, download, extra_info)
586 except ExtractorError as de: # An error we somewhat expected
587 self.report_error(compat_str(de), de.format_traceback())
589 except MaxDownloadsReached:
591 except Exception as e:
592 if self.params.get('ignoreerrors', False):
593 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
598 self.report_error('no suitable InfoExtractor for URL %s' % url)
600 def add_default_extra_info(self, ie_result, ie, url):
601 self.add_extra_info(ie_result, {
602 'extractor': ie.IE_NAME,
604 'webpage_url_basename': url_basename(url),
605 'extractor_key': ie.ie_key(),
608 def process_ie_result(self, ie_result, download=True, extra_info={}):
610 Take the result of the ie(may be modified) and resolve all unresolved
611 references (URLs, playlist items).
613 It will also download the videos if 'download'.
614 Returns the resolved ie_result.
617 result_type = ie_result.get('_type', 'video')
619 if result_type in ('url', 'url_transparent'):
620 extract_flat = self.params.get('extract_flat', False)
621 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
622 extract_flat is True):
623 if self.params.get('forcejson', False):
624 self.to_stdout(json.dumps(ie_result))
627 if result_type == 'video':
628 self.add_extra_info(ie_result, extra_info)
629 return self.process_video_result(ie_result, download=download)
630 elif result_type == 'url':
631 # We have to add extra_info to the results because it may be
632 # contained in a playlist
633 return self.extract_info(ie_result['url'],
635 ie_key=ie_result.get('ie_key'),
636 extra_info=extra_info)
637 elif result_type == 'url_transparent':
638 # Use the information from the embedding page
639 info = self.extract_info(
640 ie_result['url'], ie_key=ie_result.get('ie_key'),
641 extra_info=extra_info, download=False, process=False)
643 force_properties = dict(
644 (k, v) for k, v in ie_result.items() if v is not None)
645 for f in ('_type', 'url'):
646 if f in force_properties:
647 del force_properties[f]
648 new_result = info.copy()
649 new_result.update(force_properties)
651 assert new_result.get('_type') != 'url_transparent'
653 return self.process_ie_result(
654 new_result, download=download, extra_info=extra_info)
655 elif result_type == 'playlist' or result_type == 'multi_video':
656 # We process each entry in the playlist
657 playlist = ie_result.get('title', None) or ie_result.get('id', None)
658 self.to_screen('[download] Downloading playlist: %s' % playlist)
660 playlist_results = []
662 playliststart = self.params.get('playliststart', 1) - 1
663 playlistend = self.params.get('playlistend', None)
664 # For backwards compatibility, interpret -1 as whole list
665 if playlistend == -1:
668 ie_entries = ie_result['entries']
669 if isinstance(ie_entries, list):
670 n_all_entries = len(ie_entries)
671 entries = ie_entries[playliststart:playlistend]
672 n_entries = len(entries)
674 "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
675 (ie_result['extractor'], playlist, n_all_entries, n_entries))
676 elif isinstance(ie_entries, PagedList):
677 entries = ie_entries.getslice(
678 playliststart, playlistend)
679 n_entries = len(entries)
681 "[%s] playlist %s: Downloading %d videos" %
682 (ie_result['extractor'], playlist, n_entries))
684 entries = list(itertools.islice(
685 ie_entries, playliststart, playlistend))
686 n_entries = len(entries)
688 "[%s] playlist %s: Downloading %d videos" %
689 (ie_result['extractor'], playlist, n_entries))
691 if self.params.get('playlistreverse', False):
692 entries = entries[::-1]
694 for i, entry in enumerate(entries, 1):
695 self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
697 'n_entries': n_entries,
698 'playlist': playlist,
699 'playlist_id': ie_result.get('id'),
700 'playlist_title': ie_result.get('title'),
701 'playlist_index': i + playliststart,
702 'extractor': ie_result['extractor'],
703 'webpage_url': ie_result['webpage_url'],
704 'webpage_url_basename': url_basename(ie_result['webpage_url']),
705 'extractor_key': ie_result['extractor_key'],
708 reason = self._match_entry(entry)
709 if reason is not None:
710 self.to_screen('[download] ' + reason)
713 entry_result = self.process_ie_result(entry,
716 playlist_results.append(entry_result)
717 ie_result['entries'] = playlist_results
719 elif result_type == 'compat_list':
721 'Extractor %s returned a compat_list result. '
722 'It needs to be updated.' % ie_result.get('extractor'))
728 'extractor': ie_result['extractor'],
729 'webpage_url': ie_result['webpage_url'],
730 'webpage_url_basename': url_basename(ie_result['webpage_url']),
731 'extractor_key': ie_result['extractor_key'],
735 ie_result['entries'] = [
736 self.process_ie_result(_fixup(r), download, extra_info)
737 for r in ie_result['entries']
741 raise Exception('Invalid result type: %s' % result_type)
743 def select_format(self, format_spec, available_formats):
744 if format_spec == 'best' or format_spec is None:
745 return available_formats[-1]
746 elif format_spec == 'worst':
747 return available_formats[0]
748 elif format_spec == 'bestaudio':
750 f for f in available_formats
751 if f.get('vcodec') == 'none']
753 return audio_formats[-1]
754 elif format_spec == 'worstaudio':
756 f for f in available_formats
757 if f.get('vcodec') == 'none']
759 return audio_formats[0]
760 elif format_spec == 'bestvideo':
762 f for f in available_formats
763 if f.get('acodec') == 'none']
765 return video_formats[-1]
766 elif format_spec == 'worstvideo':
768 f for f in available_formats
769 if f.get('acodec') == 'none']
771 return video_formats[0]
773 extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
774 if format_spec in extensions:
775 filter_f = lambda f: f['ext'] == format_spec
777 filter_f = lambda f: f['format_id'] == format_spec
778 matches = list(filter(filter_f, available_formats))
783 def process_video_result(self, info_dict, download=True):
784 assert info_dict.get('_type', 'video') == 'video'
786 if 'id' not in info_dict:
787 raise ExtractorError('Missing "id" field in extractor result')
788 if 'title' not in info_dict:
789 raise ExtractorError('Missing "title" field in extractor result')
791 if 'playlist' not in info_dict:
792 # It isn't part of a playlist
793 info_dict['playlist'] = None
794 info_dict['playlist_index'] = None
796 thumbnails = info_dict.get('thumbnails')
798 thumbnails.sort(key=lambda t: (
799 t.get('width'), t.get('height'), t.get('url')))
801 if 'width' in t and 'height' in t:
802 t['resolution'] = '%dx%d' % (t['width'], t['height'])
804 if thumbnails and 'thumbnail' not in info_dict:
805 info_dict['thumbnail'] = thumbnails[-1]['url']
807 if 'display_id' not in info_dict and 'id' in info_dict:
808 info_dict['display_id'] = info_dict['id']
810 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
811 # Working around negative timestamps in Windows
812 # (see http://bugs.python.org/issue1646728)
813 if info_dict['timestamp'] < 0 and os.name == 'nt':
814 info_dict['timestamp'] = 0
815 upload_date = datetime.datetime.utcfromtimestamp(
816 info_dict['timestamp'])
817 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
819 # This extractors handle format selection themselves
820 if info_dict['extractor'] in ['Youku']:
822 self.process_info(info_dict)
825 # We now pick which formats have to be downloaded
826 if info_dict.get('formats') is None:
827 # There's only one format available
828 formats = [info_dict]
830 formats = info_dict['formats']
833 raise ExtractorError('No video formats found!')
835 # We check that all the formats have the format and format_id fields
836 for i, format in enumerate(formats):
837 if 'url' not in format:
838 raise ExtractorError('Missing "url" key in result (index %d)' % i)
840 if format.get('format_id') is None:
841 format['format_id'] = compat_str(i)
842 if format.get('format') is None:
843 format['format'] = '{id} - {res}{note}'.format(
844 id=format['format_id'],
845 res=self.format_resolution(format),
846 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
848 # Automatically determine file extension if missing
849 if 'ext' not in format:
850 format['ext'] = determine_ext(format['url']).lower()
852 format_limit = self.params.get('format_limit', None)
854 formats = list(takewhile_inclusive(
855 lambda f: f['format_id'] != format_limit, formats
858 # TODO Central sorting goes here
860 if formats[0] is not info_dict:
861 # only set the 'formats' fields if the original info_dict list them
862 # otherwise we end up with a circular reference, the first (and unique)
863 # element in the 'formats' field in info_dict is info_dict itself,
864 # wich can't be exported to json
865 info_dict['formats'] = formats
866 if self.params.get('listformats', None):
867 self.list_formats(info_dict)
870 req_format = self.params.get('format')
871 if req_format is None:
873 formats_to_download = []
874 # The -1 is for supporting YoutubeIE
875 if req_format in ('-1', 'all'):
876 formats_to_download = formats
878 for rfstr in req_format.split(','):
879 # We can accept formats requested in the format: 34/5/best, we pick
880 # the first that is available, starting from left
881 req_formats = rfstr.split('/')
882 for rf in req_formats:
883 if re.match(r'.+?\+.+?', rf) is not None:
884 # Two formats have been requested like '137+139'
885 format_1, format_2 = rf.split('+')
886 formats_info = (self.select_format(format_1, formats),
887 self.select_format(format_2, formats))
888 if all(formats_info):
889 # The first format must contain the video and the
891 if formats_info[0].get('vcodec') == 'none':
892 self.report_error('The first format must '
893 'contain the video, try using '
894 '"-f %s+%s"' % (format_2, format_1))
897 'requested_formats': formats_info,
899 'ext': formats_info[0]['ext'],
902 selected_format = None
904 selected_format = self.select_format(rf, formats)
905 if selected_format is not None:
906 formats_to_download.append(selected_format)
908 if not formats_to_download:
909 raise ExtractorError('requested format not available',
913 if len(formats_to_download) > 1:
914 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
915 for format in formats_to_download:
916 new_info = dict(info_dict)
917 new_info.update(format)
918 self.process_info(new_info)
919 # We update the info dict with the best quality format (backwards compatibility)
920 info_dict.update(formats_to_download[-1])
923 def process_info(self, info_dict):
924 """Process a single resolved IE result."""
926 assert info_dict.get('_type', 'video') == 'video'
928 max_downloads = self.params.get('max_downloads')
929 if max_downloads is not None:
930 if self._num_downloads >= int(max_downloads):
931 raise MaxDownloadsReached()
933 info_dict['fulltitle'] = info_dict['title']
934 if len(info_dict['title']) > 200:
935 info_dict['title'] = info_dict['title'][:197] + '...'
937 # Keep for backwards compatibility
938 info_dict['stitle'] = info_dict['title']
940 if 'format' not in info_dict:
941 info_dict['format'] = info_dict['ext']
943 reason = self._match_entry(info_dict)
944 if reason is not None:
945 self.to_screen('[download] ' + reason)
948 self._num_downloads += 1
950 filename = self.prepare_filename(info_dict)
953 if self.params.get('forcetitle', False):
954 self.to_stdout(info_dict['fulltitle'])
955 if self.params.get('forceid', False):
956 self.to_stdout(info_dict['id'])
957 if self.params.get('forceurl', False):
958 if info_dict.get('requested_formats') is not None:
959 for f in info_dict['requested_formats']:
960 self.to_stdout(f['url'] + f.get('play_path', ''))
962 # For RTMP URLs, also include the playpath
963 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
964 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
965 self.to_stdout(info_dict['thumbnail'])
966 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
967 self.to_stdout(info_dict['description'])
968 if self.params.get('forcefilename', False) and filename is not None:
969 self.to_stdout(filename)
970 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
971 self.to_stdout(formatSeconds(info_dict['duration']))
972 if self.params.get('forceformat', False):
973 self.to_stdout(info_dict['format'])
974 if self.params.get('forcejson', False):
975 info_dict['_filename'] = filename
976 self.to_stdout(json.dumps(info_dict))
977 if self.params.get('dump_single_json', False):
978 info_dict['_filename'] = filename
980 # Do nothing else if in simulate mode
981 if self.params.get('simulate', False):
988 dn = os.path.dirname(encodeFilename(filename))
989 if dn and not os.path.exists(dn):
991 except (OSError, IOError) as err:
992 self.report_error('unable to create directory ' + compat_str(err))
995 if self.params.get('writedescription', False):
996 descfn = filename + '.description'
997 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
998 self.to_screen('[info] Video description is already present')
1001 self.to_screen('[info] Writing video description to: ' + descfn)
1002 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1003 descfile.write(info_dict['description'])
1004 except (KeyError, TypeError):
1005 self.report_warning('There\'s no description to write.')
1006 except (OSError, IOError):
1007 self.report_error('Cannot write description file ' + descfn)
1010 if self.params.get('writeannotations', False):
1011 annofn = filename + '.annotations.xml'
1012 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1013 self.to_screen('[info] Video annotations are already present')
1016 self.to_screen('[info] Writing video annotations to: ' + annofn)
1017 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1018 annofile.write(info_dict['annotations'])
1019 except (KeyError, TypeError):
1020 self.report_warning('There are no annotations to write.')
1021 except (OSError, IOError):
1022 self.report_error('Cannot write annotations file: ' + annofn)
1025 subtitles_are_requested = any([self.params.get('writesubtitles', False),
1026 self.params.get('writeautomaticsub')])
1028 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
1029 # subtitles download errors are already managed as troubles in relevant IE
1030 # that way it will silently go on when used with unsupporting IE
1031 subtitles = info_dict['subtitles']
1032 sub_format = self.params.get('subtitlesformat', 'srt')
1033 for sub_lang in subtitles.keys():
1034 sub = subtitles[sub_lang]
1038 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1039 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1040 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1042 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1043 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1045 except (OSError, IOError):
1046 self.report_error('Cannot write subtitles file ' + sub_filename)
1049 if self.params.get('writeinfojson', False):
1050 infofn = os.path.splitext(filename)[0] + '.info.json'
1051 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1052 self.to_screen('[info] Video description metadata is already present')
1054 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1056 write_json_file(info_dict, infofn)
1057 except (OSError, IOError):
1058 self.report_error('Cannot write metadata to JSON file ' + infofn)
1061 if self.params.get('writethumbnail', False):
1062 if info_dict.get('thumbnail') is not None:
1063 thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
1064 thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
1065 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1066 self.to_screen('[%s] %s: Thumbnail is already present' %
1067 (info_dict['extractor'], info_dict['id']))
1069 self.to_screen('[%s] %s: Downloading thumbnail ...' %
1070 (info_dict['extractor'], info_dict['id']))
1072 uf = self.urlopen(info_dict['thumbnail'])
1073 with open(thumb_filename, 'wb') as thumbf:
1074 shutil.copyfileobj(uf, thumbf)
1075 self.to_screen('[%s] %s: Writing thumbnail to: %s' %
1076 (info_dict['extractor'], info_dict['id'], thumb_filename))
1077 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1078 self.report_warning('Unable to download thumbnail "%s": %s' %
1079 (info_dict['thumbnail'], compat_str(err)))
1081 if not self.params.get('skip_download', False):
1082 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
1087 fd = get_suitable_downloader(info)(self, self.params)
1088 for ph in self._progress_hooks:
1089 fd.add_progress_hook(ph)
1090 if self.params.get('verbose'):
1091 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1092 return fd.download(name, info)
1093 if info_dict.get('requested_formats') is not None:
1096 merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1097 if not merger._executable:
1099 self.report_warning('You have requested multiple '
1100 'formats but ffmpeg or avconv are not installed.'
1101 ' The formats won\'t be merged')
1103 postprocessors = [merger]
1104 for f in info_dict['requested_formats']:
1105 new_info = dict(info_dict)
1107 fname = self.prepare_filename(new_info)
1108 fname = prepend_extension(fname, 'f%s' % f['format_id'])
1109 downloaded.append(fname)
1110 partial_success = dl(fname, new_info)
1111 success = success and partial_success
1112 info_dict['__postprocessors'] = postprocessors
1113 info_dict['__files_to_merge'] = downloaded
1115 # Just a single file
1116 success = dl(filename, info_dict)
1117 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1118 self.report_error('unable to download video data: %s' % str(err))
1120 except (OSError, IOError) as err:
1121 raise UnavailableVideoError(err)
1122 except (ContentTooShortError, ) as err:
1123 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1128 self.post_process(filename, info_dict)
1129 except (PostProcessingError) as err:
1130 self.report_error('postprocessing: %s' % str(err))
1133 self.record_download_archive(info_dict)
1135 def download(self, url_list):
1136 """Download a given list of URLs."""
1137 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1138 if (len(url_list) > 1 and
1140 and self.params.get('max_downloads') != 1):
1141 raise SameFileError(outtmpl)
1143 for url in url_list:
1145 # It also downloads the videos
1146 res = self.extract_info(url)
1147 except UnavailableVideoError:
1148 self.report_error('unable to download video')
1149 except MaxDownloadsReached:
1150 self.to_screen('[info] Maximum number of downloaded files reached.')
1153 if self.params.get('dump_single_json', False):
1154 self.to_stdout(json.dumps(res))
1156 return self._download_retcode
1158 def download_with_info_file(self, info_filename):
1159 with io.open(info_filename, 'r', encoding='utf-8') as f:
1162 self.process_ie_result(info, download=True)
1163 except DownloadError:
1164 webpage_url = info.get('webpage_url')
1165 if webpage_url is not None:
1166 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1167 return self.download([webpage_url])
1170 return self._download_retcode
1172 def post_process(self, filename, ie_info):
1173 """Run all the postprocessors on the given file."""
1174 info = dict(ie_info)
1175 info['filepath'] = filename
1178 if ie_info.get('__postprocessors') is not None:
1179 pps_chain.extend(ie_info['__postprocessors'])
1180 pps_chain.extend(self._pps)
1181 for pp in pps_chain:
1183 keep_video_wish, new_info = pp.run(info)
1184 if keep_video_wish is not None:
1186 keep_video = keep_video_wish
1187 elif keep_video is None:
1188 # No clear decision yet, let IE decide
1189 keep_video = keep_video_wish
1190 except PostProcessingError as e:
1191 self.report_error(e.msg)
1192 if keep_video is False and not self.params.get('keepvideo', False):
1194 self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
1195 os.remove(encodeFilename(filename))
1196 except (IOError, OSError):
1197 self.report_warning('Unable to remove downloaded video file')
1199 def _make_archive_id(self, info_dict):
1200 # Future-proof against any change in case
1201 # and backwards compatibility with prior versions
1202 extractor = info_dict.get('extractor_key')
1203 if extractor is None:
1204 if 'id' in info_dict:
1205 extractor = info_dict.get('ie_key') # key in a playlist
1206 if extractor is None:
1207 return None # Incomplete video information
1208 return extractor.lower() + ' ' + info_dict['id']
1210 def in_download_archive(self, info_dict):
1211 fn = self.params.get('download_archive')
1215 vid_id = self._make_archive_id(info_dict)
1217 return False # Incomplete video information
1220 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1221 for line in archive_file:
1222 if line.strip() == vid_id:
1224 except IOError as ioe:
1225 if ioe.errno != errno.ENOENT:
1229 def record_download_archive(self, info_dict):
1230 fn = self.params.get('download_archive')
1233 vid_id = self._make_archive_id(info_dict)
1235 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1236 archive_file.write(vid_id + '\n')
1239 def format_resolution(format, default='unknown'):
1240 if format.get('vcodec') == 'none':
1242 if format.get('resolution') is not None:
1243 return format['resolution']
1244 if format.get('height') is not None:
1245 if format.get('width') is not None:
1246 res = '%sx%s' % (format['width'], format['height'])
1248 res = '%sp' % format['height']
1249 elif format.get('width') is not None:
1250 res = '?x%d' % format['width']
1255 def _format_note(self, fdict):
1257 if fdict.get('ext') in ['f4f', 'f4m']:
1258 res += '(unsupported) '
1259 if fdict.get('format_note') is not None:
1260 res += fdict['format_note'] + ' '
1261 if fdict.get('tbr') is not None:
1262 res += '%4dk ' % fdict['tbr']
1263 if fdict.get('container') is not None:
1266 res += '%s container' % fdict['container']
1267 if (fdict.get('vcodec') is not None and
1268 fdict.get('vcodec') != 'none'):
1271 res += fdict['vcodec']
1272 if fdict.get('vbr') is not None:
1274 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1276 if fdict.get('vbr') is not None:
1277 res += '%4dk' % fdict['vbr']
1278 if fdict.get('fps') is not None:
1279 res += ', %sfps' % fdict['fps']
1280 if fdict.get('acodec') is not None:
1283 if fdict['acodec'] == 'none':
1286 res += '%-5s' % fdict['acodec']
1287 elif fdict.get('abr') is not None:
1291 if fdict.get('abr') is not None:
1292 res += '@%3dk' % fdict['abr']
1293 if fdict.get('asr') is not None:
1294 res += ' (%5dHz)' % fdict['asr']
1295 if fdict.get('filesize') is not None:
1298 res += format_bytes(fdict['filesize'])
1299 elif fdict.get('filesize_approx') is not None:
1302 res += '~' + format_bytes(fdict['filesize_approx'])
1305 def list_formats(self, info_dict):
1306 def line(format, idlen=20):
1307 return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1308 format['format_id'],
1310 self.format_resolution(format),
1311 self._format_note(format),
1314 formats = info_dict.get('formats', [info_dict])
1315 idlen = max(len('format code'),
1316 max(len(f['format_id']) for f in formats))
1317 formats_s = [line(f, idlen) for f in formats]
1318 if len(formats) > 1:
1319 formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1320 formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1322 header_line = line({
1323 'format_id': 'format code', 'ext': 'extension',
1324 'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1325 self.to_screen('[info] Available formats for %s:\n%s\n%s' %
1326 (info_dict['id'], header_line, '\n'.join(formats_s)))
1328 def urlopen(self, req):
1329 """ Start an HTTP download """
1331 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1332 # always respected by websites, some tend to give out URLs with non percent-encoded
1333 # non-ASCII characters (see telemb.py, ard.py [#3412])
1334 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1335 # To work around aforementioned issue we will replace request's original URL with
1336 # percent-encoded one
1337 req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
1338 url = req if req_is_string else req.get_full_url()
1339 url_escaped = escape_url(url)
1341 # Substitute URL if any change after escaping
1342 if url != url_escaped:
1346 req = compat_urllib_request.Request(
1347 url_escaped, data=req.data, headers=req.headers,
1348 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1350 return self._opener.open(req, timeout=self._socket_timeout)
1352 def print_debug_header(self):
1353 if not self.params.get('verbose'):
1356 if type('') is not compat_str:
1357 # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1358 self.report_warning(
1359 'Your Python is broken! Update to a newer and supported version')
1361 stdout_encoding = getattr(
1362 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1364 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1365 locale.getpreferredencoding(),
1366 sys.getfilesystemencoding(),
1368 self.get_encoding()))
1369 write_string(encoding_str, encoding=None)
1371 self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1373 sp = subprocess.Popen(
1374 ['git', 'rev-parse', '--short', 'HEAD'],
1375 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1376 cwd=os.path.dirname(os.path.abspath(__file__)))
1377 out, err = sp.communicate()
1378 out = out.decode().strip()
1379 if re.match('[0-9a-f]+', out):
1380 self._write_string('[debug] Git HEAD: ' + out + '\n')
1386 self._write_string('[debug] Python version %s - %s\n' % (
1387 platform.python_version(), platform_name()))
1389 exe_versions = FFmpegPostProcessor.get_versions()
1390 exe_versions['rtmpdump'] = rtmpdump_version()
1391 exe_str = ', '.join(
1393 for exe, v in sorted(exe_versions.items())
1398 self._write_string('[debug] exe versions: %s\n' % exe_str)
1401 for handler in self._opener.handlers:
1402 if hasattr(handler, 'proxies'):
1403 proxy_map.update(handler.proxies)
1404 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1406 def _setup_opener(self):
1407 timeout_val = self.params.get('socket_timeout')
1408 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1410 opts_cookiefile = self.params.get('cookiefile')
1411 opts_proxy = self.params.get('proxy')
1413 if opts_cookiefile is None:
1414 self.cookiejar = compat_cookiejar.CookieJar()
1416 self.cookiejar = compat_cookiejar.MozillaCookieJar(
1418 if os.access(opts_cookiefile, os.R_OK):
1419 self.cookiejar.load()
1421 cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1423 if opts_proxy is not None:
1424 if opts_proxy == '':
1427 proxies = {'http': opts_proxy, 'https': opts_proxy}
1429 proxies = compat_urllib_request.getproxies()
1430 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1431 if 'http' in proxies and 'https' not in proxies:
1432 proxies['https'] = proxies['http']
1433 proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1435 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1436 https_handler = make_HTTPS_handler(
1437 self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
1438 ydlh = YoutubeDLHandler(debuglevel=debuglevel)
1439 opener = compat_urllib_request.build_opener(
1440 https_handler, proxy_handler, cookie_processor, ydlh)
1441 # Delete the default user-agent header, which would otherwise apply in
1442 # cases where our custom HTTP handler doesn't come into play
1443 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1444 opener.addheaders = []
1445 self._opener = opener
1447 def encode(self, s):
1448 if isinstance(s, bytes):
1449 return s # Already encoded
1452 return s.encode(self.get_encoding())
1453 except UnicodeEncodeError as err:
1454 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1457 def get_encoding(self):
1458 encoding = self.params.get('encoding')
1459 if encoding is None:
1460 encoding = preferredencoding()