_ Git - youtube-dl/blob - youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import datetime
   8 import errno
   9 import io
  10 import itertools
  11 import json
  12 import locale
  13 import operator
  14 import os
  15 import platform
  16 import re
  17 import shutil
  18 import subprocess
  19 import socket
  20 import sys
  21 import time
  22 import traceback
  23
  24 if os.name == 'nt':
  25     import ctypes
  26
  27 from .compat import (
  28     compat_cookiejar,
  29     compat_expanduser,
  30     compat_http_client,
  31     compat_kwargs,
  32     compat_str,
  33     compat_urllib_error,
  34     compat_urllib_request,
  35 )
  36 from .utils import (
  37     escape_url,
  38     ContentTooShortError,
  39     date_from_str,
  40     DateRange,
  41     DEFAULT_OUTTMPL,
  42     determine_ext,
  43     DownloadError,
  44     encodeFilename,
  45     ExtractorError,
  46     format_bytes,
  47     formatSeconds,
  48     get_term_width,
  49     locked_file,
  50     make_HTTPS_handler,
  51     MaxDownloadsReached,
  52     PagedList,
  53     parse_filesize,
  54     PostProcessingError,
  55     platform_name,
  56     preferredencoding,
  57     render_table,
  58     SameFileError,
  59     sanitize_filename,
  60     std_headers,
  61     subtitles_filename,
  62     takewhile_inclusive,
  63     UnavailableVideoError,
  64     url_basename,
  65     version_tuple,
  66     write_json_file,
  67     write_string,
  68     YoutubeDLHandler,
  69     prepend_extension,
  70     args_to_str,
  71     age_restricted,
  72 )
  73 from .cache import Cache
  74 from .extractor import get_info_extractor, gen_extractors
  75 from .downloader import get_suitable_downloader
  76 from .downloader.rtmp import rtmpdump_version
  77 from .postprocessor import (
  78     FFmpegFixupM4aPP,
  79     FFmpegFixupStretchedPP,
  80     FFmpegMergerPP,
  81     FFmpegPostProcessor,
  82     get_postprocessor,
  83 )
  84 from .version import __version__
  85
  86
  87 class YoutubeDL(object):
  88     """YoutubeDL class.
  89
   90     YoutubeDL objects are the ones responsible for downloading the
  91     actual video file and writing it to disk if the user has requested
  92     it, among some other tasks. In most cases there should be one per
  93     program. As, given a video URL, the downloader doesn't know how to
  94     extract all the needed information, task that InfoExtractors do, it
  95     has to pass the URL to one of them.
  96
  97     For this, YoutubeDL objects have a method that allows
  98     InfoExtractors to be registered in a given order. When it is passed
  99     a URL, the YoutubeDL object handles it to the first InfoExtractor it
 100     finds that reports being able to handle it. The InfoExtractor extracts
 101     all the information about the video or videos the URL refers to, and
 102     YoutubeDL process the extracted information, possibly using a File
 103     Downloader to download the video.
 104
 105     YoutubeDL objects accept a lot of parameters. In order not to saturate
 106     the object constructor with arguments, it receives a dictionary of
 107     options instead. These options are available through the params
 108     attribute for the InfoExtractors to use. The YoutubeDL also
 109     registers itself as the downloader in charge for the InfoExtractors
 110     that are added to it, so this is a "mutual registration".
 111
 112     Available options:
 113
 114     username:          Username for authentication purposes.
 115     password:          Password for authentication purposes.
  116     videopassword:     Password for accessing a video.
 117     usenetrc:          Use netrc for authentication instead.
 118     verbose:           Print additional info to stdout.
 119     quiet:             Do not print messages to stdout.
 120     no_warnings:       Do not print out anything for warnings.
 121     forceurl:          Force printing final URL.
 122     forcetitle:        Force printing title.
 123     forceid:           Force printing ID.
 124     forcethumbnail:    Force printing thumbnail URL.
 125     forcedescription:  Force printing description.
 126     forcefilename:     Force printing final filename.
 127     forceduration:     Force printing duration.
 128     forcejson:         Force printing info_dict as JSON.
 129     dump_single_json:  Force printing the info_dict of the whole playlist
 130                        (or video) as a single JSON line.
 131     simulate:          Do not download the video files.
 132     format:            Video format code. See options.py for more information.
 133     format_limit:      Highest quality format to try.
 134     outtmpl:           Template for output names.
 135     restrictfilenames: Do not allow "&" and spaces in file names
 136     ignoreerrors:      Do not stop on download errors.
 137     nooverwrites:      Prevent overwriting files.
 138     playliststart:     Playlist item to start at.
 139     playlistend:       Playlist item to end at.
 140     playlist_items:    Specific indices of playlist to download.
 141     playlistreverse:   Download playlist items in reverse order.
 142     matchtitle:        Download only matching titles.
 143     rejecttitle:       Reject downloads for matching titles.
 144     logger:            Log messages to a logging.Logger instance.
 145     logtostderr:       Log messages to stderr instead of stdout.
 146     writedescription:  Write the video description to a .description file
 147     writeinfojson:     Write the video description to a .info.json file
 148     writeannotations:  Write the video annotations to a .annotations.xml file
 149     writethumbnail:    Write the thumbnail image to a file
 150     write_all_thumbnails:  Write all thumbnail formats to files
 151     writesubtitles:    Write the video subtitles to a file
 152     writeautomaticsub: Write the automatic subtitles to a file
 153     allsubtitles:      Downloads all the subtitles of the video
 154                        (requires writesubtitles or writeautomaticsub)
 155     listsubtitles:     Lists all available subtitles for the video
 156     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
 157     subtitleslangs:    List of languages of the subtitles to download
 158     keepvideo:         Keep the video file after post-processing
 159     daterange:         A DateRange object, download only if the upload_date is in the range.
 160     skip_download:     Skip the actual download of the video file
 161     cachedir:          Location of the cache files in the filesystem.
 162                        False to disable filesystem cache.
 163     noplaylist:        Download single video instead of a playlist if in doubt.
 164     age_limit:         An integer representing the user's age in years.
 165                        Unsuitable videos for the given age are skipped.
 166     min_views:         An integer representing the minimum view count the video
 167                        must have in order to not be skipped.
 168                        Videos without view count information are always
 169                        downloaded. None for no limit.
 170     max_views:         An integer representing the maximum view count.
 171                        Videos that are more popular than that are not
 172                        downloaded.
 173                        Videos without view count information are always
 174                        downloaded. None for no limit.
 175     download_archive:  File name of a file where all downloads are recorded.
 176                        Videos already present in the file are not downloaded
 177                        again.
 178     cookiefile:        File name where cookies should be read from and dumped to.
 179     nocheckcertificate:Do not verify SSL certificates
 180     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 181                        At the moment, this is only supported by YouTube.
 182     proxy:             URL of the proxy server to use
 183     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 184     bidi_workaround:   Work around buggy terminals without bidirectional text
  185                        support, using fribidi
 186     debug_printtraffic:Print out sent and received HTTP traffic
 187     include_ads:       Download ads as well
 188     default_search:    Prepend this string if an input url is not valid.
 189                        'auto' for elaborate guessing
 190     encoding:          Use this encoding instead of the system-specified.
 191     extract_flat:      Do not resolve URLs, return the immediate result.
 192                        Pass in 'in_playlist' to only show this behavior for
 193                        playlist items.
 194     postprocessors:    A list of dictionaries, each with an entry
 195                        * key:  The name of the postprocessor. See
 196                                youtube_dl/postprocessor/__init__.py for a list.
 197                        as well as any further keyword arguments for the
 198                        postprocessor.
 199     progress_hooks:    A list of functions that get called on download
 200                        progress, with a dictionary with the entries
 201                        * filename: The final filename
 202                        * status: One of "downloading" and "finished"
 203
 204                        The dict may also have some of the following entries:
 205
 206                        * downloaded_bytes: Bytes on disk
 207                        * total_bytes: Size of the whole file, None if unknown
 208                        * tmpfilename: The filename we're currently writing to
 209                        * eta: The estimated time in seconds, None if unknown
 210                        * speed: The download speed in bytes/second, None if
 211                                 unknown
 212
 213                        Progress hooks are guaranteed to be called at least once
 214                        (with status "finished") if the download is successful.
 215     merge_output_format: Extension to use when merging formats.
 216     fixup:             Automatically correct known faults of the file.
 217                        One of:
 218                        - "never": do nothing
 219                        - "warn": only emit a warning
 220                        - "detect_or_warn": check whether we can do anything
 221                                            about it, warn otherwise (default)
 222     source_address:    (Experimental) Client-side IP address to bind to.
 223     call_home:         Boolean, true iff we are allowed to contact the
 224                        youtube-dl servers for debugging.
 225     sleep_interval:    Number of seconds to sleep before each download.
 226     external_downloader:  Executable of the external downloader to call.
 227     listformats:       Print an overview of available video formats and exit.
 228     list_thumbnails:   Print a table of all thumbnails and exit.
 229
 230
 231     The following parameters are not used by YoutubeDL itself, they are used by
 232     the FileDownloader:
 233     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 234     noresizebuffer, retries, continuedl, noprogress, consoletitle
 235
 236     The following options are used by the post processors:
 237     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
 238                        otherwise prefer avconv.
 239     exec_cmd:          Arbitrary command to run after downloading
 240     """
 241
    # Class-level placeholders. __init__ rebinds every one of these names on
    # the instance (params, _ies, _pps, _download_retcode, _num_downloads and
    # _screen_file), so an instance created through __init__ does not share
    # the two list objects below with other instances.
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None
 248
 249     def __init__(self, params=None, auto_init=True):
 250         """Create a FileDownloader object with the given options."""
 251         if params is None:
 252             params = {}
 253         self._ies = []
 254         self._ies_instances = {}
 255         self._pps = []
 256         self._progress_hooks = []
 257         self._download_retcode = 0
 258         self._num_downloads = 0
 259         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 260         self._err_file = sys.stderr
 261         self.params = params
 262         self.cache = Cache(self)
 263
 264         if params.get('bidi_workaround', False):
 265             try:
 266                 import pty
 267                 master, slave = pty.openpty()
 268                 width = get_term_width()
 269                 if width is None:
 270                     width_args = []
 271                 else:
 272                     width_args = ['-w', str(width)]
 273                 sp_kwargs = dict(
 274                     stdin=subprocess.PIPE,
 275                     stdout=slave,
 276                     stderr=self._err_file)
 277                 try:
 278                     self._output_process = subprocess.Popen(
 279                         ['bidiv'] + width_args, **sp_kwargs
 280                     )
 281                 except OSError:
 282                     self._output_process = subprocess.Popen(
 283                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 284                 self._output_channel = os.fdopen(master, 'rb')
 285             except OSError as ose:
 286                 if ose.errno == 2:
 287                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 288                 else:
 289                     raise
 290
 291         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 292                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 293                 and not params.get('restrictfilenames', False)):
 294             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 295             self.report_warning(
 296                 'Assuming --restrict-filenames since file system encoding '
 297                 'cannot encode all characters. '
 298                 'Set the LC_ALL environment variable to fix this.')
 299             self.params['restrictfilenames'] = True
 300
 301         if '%(stitle)s' in self.params.get('outtmpl', ''):
 302             self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 303
 304         self._setup_opener()
 305
 306         if auto_init:
 307             self.print_debug_header()
 308             self.add_default_info_extractors()
 309
 310         for pp_def_raw in self.params.get('postprocessors', []):
 311             pp_class = get_postprocessor(pp_def_raw['key'])
 312             pp_def = dict(pp_def_raw)
 313             del pp_def['key']
 314             pp = pp_class(self, **compat_kwargs(pp_def))
 315             self.add_post_processor(pp)
 316
 317         for ph in self.params.get('progress_hooks', []):
 318             self.add_progress_hook(ph)
 319
 320     def warn_if_short_id(self, argv):
 321         # short YouTube ID starting with dash?
 322         idxs = [
 323             i for i, a in enumerate(argv)
 324             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 325         if idxs:
 326             correct_argv = (
 327                 ['youtube-dl'] +
 328                 [a for i, a in enumerate(argv) if i not in idxs] +
 329                 ['--'] + [argv[i] for i in idxs]
 330             )
 331             self.report_warning(
 332                 'Long argument string detected. '
 333                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 334                 args_to_str(correct_argv))
 335
 336     def add_info_extractor(self, ie):
 337         """Add an InfoExtractor object to the end of the list."""
 338         self._ies.append(ie)
 339         self._ies_instances[ie.ie_key()] = ie
 340         ie.set_downloader(self)
 341
 342     def get_info_extractor(self, ie_key):
 343         """
 344         Get an instance of an IE with name ie_key, it will try to get one from
 345         the _ies list, if there's no instance it will create a new one and add
 346         it to the extractor list.
 347         """
 348         ie = self._ies_instances.get(ie_key)
 349         if ie is None:
 350             ie = get_info_extractor(ie_key)()
 351             self.add_info_extractor(ie)
 352         return ie
 353
 354     def add_default_info_extractors(self):
 355         """
 356         Add the InfoExtractors returned by gen_extractors to the end of the list
 357         """
 358         for ie in gen_extractors():
 359             self.add_info_extractor(ie)
 360
 361     def add_post_processor(self, pp):
 362         """Add a PostProcessor object to the end of the chain."""
 363         self._pps.append(pp)
 364         pp.set_downloader(self)
 365
 366     def add_progress_hook(self, ph):
 367         """Add the progress hook (currently only for the file downloader)"""
 368         self._progress_hooks.append(ph)
 369
 370     def _bidi_workaround(self, message):
 371         if not hasattr(self, '_output_channel'):
 372             return message
 373
 374         assert hasattr(self, '_output_process')
 375         assert isinstance(message, compat_str)
 376         line_count = message.count('\n') + 1
 377         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 378         self._output_process.stdin.flush()
 379         res = ''.join(self._output_channel.readline().decode('utf-8')
 380                       for _ in range(line_count))
 381         return res[:-len('\n')]
 382
 383     def to_screen(self, message, skip_eol=False):
 384         """Print message to stdout if not in quiet mode."""
 385         return self.to_stdout(message, skip_eol, check_quiet=True)
 386
 387     def _write_string(self, s, out=None):
 388         write_string(s, out=out, encoding=self.params.get('encoding'))
 389
 390     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 391         """Print message to stdout if not in quiet mode."""
 392         if self.params.get('logger'):
 393             self.params['logger'].debug(message)
 394         elif not check_quiet or not self.params.get('quiet', False):
 395             message = self._bidi_workaround(message)
 396             terminator = ['\n', ''][skip_eol]
 397             output = message + terminator
 398
 399             self._write_string(output, self._screen_file)
 400
 401     def to_stderr(self, message):
 402         """Print message to stderr."""
 403         assert isinstance(message, compat_str)
 404         if self.params.get('logger'):
 405             self.params['logger'].error(message)
 406         else:
 407             message = self._bidi_workaround(message)
 408             output = message + '\n'
 409             self._write_string(output, self._err_file)
 410
 411     def to_console_title(self, message):
 412         if not self.params.get('consoletitle', False):
 413             return
 414         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 415             # c_wchar_p() might not be necessary if `message` is
 416             # already of type unicode()
 417             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 418         elif 'TERM' in os.environ:
 419             self._write_string('\033]0;%s\007' % message, self._screen_file)
 420
 421     def save_console_title(self):
 422         if not self.params.get('consoletitle', False):
 423             return
 424         if 'TERM' in os.environ:
 425             # Save the title on stack
 426             self._write_string('\033[22;0t', self._screen_file)
 427
 428     def restore_console_title(self):
 429         if not self.params.get('consoletitle', False):
 430             return
 431         if 'TERM' in os.environ:
 432             # Restore the title from stack
 433             self._write_string('\033[23;0t', self._screen_file)
 434
 435     def __enter__(self):
 436         self.save_console_title()
 437         return self
 438
 439     def __exit__(self, *args):
 440         self.restore_console_title()
 441
 442         if self.params.get('cookiefile') is not None:
 443             self.cookiejar.save()
 444
 445     def trouble(self, message=None, tb=None):
 446         """Determine action to take when a download problem appears.
 447
 448         Depending on if the downloader has been configured to ignore
 449         download errors or not, this method may throw an exception or
 450         not when errors are found, after printing the message.
 451
 452         tb, if given, is additional traceback information.
 453         """
 454         if message is not None:
 455             self.to_stderr(message)
 456         if self.params.get('verbose'):
 457             if tb is None:
 458                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 459                     tb = ''
 460                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 461                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 462                     tb += compat_str(traceback.format_exc())
 463                 else:
 464                     tb_data = traceback.format_list(traceback.extract_stack())
 465                     tb = ''.join(tb_data)
 466             self.to_stderr(tb)
 467         if not self.params.get('ignoreerrors', False):
 468             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 469                 exc_info = sys.exc_info()[1].exc_info
 470             else:
 471                 exc_info = sys.exc_info()
 472             raise DownloadError(message, exc_info)
 473         self._download_retcode = 1
 474
 475     def report_warning(self, message):
 476         '''
 477         Print the message to stderr, it will be prefixed with 'WARNING:'
 478         If stderr is a tty file the 'WARNING:' will be colored
 479         '''
 480         if self.params.get('logger') is not None:
 481             self.params['logger'].warning(message)
 482         else:
 483             if self.params.get('no_warnings'):
 484                 return
 485             if self._err_file.isatty() and os.name != 'nt':
 486                 _msg_header = '\033[0;33mWARNING:\033[0m'
 487             else:
 488                 _msg_header = 'WARNING:'
 489             warning_message = '%s %s' % (_msg_header, message)
 490             self.to_stderr(warning_message)
 491
 492     def report_error(self, message, tb=None):
 493         '''
 494         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 495         in red if stderr is a tty file.
 496         '''
 497         if self._err_file.isatty() and os.name != 'nt':
 498             _msg_header = '\033[0;31mERROR:\033[0m'
 499         else:
 500             _msg_header = 'ERROR:'
 501         error_message = '%s %s' % (_msg_header, message)
 502         self.trouble(error_message, tb)
 503
 504     def report_file_already_downloaded(self, file_name):
 505         """Report file has already been fully downloaded."""
 506         try:
 507             self.to_screen('[download] %s has already been downloaded' % file_name)
 508         except UnicodeEncodeError:
 509             self.to_screen('[download] The file has already been downloaded')
 510
 511     def prepare_filename(self, info_dict):
 512         """Generate the output filename."""
 513         try:
 514             template_dict = dict(info_dict)
 515
 516             template_dict['epoch'] = int(time.time())
 517             autonumber_size = self.params.get('autonumber_size')
 518             if autonumber_size is None:
 519                 autonumber_size = 5
 520             autonumber_templ = '%0' + str(autonumber_size) + 'd'
 521             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 522             if template_dict.get('playlist_index') is not None:
 523                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
 524             if template_dict.get('resolution') is None:
 525                 if template_dict.get('width') and template_dict.get('height'):
 526                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 527                 elif template_dict.get('height'):
 528                     template_dict['resolution'] = '%sp' % template_dict['height']
 529                 elif template_dict.get('width'):
 530                     template_dict['resolution'] = '?x%d' % template_dict['width']
 531
 532             sanitize = lambda k, v: sanitize_filename(
 533                 compat_str(v),
 534                 restricted=self.params.get('restrictfilenames'),
 535                 is_id=(k == 'id'))
 536             template_dict = dict((k, sanitize(k, v))
 537                                  for k, v in template_dict.items()
 538                                  if v is not None)
 539             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 540
 541             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 542             tmpl = compat_expanduser(outtmpl)
 543             filename = tmpl % template_dict
 544             return filename
 545         except ValueError as err:
 546             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 547             return None
 548
 549     def _match_entry(self, info_dict):
 550         """ Returns None iff the file should be downloaded """
 551
 552         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 553         if 'title' in info_dict:
 554             # This can happen when we're just evaluating the playlist
 555             title = info_dict['title']
 556             matchtitle = self.params.get('matchtitle', False)
 557             if matchtitle:
 558                 if not re.search(matchtitle, title, re.IGNORECASE):
 559                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 560             rejecttitle = self.params.get('rejecttitle', False)
 561             if rejecttitle:
 562                 if re.search(rejecttitle, title, re.IGNORECASE):
 563                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 564         date = info_dict.get('upload_date', None)
 565         if date is not None:
 566             dateRange = self.params.get('daterange', DateRange())
 567             if date not in dateRange:
 568                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 569         view_count = info_dict.get('view_count', None)
 570         if view_count is not None:
 571             min_views = self.params.get('min_views')
 572             if min_views is not None and view_count < min_views:
 573                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 574             max_views = self.params.get('max_views')
 575             if max_views is not None and view_count > max_views:
 576                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 577         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
 578             return 'Skipping "%s" because it is age restricted' % title
 579         if self.in_download_archive(info_dict):
 580             return '%s has already been recorded in archive' % video_title
 581         return None
 582
 583     @staticmethod
 584     def add_extra_info(info_dict, extra_info):
 585         '''Set the keys from extra_info in info dict if they are missing'''
 586         for key, value in extra_info.items():
 587             info_dict.setdefault(key, value)
 588
 589     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 590                      process=True):
 591         '''
 592         Returns a list with a dictionary for each video we find.
 593         If 'download', also downloads the videos.
 594         extra_info is a dict containing the extra values to add to each result
 595          '''
 596
 597         if ie_key:
 598             ies = [self.get_info_extractor(ie_key)]
 599         else:
 600             ies = self._ies
 601
 602         for ie in ies:
 603             if not ie.suitable(url):
 604                 continue
 605
 606             if not ie.working():
 607                 self.report_warning('The program functionality for this site has been marked as broken, '
 608                                     'and will probably not work.')
 609
 610             try:
 611                 ie_result = ie.extract(url)
 612                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
 613                     break
 614                 if isinstance(ie_result, list):
 615                     # Backwards compatibility: old IE result format
 616                     ie_result = {
 617                         '_type': 'compat_list',
 618                         'entries': ie_result,
 619                     }
 620                 self.add_default_extra_info(ie_result, ie, url)
 621                 if process:
 622                     return self.process_ie_result(ie_result, download, extra_info)
 623                 else:
 624                     return ie_result
 625             except ExtractorError as de:  # An error we somewhat expected
 626                 self.report_error(compat_str(de), de.format_traceback())
 627                 break
 628             except MaxDownloadsReached:
 629                 raise
 630             except Exception as e:
 631                 if self.params.get('ignoreerrors', False):
 632                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 633                     break
 634                 else:
 635                     raise
 636         else:
 637             self.report_error('no suitable InfoExtractor for URL %s' % url)
 638
 639     def add_default_extra_info(self, ie_result, ie, url):
 640         self.add_extra_info(ie_result, {
 641             'extractor': ie.IE_NAME,
 642             'webpage_url': url,
 643             'webpage_url_basename': url_basename(url),
 644             'extractor_key': ie.ie_key(),
 645         })
 646
 647     def process_ie_result(self, ie_result, download=True, extra_info={}):
 648         """
 649         Take the result of the ie(may be modified) and resolve all unresolved
 650         references (URLs, playlist items).
 651
 652         It will also download the videos if 'download'.
 653         Returns the resolved ie_result.
 654         """
 655
 656         result_type = ie_result.get('_type', 'video')
 657
 658         if result_type in ('url', 'url_transparent'):
 659             extract_flat = self.params.get('extract_flat', False)
 660             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
 661                     extract_flat is True):
 662                 if self.params.get('forcejson', False):
 663                     self.to_stdout(json.dumps(ie_result))
 664                 return ie_result
 665
 666         if result_type == 'video':
 667             self.add_extra_info(ie_result, extra_info)
 668             return self.process_video_result(ie_result, download=download)
 669         elif result_type == 'url':
 670             # We have to add extra_info to the results because it may be
 671             # contained in a playlist
 672             return self.extract_info(ie_result['url'],
 673                                      download,
 674                                      ie_key=ie_result.get('ie_key'),
 675                                      extra_info=extra_info)
 676         elif result_type == 'url_transparent':
 677             # Use the information from the embedding page
 678             info = self.extract_info(
 679                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 680                 extra_info=extra_info, download=False, process=False)
 681
 682             force_properties = dict(
 683                 (k, v) for k, v in ie_result.items() if v is not None)
 684             for f in ('_type', 'url'):
 685                 if f in force_properties:
 686                     del force_properties[f]
 687             new_result = info.copy()
 688             new_result.update(force_properties)
 689
 690             assert new_result.get('_type') != 'url_transparent'
 691
 692             return self.process_ie_result(
 693                 new_result, download=download, extra_info=extra_info)
 694         elif result_type == 'playlist' or result_type == 'multi_video':
 695             # We process each entry in the playlist
 696             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 697             self.to_screen('[download] Downloading playlist: %s' % playlist)
 698
 699             playlist_results = []
 700
 701             playliststart = self.params.get('playliststart', 1) - 1
 702             playlistend = self.params.get('playlistend', None)
 703             # For backwards compatibility, interpret -1 as whole list
 704             if playlistend == -1:
 705                 playlistend = None
 706
 707             playlistitems_str = self.params.get('playlist_items', None)
 708             playlistitems = None
 709             if playlistitems_str is not None:
 710                 def iter_playlistitems(format):
 711                     for string_segment in format.split(','):
 712                         if '-' in string_segment:
 713                             start, end = string_segment.split('-')
 714                             for item in range(int(start), int(end) + 1):
 715                                 yield int(item)
 716                         else:
 717                             yield int(string_segment)
 718                 playlistitems = iter_playlistitems(playlistitems_str)
 719
 720             ie_entries = ie_result['entries']
 721             if isinstance(ie_entries, list):
 722                 n_all_entries = len(ie_entries)
 723                 if playlistitems:
 724                     entries = [ie_entries[i - 1] for i in playlistitems]
 725                 else:
 726                     entries = ie_entries[playliststart:playlistend]
 727                 n_entries = len(entries)
 728                 self.to_screen(
 729                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
 730                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
 731             elif isinstance(ie_entries, PagedList):
 732                 if playlistitems:
 733                     entries = []
 734                     for item in playlistitems:
 735                         entries.extend(ie_entries.getslice(
 736                             item - 1, item
 737                         ))
 738                 else:
 739                     entries = ie_entries.getslice(
 740                         playliststart, playlistend)
 741                 n_entries = len(entries)
 742                 self.to_screen(
 743                     "[%s] playlist %s: Downloading %d videos" %
 744                     (ie_result['extractor'], playlist, n_entries))
 745             else:  # iterable
 746                 if playlistitems:
 747                     entry_list = list(ie_entries)
 748                     entries = [entry_list[i - 1] for i in playlistitems]
 749                 else:
 750                     entries = list(itertools.islice(
 751                         ie_entries, playliststart, playlistend))
 752                 n_entries = len(entries)
 753                 self.to_screen(
 754                     "[%s] playlist %s: Downloading %d videos" %
 755                     (ie_result['extractor'], playlist, n_entries))
 756
 757             if self.params.get('playlistreverse', False):
 758                 entries = entries[::-1]
 759
 760             for i, entry in enumerate(entries, 1):
 761                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
 762                 extra = {
 763                     'n_entries': n_entries,
 764                     'playlist': playlist,
 765                     'playlist_id': ie_result.get('id'),
 766                     'playlist_title': ie_result.get('title'),
 767                     'playlist_index': i + playliststart,
 768                     'extractor': ie_result['extractor'],
 769                     'webpage_url': ie_result['webpage_url'],
 770                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
 771                     'extractor_key': ie_result['extractor_key'],
 772                 }
 773
 774                 reason = self._match_entry(entry)
 775                 if reason is not None:
 776                     self.to_screen('[download] ' + reason)
 777                     continue
 778
 779                 entry_result = self.process_ie_result(entry,
 780                                                       download=download,
 781                                                       extra_info=extra)
 782                 playlist_results.append(entry_result)
 783             ie_result['entries'] = playlist_results
 784             return ie_result
 785         elif result_type == 'compat_list':
 786             self.report_warning(
 787                 'Extractor %s returned a compat_list result. '
 788                 'It needs to be updated.' % ie_result.get('extractor'))
 789
 790             def _fixup(r):
 791                 self.add_extra_info(
 792                     r,
 793                     {
 794                         'extractor': ie_result['extractor'],
 795                         'webpage_url': ie_result['webpage_url'],
 796                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
 797                         'extractor_key': ie_result['extractor_key'],
 798                     }
 799                 )
 800                 return r
 801             ie_result['entries'] = [
 802                 self.process_ie_result(_fixup(r), download, extra_info)
 803                 for r in ie_result['entries']
 804             ]
 805             return ie_result
 806         else:
 807             raise Exception('Invalid result type: %s' % result_type)
 808
 809     def _apply_format_filter(self, format_spec, available_formats):
 810         " Returns a tuple of the remaining format_spec and filtered formats "
 811
 812         OPERATORS = {
 813             '<': operator.lt,
 814             '<=': operator.le,
 815             '>': operator.gt,
 816             '>=': operator.ge,
 817             '=': operator.eq,
 818             '!=': operator.ne,
 819         }
 820         operator_rex = re.compile(r'''(?x)\s*\[
 821             (?P<key>width|height|tbr|abr|vbr|filesize)
 822             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
 823             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
 824             \]$
 825             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
 826         m = operator_rex.search(format_spec)
 827         if not m:
 828             raise ValueError('Invalid format specification %r' % format_spec)
 829
 830         try:
 831             comparison_value = int(m.group('value'))
 832         except ValueError:
 833             comparison_value = parse_filesize(m.group('value'))
 834             if comparison_value is None:
 835                 comparison_value = parse_filesize(m.group('value') + 'B')
 836             if comparison_value is None:
 837                 raise ValueError(
 838                     'Invalid value %r in format specification %r' % (
 839                         m.group('value'), format_spec))
 840         op = OPERATORS[m.group('op')]
 841
 842         def _filter(f):
 843             actual_value = f.get(m.group('key'))
 844             if actual_value is None:
 845                 return m.group('none_inclusive')
 846             return op(actual_value, comparison_value)
 847         new_formats = [f for f in available_formats if _filter(f)]
 848
 849         new_format_spec = format_spec[:-len(m.group(0))]
 850         if not new_format_spec:
 851             new_format_spec = 'best'
 852
 853         return (new_format_spec, new_formats)
 854
 855     def select_format(self, format_spec, available_formats):
 856         while format_spec.endswith(']'):
 857             format_spec, available_formats = self._apply_format_filter(
 858                 format_spec, available_formats)
 859         if not available_formats:
 860             return None
 861
 862         if format_spec == 'best' or format_spec is None:
 863             return available_formats[-1]
 864         elif format_spec == 'worst':
 865             return available_formats[0]
 866         elif format_spec == 'bestaudio':
 867             audio_formats = [
 868                 f for f in available_formats
 869                 if f.get('vcodec') == 'none']
 870             if audio_formats:
 871                 return audio_formats[-1]
 872         elif format_spec == 'worstaudio':
 873             audio_formats = [
 874                 f for f in available_formats
 875                 if f.get('vcodec') == 'none']
 876             if audio_formats:
 877                 return audio_formats[0]
 878         elif format_spec == 'bestvideo':
 879             video_formats = [
 880                 f for f in available_formats
 881                 if f.get('acodec') == 'none']
 882             if video_formats:
 883                 return video_formats[-1]
 884         elif format_spec == 'worstvideo':
 885             video_formats = [
 886                 f for f in available_formats
 887                 if f.get('acodec') == 'none']
 888             if video_formats:
 889                 return video_formats[0]
 890         else:
 891             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
 892             if format_spec in extensions:
 893                 filter_f = lambda f: f['ext'] == format_spec
 894             else:
 895                 filter_f = lambda f: f['format_id'] == format_spec
 896             matches = list(filter(filter_f, available_formats))
 897             if matches:
 898                 return matches[-1]
 899         return None
 900
 901     def _calc_headers(self, info_dict):
 902         res = std_headers.copy()
 903
 904         add_headers = info_dict.get('http_headers')
 905         if add_headers:
 906             res.update(add_headers)
 907
 908         cookies = self._calc_cookies(info_dict)
 909         if cookies:
 910             res['Cookie'] = cookies
 911
 912         return res
 913
 914     def _calc_cookies(self, info_dict):
 915         class _PseudoRequest(object):
 916             def __init__(self, url):
 917                 self.url = url
 918                 self.headers = {}
 919                 self.unverifiable = False
 920
 921             def add_unredirected_header(self, k, v):
 922                 self.headers[k] = v
 923
 924             def get_full_url(self):
 925                 return self.url
 926
 927             def is_unverifiable(self):
 928                 return self.unverifiable
 929
 930             def has_header(self, h):
 931                 return h in self.headers
 932
 933         pr = _PseudoRequest(info_dict['url'])
 934         self.cookiejar.add_cookie_header(pr)
 935         return pr.headers.get('Cookie')
 936
 937     def process_video_result(self, info_dict, download=True):
 938         assert info_dict.get('_type', 'video') == 'video'
 939
 940         if 'id' not in info_dict:
 941             raise ExtractorError('Missing "id" field in extractor result')
 942         if 'title' not in info_dict:
 943             raise ExtractorError('Missing "title" field in extractor result')
 944
 945         if 'playlist' not in info_dict:
 946             # It isn't part of a playlist
 947             info_dict['playlist'] = None
 948             info_dict['playlist_index'] = None
 949
 950         thumbnails = info_dict.get('thumbnails')
 951         if thumbnails is None:
 952             thumbnail = info_dict.get('thumbnail')
 953             if thumbnail:
 954                 thumbnails = [{'url': thumbnail}]
 955         if thumbnails:
 956             thumbnails.sort(key=lambda t: (
 957                 t.get('preference'), t.get('width'), t.get('height'),
 958                 t.get('id'), t.get('url')))
 959             for t in thumbnails:
 960                 if 'width' in t and 'height' in t:
 961                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
 962
 963         if thumbnails and 'thumbnail' not in info_dict:
 964             info_dict['thumbnail'] = thumbnails[-1]['url']
 965
 966         if 'display_id' not in info_dict and 'id' in info_dict:
 967             info_dict['display_id'] = info_dict['id']
 968
 969         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
 970             # Working around negative timestamps in Windows
 971             # (see http://bugs.python.org/issue1646728)
 972             if info_dict['timestamp'] < 0 and os.name == 'nt':
 973                 info_dict['timestamp'] = 0
 974             upload_date = datetime.datetime.utcfromtimestamp(
 975                 info_dict['timestamp'])
 976             info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
 977
 978         # This extractors handle format selection themselves
 979         if info_dict['extractor'] in ['Youku']:
 980             if download:
 981                 self.process_info(info_dict)
 982             return info_dict
 983
 984         # We now pick which formats have to be downloaded
 985         if info_dict.get('formats') is None:
 986             # There's only one format available
 987             formats = [info_dict]
 988         else:
 989             formats = info_dict['formats']
 990
 991         if not formats:
 992             raise ExtractorError('No video formats found!')
 993
 994         # We check that all the formats have the format and format_id fields
 995         for i, format in enumerate(formats):
 996             if 'url' not in format:
 997                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
 998
 999             if format.get('format_id') is None:
1000                 format['format_id'] = compat_str(i)
1001             if format.get('format') is None:
1002                 format['format'] = '{id} - {res}{note}'.format(
1003                     id=format['format_id'],
1004                     res=self.format_resolution(format),
1005                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1006                 )
1007             # Automatically determine file extension if missing
1008             if 'ext' not in format:
1009                 format['ext'] = determine_ext(format['url']).lower()
1010             # Add HTTP headers, so that external programs can use them from the
1011             # json output
1012             full_format_info = info_dict.copy()
1013             full_format_info.update(format)
1014             format['http_headers'] = self._calc_headers(full_format_info)
1015
1016         format_limit = self.params.get('format_limit', None)
1017         if format_limit:
1018             formats = list(takewhile_inclusive(
1019                 lambda f: f['format_id'] != format_limit, formats
1020             ))
1021
1022         # TODO Central sorting goes here
1023
1024         if formats[0] is not info_dict:
1025             # only set the 'formats' fields if the original info_dict list them
1026             # otherwise we end up with a circular reference, the first (and unique)
1027             # element in the 'formats' field in info_dict is info_dict itself,
1028             # wich can't be exported to json
1029             info_dict['formats'] = formats
1030         if self.params.get('listformats'):
1031             self.list_formats(info_dict)
1032             return
1033         if self.params.get('list_thumbnails'):
1034             self.list_thumbnails(info_dict)
1035             return
1036
1037         req_format = self.params.get('format')
1038         if req_format is None:
1039             req_format = 'best'
1040         formats_to_download = []
1041         # The -1 is for supporting YoutubeIE
1042         if req_format in ('-1', 'all'):
1043             formats_to_download = formats
1044         else:
1045             for rfstr in req_format.split(','):
1046                 # We can accept formats requested in the format: 34/5/best, we pick
1047                 # the first that is available, starting from left
1048                 req_formats = rfstr.split('/')
1049                 for rf in req_formats:
1050                     if re.match(r'.+?\+.+?', rf) is not None:
1051                         # Two formats have been requested like '137+139'
1052                         format_1, format_2 = rf.split('+')
1053                         formats_info = (self.select_format(format_1, formats),
1054                                         self.select_format(format_2, formats))
1055                         if all(formats_info):
1056                             # The first format must contain the video and the
1057                             # second the audio
1058                             if formats_info[0].get('vcodec') == 'none':
1059                                 self.report_error('The first format must '
1060                                                   'contain the video, try using '
1061                                                   '"-f %s+%s"' % (format_2, format_1))
1062                                 return
1063                             output_ext = (
1064                                 formats_info[0]['ext']
1065                                 if self.params.get('merge_output_format') is None
1066                                 else self.params['merge_output_format'])
1067                             selected_format = {
1068                                 'requested_formats': formats_info,
1069                                 'format': rf,
1070                                 'ext': formats_info[0]['ext'],
1071                                 'width': formats_info[0].get('width'),
1072                                 'height': formats_info[0].get('height'),
1073                                 'resolution': formats_info[0].get('resolution'),
1074                                 'fps': formats_info[0].get('fps'),
1075                                 'vcodec': formats_info[0].get('vcodec'),
1076                                 'vbr': formats_info[0].get('vbr'),
1077                                 'stretched_ratio': formats_info[0].get('stretched_ratio'),
1078                                 'acodec': formats_info[1].get('acodec'),
1079                                 'abr': formats_info[1].get('abr'),
1080                                 'ext': output_ext,
1081                             }
1082                         else:
1083                             selected_format = None
1084                     else:
1085                         selected_format = self.select_format(rf, formats)
1086                     if selected_format is not None:
1087                         formats_to_download.append(selected_format)
1088                         break
1089         if not formats_to_download:
1090             raise ExtractorError('requested format not available',
1091                                  expected=True)
1092
1093         if download:
1094             if len(formats_to_download) > 1:
1095                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1096             for format in formats_to_download:
1097                 new_info = dict(info_dict)
1098                 new_info.update(format)
1099                 self.process_info(new_info)
1100         # We update the info dict with the best quality format (backwards compatibility)
1101         info_dict.update(formats_to_download[-1])
1102         return info_dict
1103
1104     def process_info(self, info_dict):
1105         """Process a single resolved IE result."""
1106
1107         assert info_dict.get('_type', 'video') == 'video'
1108
1109         max_downloads = self.params.get('max_downloads')
1110         if max_downloads is not None:
1111             if self._num_downloads >= int(max_downloads):
1112                 raise MaxDownloadsReached()
1113
1114         info_dict['fulltitle'] = info_dict['title']
1115         if len(info_dict['title']) > 200:
1116             info_dict['title'] = info_dict['title'][:197] + '...'
1117
1118         # Keep for backwards compatibility
1119         info_dict['stitle'] = info_dict['title']
1120
1121         if 'format' not in info_dict:
1122             info_dict['format'] = info_dict['ext']
1123
1124         reason = self._match_entry(info_dict)
1125         if reason is not None:
1126             self.to_screen('[download] ' + reason)
1127             return
1128
1129         self._num_downloads += 1
1130
1131         filename = self.prepare_filename(info_dict)
1132
1133         # Forced printings
1134         if self.params.get('forcetitle', False):
1135             self.to_stdout(info_dict['fulltitle'])
1136         if self.params.get('forceid', False):
1137             self.to_stdout(info_dict['id'])
1138         if self.params.get('forceurl', False):
1139             if info_dict.get('requested_formats') is not None:
1140                 for f in info_dict['requested_formats']:
1141                     self.to_stdout(f['url'] + f.get('play_path', ''))
1142             else:
1143                 # For RTMP URLs, also include the playpath
1144                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1145         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1146             self.to_stdout(info_dict['thumbnail'])
1147         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1148             self.to_stdout(info_dict['description'])
1149         if self.params.get('forcefilename', False) and filename is not None:
1150             self.to_stdout(filename)
1151         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1152             self.to_stdout(formatSeconds(info_dict['duration']))
1153         if self.params.get('forceformat', False):
1154             self.to_stdout(info_dict['format'])
1155         if self.params.get('forcejson', False):
1156             info_dict['_filename'] = filename
1157             self.to_stdout(json.dumps(info_dict))
1158         if self.params.get('dump_single_json', False):
1159             info_dict['_filename'] = filename
1160
1161         # Do nothing else if in simulate mode
1162         if self.params.get('simulate', False):
1163             return
1164
1165         if filename is None:
1166             return
1167
1168         try:
1169             dn = os.path.dirname(encodeFilename(filename))
1170             if dn and not os.path.exists(dn):
1171                 os.makedirs(dn)
1172         except (OSError, IOError) as err:
1173             self.report_error('unable to create directory ' + compat_str(err))
1174             return
1175
1176         if self.params.get('writedescription', False):
1177             descfn = filename + '.description'
1178             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1179                 self.to_screen('[info] Video description is already present')
1180             elif info_dict.get('description') is None:
1181                 self.report_warning('There\'s no description to write.')
1182             else:
1183                 try:
1184                     self.to_screen('[info] Writing video description to: ' + descfn)
1185                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1186                         descfile.write(info_dict['description'])
1187                 except (OSError, IOError):
1188                     self.report_error('Cannot write description file ' + descfn)
1189                     return
1190
1191         if self.params.get('writeannotations', False):
1192             annofn = filename + '.annotations.xml'
1193             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1194                 self.to_screen('[info] Video annotations are already present')
1195             else:
1196                 try:
1197                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1198                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1199                         annofile.write(info_dict['annotations'])
1200                 except (KeyError, TypeError):
1201                     self.report_warning('There are no annotations to write.')
1202                 except (OSError, IOError):
1203                     self.report_error('Cannot write annotations file: ' + annofn)
1204                     return
1205
1206         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1207                                        self.params.get('writeautomaticsub')])
1208
1209         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
1210             # subtitles download errors are already managed as troubles in relevant IE
1211             # that way it will silently go on when used with unsupporting IE
1212             subtitles = info_dict['subtitles']
1213             sub_format = self.params.get('subtitlesformat', 'srt')
1214             for sub_lang in subtitles.keys():
1215                 sub = subtitles[sub_lang]
1216                 if sub is None:
1217                     continue
1218                 try:
1219                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1220                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1221                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1222                     else:
1223                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1224                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1225                             subfile.write(sub)
1226                 except (OSError, IOError):
1227                     self.report_error('Cannot write subtitles file ' + sub_filename)
1228                     return
1229
1230         if self.params.get('writeinfojson', False):
1231             infofn = os.path.splitext(filename)[0] + '.info.json'
1232             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1233                 self.to_screen('[info] Video description metadata is already present')
1234             else:
1235                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1236                 try:
1237                     write_json_file(info_dict, infofn)
1238                 except (OSError, IOError):
1239                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1240                     return
1241
1242         self._write_thumbnails(info_dict, filename)
1243
1244         if not self.params.get('skip_download', False):
1245             try:
1246                 def dl(name, info):
1247                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1248                     for ph in self._progress_hooks:
1249                         fd.add_progress_hook(ph)
1250                     if self.params.get('verbose'):
1251                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1252                     return fd.download(name, info)
1253                 if info_dict.get('requested_formats') is not None:
1254                     downloaded = []
1255                     success = True
1256                     merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1257                     if not merger._executable:
1258                         postprocessors = []
1259                         self.report_warning('You have requested multiple '
1260                                             'formats but ffmpeg or avconv are not installed.'
1261                                             ' The formats won\'t be merged')
1262                     else:
1263                         postprocessors = [merger]
1264                     for f in info_dict['requested_formats']:
1265                         new_info = dict(info_dict)
1266                         new_info.update(f)
1267                         fname = self.prepare_filename(new_info)
1268                         fname = prepend_extension(fname, 'f%s' % f['format_id'])
1269                         downloaded.append(fname)
1270                         partial_success = dl(fname, new_info)
1271                         success = success and partial_success
1272                     info_dict['__postprocessors'] = postprocessors
1273                     info_dict['__files_to_merge'] = downloaded
1274                 else:
1275                     # Just a single file
1276                     success = dl(filename, info_dict)
1277             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1278                 self.report_error('unable to download video data: %s' % str(err))
1279                 return
1280             except (OSError, IOError) as err:
1281                 raise UnavailableVideoError(err)
1282             except (ContentTooShortError, ) as err:
1283                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1284                 return
1285
1286             if success:
1287                 # Fixup content
1288                 fixup_policy = self.params.get('fixup')
1289                 if fixup_policy is None:
1290                     fixup_policy = 'detect_or_warn'
1291
1292                 stretched_ratio = info_dict.get('stretched_ratio')
1293                 if stretched_ratio is not None and stretched_ratio != 1:
1294                     if fixup_policy == 'warn':
1295                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1296                             info_dict['id'], stretched_ratio))
1297                     elif fixup_policy == 'detect_or_warn':
1298                         stretched_pp = FFmpegFixupStretchedPP(self)
1299                         if stretched_pp.available:
1300                             info_dict.setdefault('__postprocessors', [])
1301                             info_dict['__postprocessors'].append(stretched_pp)
1302                         else:
1303                             self.report_warning(
1304                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1305                                     info_dict['id'], stretched_ratio))
1306                     else:
1307                         assert fixup_policy in ('ignore', 'never')
1308
1309                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1310                     if fixup_policy == 'warn':
1311                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1312                             info_dict['id']))
1313                     elif fixup_policy == 'detect_or_warn':
1314                         fixup_pp = FFmpegFixupM4aPP(self)
1315                         if fixup_pp.available:
1316                             info_dict.setdefault('__postprocessors', [])
1317                             info_dict['__postprocessors'].append(fixup_pp)
1318                         else:
1319                             self.report_warning(
1320                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1321                                     info_dict['id']))
1322                     else:
1323                         assert fixup_policy in ('ignore', 'never')
1324
1325                 try:
1326                     self.post_process(filename, info_dict)
1327                 except (PostProcessingError) as err:
1328                     self.report_error('postprocessing: %s' % str(err))
1329                     return
1330                 self.record_download_archive(info_dict)
1331
1332     def download(self, url_list):
1333         """Download a given list of URLs."""
1334         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1335         if (len(url_list) > 1 and
1336                 '%' not in outtmpl
1337                 and self.params.get('max_downloads') != 1):
1338             raise SameFileError(outtmpl)
1339
1340         for url in url_list:
1341             try:
1342                 # It also downloads the videos
1343                 res = self.extract_info(url)
1344             except UnavailableVideoError:
1345                 self.report_error('unable to download video')
1346             except MaxDownloadsReached:
1347                 self.to_screen('[info] Maximum number of downloaded files reached.')
1348                 raise
1349             else:
1350                 if self.params.get('dump_single_json', False):
1351                     self.to_stdout(json.dumps(res))
1352
1353         return self._download_retcode
1354
1355     def download_with_info_file(self, info_filename):
1356         with io.open(info_filename, 'r', encoding='utf-8') as f:
1357             info = json.load(f)
1358         try:
1359             self.process_ie_result(info, download=True)
1360         except DownloadError:
1361             webpage_url = info.get('webpage_url')
1362             if webpage_url is not None:
1363                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1364                 return self.download([webpage_url])
1365             else:
1366                 raise
1367         return self._download_retcode
1368
1369     def post_process(self, filename, ie_info):
1370         """Run all the postprocessors on the given file."""
1371         info = dict(ie_info)
1372         info['filepath'] = filename
1373         pps_chain = []
1374         if ie_info.get('__postprocessors') is not None:
1375             pps_chain.extend(ie_info['__postprocessors'])
1376         pps_chain.extend(self._pps)
1377         for pp in pps_chain:
1378             keep_video = None
1379             old_filename = info['filepath']
1380             try:
1381                 keep_video_wish, info = pp.run(info)
1382                 if keep_video_wish is not None:
1383                     if keep_video_wish:
1384                         keep_video = keep_video_wish
1385                     elif keep_video is None:
1386                         # No clear decision yet, let IE decide
1387                         keep_video = keep_video_wish
1388             except PostProcessingError as e:
1389                 self.report_error(e.msg)
1390             if keep_video is False and not self.params.get('keepvideo', False):
1391                 try:
1392                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1393                     os.remove(encodeFilename(old_filename))
1394                 except (IOError, OSError):
1395                     self.report_warning('Unable to remove downloaded video file')
1396
1397     def _make_archive_id(self, info_dict):
1398         # Future-proof against any change in case
1399         # and backwards compatibility with prior versions
1400         extractor = info_dict.get('extractor_key')
1401         if extractor is None:
1402             if 'id' in info_dict:
1403                 extractor = info_dict.get('ie_key')  # key in a playlist
1404         if extractor is None:
1405             return None  # Incomplete video information
1406         return extractor.lower() + ' ' + info_dict['id']
1407
1408     def in_download_archive(self, info_dict):
1409         fn = self.params.get('download_archive')
1410         if fn is None:
1411             return False
1412
1413         vid_id = self._make_archive_id(info_dict)
1414         if vid_id is None:
1415             return False  # Incomplete video information
1416
1417         try:
1418             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1419                 for line in archive_file:
1420                     if line.strip() == vid_id:
1421                         return True
1422         except IOError as ioe:
1423             if ioe.errno != errno.ENOENT:
1424                 raise
1425         return False
1426
1427     def record_download_archive(self, info_dict):
1428         fn = self.params.get('download_archive')
1429         if fn is None:
1430             return
1431         vid_id = self._make_archive_id(info_dict)
1432         assert vid_id
1433         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1434             archive_file.write(vid_id + '\n')
1435
1436     @staticmethod
1437     def format_resolution(format, default='unknown'):
1438         if format.get('vcodec') == 'none':
1439             return 'audio only'
1440         if format.get('resolution') is not None:
1441             return format['resolution']
1442         if format.get('height') is not None:
1443             if format.get('width') is not None:
1444                 res = '%sx%s' % (format['width'], format['height'])
1445             else:
1446                 res = '%sp' % format['height']
1447         elif format.get('width') is not None:
1448             res = '?x%d' % format['width']
1449         else:
1450             res = default
1451         return res
1452
1453     def _format_note(self, fdict):
1454         res = ''
1455         if fdict.get('ext') in ['f4f', 'f4m']:
1456             res += '(unsupported) '
1457         if fdict.get('format_note') is not None:
1458             res += fdict['format_note'] + ' '
1459         if fdict.get('tbr') is not None:
1460             res += '%4dk ' % fdict['tbr']
1461         if fdict.get('container') is not None:
1462             if res:
1463                 res += ', '
1464             res += '%s container' % fdict['container']
1465         if (fdict.get('vcodec') is not None and
1466                 fdict.get('vcodec') != 'none'):
1467             if res:
1468                 res += ', '
1469             res += fdict['vcodec']
1470             if fdict.get('vbr') is not None:
1471                 res += '@'
1472         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1473             res += 'video@'
1474         if fdict.get('vbr') is not None:
1475             res += '%4dk' % fdict['vbr']
1476         if fdict.get('fps') is not None:
1477             res += ', %sfps' % fdict['fps']
1478         if fdict.get('acodec') is not None:
1479             if res:
1480                 res += ', '
1481             if fdict['acodec'] == 'none':
1482                 res += 'video only'
1483             else:
1484                 res += '%-5s' % fdict['acodec']
1485         elif fdict.get('abr') is not None:
1486             if res:
1487                 res += ', '
1488             res += 'audio'
1489         if fdict.get('abr') is not None:
1490             res += '@%3dk' % fdict['abr']
1491         if fdict.get('asr') is not None:
1492             res += ' (%5dHz)' % fdict['asr']
1493         if fdict.get('filesize') is not None:
1494             if res:
1495                 res += ', '
1496             res += format_bytes(fdict['filesize'])
1497         elif fdict.get('filesize_approx') is not None:
1498             if res:
1499                 res += ', '
1500             res += '~' + format_bytes(fdict['filesize_approx'])
1501         return res
1502
1503     def list_formats(self, info_dict):
1504         def line(format, idlen=20):
1505             return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1506                 format['format_id'],
1507                 format['ext'],
1508                 self.format_resolution(format),
1509                 self._format_note(format),
1510             ))
1511
1512         formats = info_dict.get('formats', [info_dict])
1513         idlen = max(len('format code'),
1514                     max(len(f['format_id']) for f in formats))
1515         formats_s = [
1516             line(f, idlen) for f in formats
1517             if f.get('preference') is None or f['preference'] >= -1000]
1518         if len(formats) > 1:
1519             formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1520             formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1521
1522         header_line = line({
1523             'format_id': 'format code', 'ext': 'extension',
1524             'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1525         self.to_screen(
1526             '[info] Available formats for %s:\n%s\n%s' %
1527             (info_dict['id'], header_line, '\n'.join(formats_s)))
1528
1529     def list_thumbnails(self, info_dict):
1530         thumbnails = info_dict.get('thumbnails')
1531         if not thumbnails:
1532             tn_url = info_dict.get('thumbnail')
1533             if tn_url:
1534                 thumbnails = [{'id': '0', 'url': tn_url}]
1535             else:
1536                 self.to_screen(
1537                     '[info] No thumbnails present for %s' % info_dict['id'])
1538                 return
1539
1540         self.to_screen(
1541             '[info] Thumbnails for %s:' % info_dict['id'])
1542         self.to_screen(render_table(
1543             ['ID', 'width', 'height', 'URL'],
1544             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1545
1546     def urlopen(self, req):
1547         """ Start an HTTP download """
1548
1549         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1550         # always respected by websites, some tend to give out URLs with non percent-encoded
1551         # non-ASCII characters (see telemb.py, ard.py [#3412])
1552         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1553         # To work around aforementioned issue we will replace request's original URL with
1554         # percent-encoded one
1555         req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
1556         url = req if req_is_string else req.get_full_url()
1557         url_escaped = escape_url(url)
1558
1559         # Substitute URL if any change after escaping
1560         if url != url_escaped:
1561             if req_is_string:
1562                 req = url_escaped
1563             else:
1564                 req = compat_urllib_request.Request(
1565                     url_escaped, data=req.data, headers=req.headers,
1566                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1567
1568         return self._opener.open(req, timeout=self._socket_timeout)
1569
1570     def print_debug_header(self):
1571         if not self.params.get('verbose'):
1572             return
1573
1574         if type('') is not compat_str:
1575             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1576             self.report_warning(
1577                 'Your Python is broken! Update to a newer and supported version')
1578
1579         stdout_encoding = getattr(
1580             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1581         encoding_str = (
1582             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1583                 locale.getpreferredencoding(),
1584                 sys.getfilesystemencoding(),
1585                 stdout_encoding,
1586                 self.get_encoding()))
1587         write_string(encoding_str, encoding=None)
1588
1589         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1590         try:
1591             sp = subprocess.Popen(
1592                 ['git', 'rev-parse', '--short', 'HEAD'],
1593                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1594                 cwd=os.path.dirname(os.path.abspath(__file__)))
1595             out, err = sp.communicate()
1596             out = out.decode().strip()
1597             if re.match('[0-9a-f]+', out):
1598                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1599         except:
1600             try:
1601                 sys.exc_clear()
1602             except:
1603                 pass
1604         self._write_string('[debug] Python version %s - %s\n' % (
1605             platform.python_version(), platform_name()))
1606
1607         exe_versions = FFmpegPostProcessor.get_versions()
1608         exe_versions['rtmpdump'] = rtmpdump_version()
1609         exe_str = ', '.join(
1610             '%s %s' % (exe, v)
1611             for exe, v in sorted(exe_versions.items())
1612             if v
1613         )
1614         if not exe_str:
1615             exe_str = 'none'
1616         self._write_string('[debug] exe versions: %s\n' % exe_str)
1617
1618         proxy_map = {}
1619         for handler in self._opener.handlers:
1620             if hasattr(handler, 'proxies'):
1621                 proxy_map.update(handler.proxies)
1622         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1623
1624         if self.params.get('call_home', False):
1625             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1626             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1627             latest_version = self.urlopen(
1628                 'https://yt-dl.org/latest/version').read().decode('utf-8')
1629             if version_tuple(latest_version) > version_tuple(__version__):
1630                 self.report_warning(
1631                     'You are using an outdated version (newest version: %s)! '
1632                     'See https://yt-dl.org/update if you need help updating.' %
1633                     latest_version)
1634
1635     def _setup_opener(self):
1636         timeout_val = self.params.get('socket_timeout')
1637         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1638
1639         opts_cookiefile = self.params.get('cookiefile')
1640         opts_proxy = self.params.get('proxy')
1641
1642         if opts_cookiefile is None:
1643             self.cookiejar = compat_cookiejar.CookieJar()
1644         else:
1645             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1646                 opts_cookiefile)
1647             if os.access(opts_cookiefile, os.R_OK):
1648                 self.cookiejar.load()
1649
1650         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1651             self.cookiejar)
1652         if opts_proxy is not None:
1653             if opts_proxy == '':
1654                 proxies = {}
1655             else:
1656                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1657         else:
1658             proxies = compat_urllib_request.getproxies()
1659             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1660             if 'http' in proxies and 'https' not in proxies:
1661                 proxies['https'] = proxies['http']
1662         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1663
1664         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1665         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1666         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1667         opener = compat_urllib_request.build_opener(
1668             https_handler, proxy_handler, cookie_processor, ydlh)
1669         # Delete the default user-agent header, which would otherwise apply in
1670         # cases where our custom HTTP handler doesn't come into play
1671         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1672         opener.addheaders = []
1673         self._opener = opener
1674
1675     def encode(self, s):
1676         if isinstance(s, bytes):
1677             return s  # Already encoded
1678
1679         try:
1680             return s.encode(self.get_encoding())
1681         except UnicodeEncodeError as err:
1682             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1683             raise
1684
1685     def get_encoding(self):
1686         encoding = self.params.get('encoding')
1687         if encoding is None:
1688             encoding = preferredencoding()
1689         return encoding
1690
1691     def _write_thumbnails(self, info_dict, filename):
1692         if self.params.get('writethumbnail', False):
1693             thumbnails = info_dict.get('thumbnails')
1694             if thumbnails:
1695                 thumbnails = [thumbnails[-1]]
1696         elif self.params.get('write_all_thumbnails', False):
1697             thumbnails = info_dict.get('thumbnails')
1698         else:
1699             return
1700
1701         if not thumbnails:
1702             # No thumbnails present, so return immediately
1703             return
1704
1705         for t in thumbnails:
1706             thumb_ext = determine_ext(t['url'], 'jpg')
1707             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
1708             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
1709             thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
1710
1711             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1712                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
1713                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1714             else:
1715                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
1716                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1717                 try:
1718                     uf = self.urlopen(t['url'])
1719                     with open(thumb_filename, 'wb') as thumbf:
1720                         shutil.copyfileobj(uf, thumbf)
1721                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
1722                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
1723                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1724                     self.report_warning('Unable to download thumbnail "%s": %s' %
1725                                         (t['url'], compat_str(err)))