git.bitcoin.ninja Git - youtube-dl/blob - youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import datetime
   8 import errno
   9 import io
  10 import itertools
  11 import json
  12 import locale
  13 import operator
  14 import os
  15 import platform
  16 import re
  17 import shutil
  18 import subprocess
  19 import socket
  20 import sys
  21 import time
  22 import traceback
  23
  24 if os.name == 'nt':
  25     import ctypes
  26
  27 from .compat import (
  28     compat_cookiejar,
  29     compat_expanduser,
  30     compat_http_client,
  31     compat_kwargs,
  32     compat_str,
  33     compat_urllib_error,
  34     compat_urllib_request,
  35 )
  36 from .utils import (
  37     escape_url,
  38     ContentTooShortError,
  39     date_from_str,
  40     DateRange,
  41     DEFAULT_OUTTMPL,
  42     determine_ext,
  43     DownloadError,
  44     encodeFilename,
  45     ExtractorError,
  46     format_bytes,
  47     formatSeconds,
  48     get_term_width,
  49     locked_file,
  50     make_HTTPS_handler,
  51     MaxDownloadsReached,
  52     PagedList,
  53     parse_filesize,
  54     PostProcessingError,
  55     platform_name,
  56     preferredencoding,
  57     render_table,
  58     SameFileError,
  59     sanitize_filename,
  60     std_headers,
  61     subtitles_filename,
  62     takewhile_inclusive,
  63     UnavailableVideoError,
  64     url_basename,
  65     version_tuple,
  66     write_json_file,
  67     write_string,
  68     YoutubeDLHandler,
  69     prepend_extension,
  70     args_to_str,
  71     age_restricted,
  72 )
  73 from .cache import Cache
  74 from .extractor import get_info_extractor, gen_extractors
  75 from .downloader import get_suitable_downloader
  76 from .downloader.rtmp import rtmpdump_version
  77 from .postprocessor import (
  78     FFmpegFixupM4aPP,
  79     FFmpegFixupStretchedPP,
  80     FFmpegMergerPP,
  81     FFmpegPostProcessor,
  82     get_postprocessor,
  83 )
  84 from .version import __version__
  85
  86
  87 class YoutubeDL(object):
  88     """YoutubeDL class.
  89
  90     YoutubeDL objects are the ones responsible for downloading the
  91     actual video file and writing it to disk if the user has requested
  92     it, among some other tasks. In most cases there should be one per
  93     program. As, given a video URL, the downloader doesn't know how to
  94     extract all the needed information, task that InfoExtractors do, it
  95     has to pass the URL to one of them.
  96
  97     For this, YoutubeDL objects have a method that allows
  98     InfoExtractors to be registered in a given order. When it is passed
  99     a URL, the YoutubeDL object hands it to the first InfoExtractor it
 100     finds that reports being able to handle it. The InfoExtractor extracts
 101     all the information about the video or videos the URL refers to, and
 102     YoutubeDL processes the extracted information, possibly using a File
 103     Downloader to download the video.
 104
 105     YoutubeDL objects accept a lot of parameters. In order not to saturate
 106     the object constructor with arguments, it receives a dictionary of
 107     options instead. These options are available through the params
 108     attribute for the InfoExtractors to use. The YoutubeDL also
 109     registers itself as the downloader in charge for the InfoExtractors
 110     that are added to it, so this is a "mutual registration".
 111
 112     Available options:
 113
 114     username:          Username for authentication purposes.
 115     password:          Password for authentication purposes.
 116     videopassword:     Password for accessing a video.
 117     usenetrc:          Use netrc for authentication instead.
 118     verbose:           Print additional info to stdout.
 119     quiet:             Do not print messages to stdout.
 120     no_warnings:       Do not print out anything for warnings.
 121     forceurl:          Force printing final URL.
 122     forcetitle:        Force printing title.
 123     forceid:           Force printing ID.
 124     forcethumbnail:    Force printing thumbnail URL.
 125     forcedescription:  Force printing description.
 126     forcefilename:     Force printing final filename.
 127     forceduration:     Force printing duration.
 128     forcejson:         Force printing info_dict as JSON.
 129     dump_single_json:  Force printing the info_dict of the whole playlist
 130                        (or video) as a single JSON line.
 131     simulate:          Do not download the video files.
 132     format:            Video format code. See options.py for more information.
 133     format_limit:      Highest quality format to try.
 134     outtmpl:           Template for output names.
 135     restrictfilenames: Do not allow "&" and spaces in file names
 136     ignoreerrors:      Do not stop on download errors.
 137     nooverwrites:      Prevent overwriting files.
 138     playliststart:     Playlist item to start at.
 139     playlistend:       Playlist item to end at.
 140     playlist_items:    Specific indices of playlist to download.
 141     playlistreverse:   Download playlist items in reverse order.
 142     matchtitle:        Download only matching titles.
 143     rejecttitle:       Reject downloads for matching titles.
 144     logger:            Log messages to a logging.Logger instance.
 145     logtostderr:       Log messages to stderr instead of stdout.
 146     writedescription:  Write the video description to a .description file
 147     writeinfojson:     Write the video description to a .info.json file
 148     writeannotations:  Write the video annotations to a .annotations.xml file
 149     writethumbnail:    Write the thumbnail image to a file
 150     write_all_thumbnails:  Write all thumbnail formats to files
 151     writesubtitles:    Write the video subtitles to a file
 152     writeautomaticsub: Write the automatic subtitles to a file
 153     allsubtitles:      Downloads all the subtitles of the video
 154                        (requires writesubtitles or writeautomaticsub)
 155     listsubtitles:     Lists all available subtitles for the video
 156     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
 157     subtitleslangs:    List of languages of the subtitles to download
 158     keepvideo:         Keep the video file after post-processing
 159     daterange:         A DateRange object, download only if the upload_date is in the range.
 160     skip_download:     Skip the actual download of the video file
 161     cachedir:          Location of the cache files in the filesystem.
 162                        False to disable filesystem cache.
 163     noplaylist:        Download single video instead of a playlist if in doubt.
 164     age_limit:         An integer representing the user's age in years.
 165                        Unsuitable videos for the given age are skipped.
 166     min_views:         An integer representing the minimum view count the video
 167                        must have in order to not be skipped.
 168                        Videos without view count information are always
 169                        downloaded. None for no limit.
 170     max_views:         An integer representing the maximum view count.
 171                        Videos that are more popular than that are not
 172                        downloaded.
 173                        Videos without view count information are always
 174                        downloaded. None for no limit.
 175     download_archive:  File name of a file where all downloads are recorded.
 176                        Videos already present in the file are not downloaded
 177                        again.
 178     cookiefile:        File name where cookies should be read from and dumped to.
 179     nocheckcertificate:Do not verify SSL certificates
 180     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 181                        At the moment, this is only supported by YouTube.
 182     proxy:             URL of the proxy server to use
 183     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 184     bidi_workaround:   Work around buggy terminals without bidirectional text
 185                        support, using fribidi
 186     debug_printtraffic:Print out sent and received HTTP traffic
 187     include_ads:       Download ads as well
 188     default_search:    Prepend this string if an input url is not valid.
 189                        'auto' for elaborate guessing
 190     encoding:          Use this encoding instead of the system-specified.
 191     extract_flat:      Do not resolve URLs, return the immediate result.
 192                        Pass in 'in_playlist' to only show this behavior for
 193                        playlist items.
 194     postprocessors:    A list of dictionaries, each with an entry
 195                        * key:  The name of the postprocessor. See
 196                                youtube_dl/postprocessor/__init__.py for a list.
 197                        as well as any further keyword arguments for the
 198                        postprocessor.
 199     progress_hooks:    A list of functions that get called on download
 200                        progress, with a dictionary with the entries
 201                        * status: One of "downloading" and "finished".
 202                                  Check this first and ignore unknown values.
 203
 204                        If status is one of "downloading" or "finished", the
 205                        following properties may also be present:
 206                        * filename: The final filename (always present)
 207                        * downloaded_bytes: Bytes on disk
 208                        * total_bytes: Size of the whole file, None if unknown
 209                        * tmpfilename: The filename we're currently writing to
 210                        * eta: The estimated time in seconds, None if unknown
 211                        * speed: The download speed in bytes/second, None if
 212                                 unknown
 213
 214                        Progress hooks are guaranteed to be called at least once
 215                        (with status "finished") if the download is successful.
 216     merge_output_format: Extension to use when merging formats.
 217     fixup:             Automatically correct known faults of the file.
 218                        One of:
 219                        - "never": do nothing
 220                        - "warn": only emit a warning
 221                        - "detect_or_warn": check whether we can do anything
 222                                            about it, warn otherwise (default)
 223     source_address:    (Experimental) Client-side IP address to bind to.
 224     call_home:         Boolean, true iff we are allowed to contact the
 225                        youtube-dl servers for debugging.
 226     sleep_interval:    Number of seconds to sleep before each download.
 227     external_downloader:  Executable of the external downloader to call.
 228     listformats:       Print an overview of available video formats and exit.
 229     list_thumbnails:   Print a table of all thumbnails and exit.
 230
 231
 232     The following parameters are not used by YoutubeDL itself, they are used by
 233     the FileDownloader:
 234     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 235     noresizebuffer, retries, continuedl, noprogress, consoletitle,
 236     xattr_set_filesize.
 237
 238     The following options are used by the post processors:
 239     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
 240                        otherwise prefer avconv.
 241     exec_cmd:          Arbitrary command to run after downloading
 242     """
 243
    # Class-level placeholders. __init__ rebinds each of these names on the
    # instance, so the two list objects below are not the ones a normally
    # constructed instance appends to.
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None
 250
 251     def __init__(self, params=None, auto_init=True):
 252         """Create a FileDownloader object with the given options."""
 253         if params is None:
 254             params = {}
 255         self._ies = []
 256         self._ies_instances = {}
 257         self._pps = []
 258         self._progress_hooks = []
 259         self._download_retcode = 0
 260         self._num_downloads = 0
 261         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 262         self._err_file = sys.stderr
 263         self.params = params
 264         self.cache = Cache(self)
 265
 266         if params.get('bidi_workaround', False):
 267             try:
 268                 import pty
 269                 master, slave = pty.openpty()
 270                 width = get_term_width()
 271                 if width is None:
 272                     width_args = []
 273                 else:
 274                     width_args = ['-w', str(width)]
 275                 sp_kwargs = dict(
 276                     stdin=subprocess.PIPE,
 277                     stdout=slave,
 278                     stderr=self._err_file)
 279                 try:
 280                     self._output_process = subprocess.Popen(
 281                         ['bidiv'] + width_args, **sp_kwargs
 282                     )
 283                 except OSError:
 284                     self._output_process = subprocess.Popen(
 285                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 286                 self._output_channel = os.fdopen(master, 'rb')
 287             except OSError as ose:
 288                 if ose.errno == 2:
 289                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 290                 else:
 291                     raise
 292
 293         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 294                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
 295                 and not params.get('restrictfilenames', False)):
 296             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 297             self.report_warning(
 298                 'Assuming --restrict-filenames since file system encoding '
 299                 'cannot encode all characters. '
 300                 'Set the LC_ALL environment variable to fix this.')
 301             self.params['restrictfilenames'] = True
 302
 303         if '%(stitle)s' in self.params.get('outtmpl', ''):
 304             self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 305
 306         self._setup_opener()
 307
 308         if auto_init:
 309             self.print_debug_header()
 310             self.add_default_info_extractors()
 311
 312         for pp_def_raw in self.params.get('postprocessors', []):
 313             pp_class = get_postprocessor(pp_def_raw['key'])
 314             pp_def = dict(pp_def_raw)
 315             del pp_def['key']
 316             pp = pp_class(self, **compat_kwargs(pp_def))
 317             self.add_post_processor(pp)
 318
 319         for ph in self.params.get('progress_hooks', []):
 320             self.add_progress_hook(ph)
 321
 322     def warn_if_short_id(self, argv):
 323         # short YouTube ID starting with dash?
 324         idxs = [
 325             i for i, a in enumerate(argv)
 326             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 327         if idxs:
 328             correct_argv = (
 329                 ['youtube-dl'] +
 330                 [a for i, a in enumerate(argv) if i not in idxs] +
 331                 ['--'] + [argv[i] for i in idxs]
 332             )
 333             self.report_warning(
 334                 'Long argument string detected. '
 335                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 336                 args_to_str(correct_argv))
 337
 338     def add_info_extractor(self, ie):
 339         """Add an InfoExtractor object to the end of the list."""
 340         self._ies.append(ie)
 341         self._ies_instances[ie.ie_key()] = ie
 342         ie.set_downloader(self)
 343
 344     def get_info_extractor(self, ie_key):
 345         """
 346         Get an instance of an IE with name ie_key, it will try to get one from
 347         the _ies list, if there's no instance it will create a new one and add
 348         it to the extractor list.
 349         """
 350         ie = self._ies_instances.get(ie_key)
 351         if ie is None:
 352             ie = get_info_extractor(ie_key)()
 353             self.add_info_extractor(ie)
 354         return ie
 355
 356     def add_default_info_extractors(self):
 357         """
 358         Add the InfoExtractors returned by gen_extractors to the end of the list
 359         """
 360         for ie in gen_extractors():
 361             self.add_info_extractor(ie)
 362
 363     def add_post_processor(self, pp):
 364         """Add a PostProcessor object to the end of the chain."""
 365         self._pps.append(pp)
 366         pp.set_downloader(self)
 367
 368     def add_progress_hook(self, ph):
 369         """Add the progress hook (currently only for the file downloader)"""
 370         self._progress_hooks.append(ph)
 371
 372     def _bidi_workaround(self, message):
 373         if not hasattr(self, '_output_channel'):
 374             return message
 375
 376         assert hasattr(self, '_output_process')
 377         assert isinstance(message, compat_str)
 378         line_count = message.count('\n') + 1
 379         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 380         self._output_process.stdin.flush()
 381         res = ''.join(self._output_channel.readline().decode('utf-8')
 382                       for _ in range(line_count))
 383         return res[:-len('\n')]
 384
 385     def to_screen(self, message, skip_eol=False):
 386         """Print message to stdout if not in quiet mode."""
 387         return self.to_stdout(message, skip_eol, check_quiet=True)
 388
 389     def _write_string(self, s, out=None):
 390         write_string(s, out=out, encoding=self.params.get('encoding'))
 391
 392     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 393         """Print message to stdout if not in quiet mode."""
 394         if self.params.get('logger'):
 395             self.params['logger'].debug(message)
 396         elif not check_quiet or not self.params.get('quiet', False):
 397             message = self._bidi_workaround(message)
 398             terminator = ['\n', ''][skip_eol]
 399             output = message + terminator
 400
 401             self._write_string(output, self._screen_file)
 402
 403     def to_stderr(self, message):
 404         """Print message to stderr."""
 405         assert isinstance(message, compat_str)
 406         if self.params.get('logger'):
 407             self.params['logger'].error(message)
 408         else:
 409             message = self._bidi_workaround(message)
 410             output = message + '\n'
 411             self._write_string(output, self._err_file)
 412
 413     def to_console_title(self, message):
 414         if not self.params.get('consoletitle', False):
 415             return
 416         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 417             # c_wchar_p() might not be necessary if `message` is
 418             # already of type unicode()
 419             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 420         elif 'TERM' in os.environ:
 421             self._write_string('\033]0;%s\007' % message, self._screen_file)
 422
 423     def save_console_title(self):
 424         if not self.params.get('consoletitle', False):
 425             return
 426         if 'TERM' in os.environ:
 427             # Save the title on stack
 428             self._write_string('\033[22;0t', self._screen_file)
 429
 430     def restore_console_title(self):
 431         if not self.params.get('consoletitle', False):
 432             return
 433         if 'TERM' in os.environ:
 434             # Restore the title from stack
 435             self._write_string('\033[23;0t', self._screen_file)
 436
 437     def __enter__(self):
 438         self.save_console_title()
 439         return self
 440
 441     def __exit__(self, *args):
 442         self.restore_console_title()
 443
 444         if self.params.get('cookiefile') is not None:
 445             self.cookiejar.save()
 446
 447     def trouble(self, message=None, tb=None):
 448         """Determine action to take when a download problem appears.
 449
 450         Depending on if the downloader has been configured to ignore
 451         download errors or not, this method may throw an exception or
 452         not when errors are found, after printing the message.
 453
 454         tb, if given, is additional traceback information.
 455         """
 456         if message is not None:
 457             self.to_stderr(message)
 458         if self.params.get('verbose'):
 459             if tb is None:
 460                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 461                     tb = ''
 462                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 463                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 464                     tb += compat_str(traceback.format_exc())
 465                 else:
 466                     tb_data = traceback.format_list(traceback.extract_stack())
 467                     tb = ''.join(tb_data)
 468             self.to_stderr(tb)
 469         if not self.params.get('ignoreerrors', False):
 470             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 471                 exc_info = sys.exc_info()[1].exc_info
 472             else:
 473                 exc_info = sys.exc_info()
 474             raise DownloadError(message, exc_info)
 475         self._download_retcode = 1
 476
 477     def report_warning(self, message):
 478         '''
 479         Print the message to stderr, it will be prefixed with 'WARNING:'
 480         If stderr is a tty file the 'WARNING:' will be colored
 481         '''
 482         if self.params.get('logger') is not None:
 483             self.params['logger'].warning(message)
 484         else:
 485             if self.params.get('no_warnings'):
 486                 return
 487             if self._err_file.isatty() and os.name != 'nt':
 488                 _msg_header = '\033[0;33mWARNING:\033[0m'
 489             else:
 490                 _msg_header = 'WARNING:'
 491             warning_message = '%s %s' % (_msg_header, message)
 492             self.to_stderr(warning_message)
 493
 494     def report_error(self, message, tb=None):
 495         '''
 496         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 497         in red if stderr is a tty file.
 498         '''
 499         if self._err_file.isatty() and os.name != 'nt':
 500             _msg_header = '\033[0;31mERROR:\033[0m'
 501         else:
 502             _msg_header = 'ERROR:'
 503         error_message = '%s %s' % (_msg_header, message)
 504         self.trouble(error_message, tb)
 505
 506     def report_file_already_downloaded(self, file_name):
 507         """Report file has already been fully downloaded."""
 508         try:
 509             self.to_screen('[download] %s has already been downloaded' % file_name)
 510         except UnicodeEncodeError:
 511             self.to_screen('[download] The file has already been downloaded')
 512
 513     def prepare_filename(self, info_dict):
 514         """Generate the output filename."""
 515         try:
 516             template_dict = dict(info_dict)
 517
 518             template_dict['epoch'] = int(time.time())
 519             autonumber_size = self.params.get('autonumber_size')
 520             if autonumber_size is None:
 521                 autonumber_size = 5
 522             autonumber_templ = '%0' + str(autonumber_size) + 'd'
 523             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 524             if template_dict.get('playlist_index') is not None:
 525                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
 526             if template_dict.get('resolution') is None:
 527                 if template_dict.get('width') and template_dict.get('height'):
 528                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 529                 elif template_dict.get('height'):
 530                     template_dict['resolution'] = '%sp' % template_dict['height']
 531                 elif template_dict.get('width'):
 532                     template_dict['resolution'] = '?x%d' % template_dict['width']
 533
 534             sanitize = lambda k, v: sanitize_filename(
 535                 compat_str(v),
 536                 restricted=self.params.get('restrictfilenames'),
 537                 is_id=(k == 'id'))
 538             template_dict = dict((k, sanitize(k, v))
 539                                  for k, v in template_dict.items()
 540                                  if v is not None)
 541             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 542
 543             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 544             tmpl = compat_expanduser(outtmpl)
 545             filename = tmpl % template_dict
 546             return filename
 547         except ValueError as err:
 548             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 549             return None
 550
 551     def _match_entry(self, info_dict):
 552         """ Returns None iff the file should be downloaded """
 553
 554         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 555         if 'title' in info_dict:
 556             # This can happen when we're just evaluating the playlist
 557             title = info_dict['title']
 558             matchtitle = self.params.get('matchtitle', False)
 559             if matchtitle:
 560                 if not re.search(matchtitle, title, re.IGNORECASE):
 561                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 562             rejecttitle = self.params.get('rejecttitle', False)
 563             if rejecttitle:
 564                 if re.search(rejecttitle, title, re.IGNORECASE):
 565                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 566         date = info_dict.get('upload_date', None)
 567         if date is not None:
 568             dateRange = self.params.get('daterange', DateRange())
 569             if date not in dateRange:
 570                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 571         view_count = info_dict.get('view_count', None)
 572         if view_count is not None:
 573             min_views = self.params.get('min_views')
 574             if min_views is not None and view_count < min_views:
 575                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 576             max_views = self.params.get('max_views')
 577             if max_views is not None and view_count > max_views:
 578                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 579         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
 580             return 'Skipping "%s" because it is age restricted' % title
 581         if self.in_download_archive(info_dict):
 582             return '%s has already been recorded in archive' % video_title
 583         return None
 584
 585     @staticmethod
 586     def add_extra_info(info_dict, extra_info):
 587         '''Set the keys from extra_info in info dict if they are missing'''
 588         for key, value in extra_info.items():
 589             info_dict.setdefault(key, value)
 590
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
                     process=True):
        '''
        Extract the information for url with the first suitable InfoExtractor.

        ie_key: if given, only the extractor with that key is tried;
                otherwise the registered extractors are tried in order.
        download and extra_info are passed on to process_ie_result().
        process: if true, the result of process_ie_result() is returned;
                 otherwise the extractor's result is returned after the
                 default extra info has been added to it.

        Returns None when the extractor returned None, when an error was
        reported and report_error() returned, or when no extractor is
        suitable for the URL.
        '''
        # NOTE(review): extra_info={} is a shared mutable default. Nothing in
        # this method mutates it, but confirm that the callees don't either.

        if ie_key:
            ies = [self.get_info_extractor(ie_key)]
        else:
            ies = self._ies

        # Only the first suitable extractor is used: every path below either
        # returns, breaks out of the loop or raises.
        for ie in ies:
            if not ie.suitable(url):
                continue

            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            try:
                ie_result = ie.extract(url)
                if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                    break
                if isinstance(ie_result, list):
                    # Backwards compatibility: old IE result format
                    ie_result = {
                        '_type': 'compat_list',
                        'entries': ie_result,
                    }
                self.add_default_extra_info(ie_result, ie, url)
                if process:
                    return self.process_ie_result(ie_result, download, extra_info)
                else:
                    return ie_result
            except ExtractorError as de:  # An error we somewhat expected
                self.report_error(compat_str(de), de.format_traceback())
                break
            except MaxDownloadsReached:
                # Always propagated, regardless of 'ignoreerrors'.
                raise
            except Exception as e:
                if self.params.get('ignoreerrors', False):
                    self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                    break
                else:
                    raise
        else:
            # Runs only when the loop ended without a break, i.e. no
            # extractor reported being suitable for the URL.
            self.report_error('no suitable InfoExtractor for URL %s' % url)
 640
 641     def add_default_extra_info(self, ie_result, ie, url):
 642         self.add_extra_info(ie_result, {
 643             'extractor': ie.IE_NAME,
 644             'webpage_url': url,
 645             'webpage_url_basename': url_basename(url),
 646             'extractor_key': ie.ie_key(),
 647         })
 648
 649     def process_ie_result(self, ie_result, download=True, extra_info={}):
 650         """
 651         Take the result of the ie(may be modified) and resolve all unresolved
 652         references (URLs, playlist items).
 653
 654         It will also download the videos if 'download'.
 655         Returns the resolved ie_result.
 656         """
 657
 658         result_type = ie_result.get('_type', 'video')
 659
 660         if result_type in ('url', 'url_transparent'):
 661             extract_flat = self.params.get('extract_flat', False)
 662             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
 663                     extract_flat is True):
 664                 if self.params.get('forcejson', False):
 665                     self.to_stdout(json.dumps(ie_result))
 666                 return ie_result
 667
 668         if result_type == 'video':
 669             self.add_extra_info(ie_result, extra_info)
 670             return self.process_video_result(ie_result, download=download)
 671         elif result_type == 'url':
 672             # We have to add extra_info to the results because it may be
 673             # contained in a playlist
 674             return self.extract_info(ie_result['url'],
 675                                      download,
 676                                      ie_key=ie_result.get('ie_key'),
 677                                      extra_info=extra_info)
 678         elif result_type == 'url_transparent':
 679             # Use the information from the embedding page
 680             info = self.extract_info(
 681                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 682                 extra_info=extra_info, download=False, process=False)
 683
 684             force_properties = dict(
 685                 (k, v) for k, v in ie_result.items() if v is not None)
 686             for f in ('_type', 'url'):
 687                 if f in force_properties:
 688                     del force_properties[f]
 689             new_result = info.copy()
 690             new_result.update(force_properties)
 691
 692             assert new_result.get('_type') != 'url_transparent'
 693
 694             return self.process_ie_result(
 695                 new_result, download=download, extra_info=extra_info)
 696         elif result_type == 'playlist' or result_type == 'multi_video':
 697             # We process each entry in the playlist
 698             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 699             self.to_screen('[download] Downloading playlist: %s' % playlist)
 700
 701             playlist_results = []
 702
 703             playliststart = self.params.get('playliststart', 1) - 1
 704             playlistend = self.params.get('playlistend', None)
 705             # For backwards compatibility, interpret -1 as whole list
 706             if playlistend == -1:
 707                 playlistend = None
 708
 709             playlistitems_str = self.params.get('playlist_items', None)
 710             playlistitems = None
 711             if playlistitems_str is not None:
 712                 def iter_playlistitems(format):
 713                     for string_segment in format.split(','):
 714                         if '-' in string_segment:
 715                             start, end = string_segment.split('-')
 716                             for item in range(int(start), int(end) + 1):
 717                                 yield int(item)
 718                         else:
 719                             yield int(string_segment)
 720                 playlistitems = iter_playlistitems(playlistitems_str)
 721
 722             ie_entries = ie_result['entries']
 723             if isinstance(ie_entries, list):
 724                 n_all_entries = len(ie_entries)
 725                 if playlistitems:
 726                     entries = [ie_entries[i - 1] for i in playlistitems]
 727                 else:
 728                     entries = ie_entries[playliststart:playlistend]
 729                 n_entries = len(entries)
 730                 self.to_screen(
 731                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
 732                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
 733             elif isinstance(ie_entries, PagedList):
 734                 if playlistitems:
 735                     entries = []
 736                     for item in playlistitems:
 737                         entries.extend(ie_entries.getslice(
 738                             item - 1, item
 739                         ))
 740                 else:
 741                     entries = ie_entries.getslice(
 742                         playliststart, playlistend)
 743                 n_entries = len(entries)
 744                 self.to_screen(
 745                     "[%s] playlist %s: Downloading %d videos" %
 746                     (ie_result['extractor'], playlist, n_entries))
 747             else:  # iterable
 748                 if playlistitems:
 749                     entry_list = list(ie_entries)
 750                     entries = [entry_list[i - 1] for i in playlistitems]
 751                 else:
 752                     entries = list(itertools.islice(
 753                         ie_entries, playliststart, playlistend))
 754                 n_entries = len(entries)
 755                 self.to_screen(
 756                     "[%s] playlist %s: Downloading %d videos" %
 757                     (ie_result['extractor'], playlist, n_entries))
 758
 759             if self.params.get('playlistreverse', False):
 760                 entries = entries[::-1]
 761
 762             for i, entry in enumerate(entries, 1):
 763                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
 764                 extra = {
 765                     'n_entries': n_entries,
 766                     'playlist': playlist,
 767                     'playlist_id': ie_result.get('id'),
 768                     'playlist_title': ie_result.get('title'),
 769                     'playlist_index': i + playliststart,
 770                     'extractor': ie_result['extractor'],
 771                     'webpage_url': ie_result['webpage_url'],
 772                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
 773                     'extractor_key': ie_result['extractor_key'],
 774                 }
 775
 776                 reason = self._match_entry(entry)
 777                 if reason is not None:
 778                     self.to_screen('[download] ' + reason)
 779                     continue
 780
 781                 entry_result = self.process_ie_result(entry,
 782                                                       download=download,
 783                                                       extra_info=extra)
 784                 playlist_results.append(entry_result)
 785             ie_result['entries'] = playlist_results
 786             return ie_result
 787         elif result_type == 'compat_list':
 788             self.report_warning(
 789                 'Extractor %s returned a compat_list result. '
 790                 'It needs to be updated.' % ie_result.get('extractor'))
 791
 792             def _fixup(r):
 793                 self.add_extra_info(
 794                     r,
 795                     {
 796                         'extractor': ie_result['extractor'],
 797                         'webpage_url': ie_result['webpage_url'],
 798                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
 799                         'extractor_key': ie_result['extractor_key'],
 800                     }
 801                 )
 802                 return r
 803             ie_result['entries'] = [
 804                 self.process_ie_result(_fixup(r), download, extra_info)
 805                 for r in ie_result['entries']
 806             ]
 807             return ie_result
 808         else:
 809             raise Exception('Invalid result type: %s' % result_type)
 810
 811     def _apply_format_filter(self, format_spec, available_formats):
 812         " Returns a tuple of the remaining format_spec and filtered formats "
 813
 814         OPERATORS = {
 815             '<': operator.lt,
 816             '<=': operator.le,
 817             '>': operator.gt,
 818             '>=': operator.ge,
 819             '=': operator.eq,
 820             '!=': operator.ne,
 821         }
 822         operator_rex = re.compile(r'''(?x)\s*\[
 823             (?P<key>width|height|tbr|abr|vbr|filesize)
 824             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
 825             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
 826             \]$
 827             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
 828         m = operator_rex.search(format_spec)
 829         if not m:
 830             raise ValueError('Invalid format specification %r' % format_spec)
 831
 832         try:
 833             comparison_value = int(m.group('value'))
 834         except ValueError:
 835             comparison_value = parse_filesize(m.group('value'))
 836             if comparison_value is None:
 837                 comparison_value = parse_filesize(m.group('value') + 'B')
 838             if comparison_value is None:
 839                 raise ValueError(
 840                     'Invalid value %r in format specification %r' % (
 841                         m.group('value'), format_spec))
 842         op = OPERATORS[m.group('op')]
 843
 844         def _filter(f):
 845             actual_value = f.get(m.group('key'))
 846             if actual_value is None:
 847                 return m.group('none_inclusive')
 848             return op(actual_value, comparison_value)
 849         new_formats = [f for f in available_formats if _filter(f)]
 850
 851         new_format_spec = format_spec[:-len(m.group(0))]
 852         if not new_format_spec:
 853             new_format_spec = 'best'
 854
 855         return (new_format_spec, new_formats)
 856
 857     def select_format(self, format_spec, available_formats):
 858         while format_spec.endswith(']'):
 859             format_spec, available_formats = self._apply_format_filter(
 860                 format_spec, available_formats)
 861         if not available_formats:
 862             return None
 863
 864         if format_spec == 'best' or format_spec is None:
 865             return available_formats[-1]
 866         elif format_spec == 'worst':
 867             return available_formats[0]
 868         elif format_spec == 'bestaudio':
 869             audio_formats = [
 870                 f for f in available_formats
 871                 if f.get('vcodec') == 'none']
 872             if audio_formats:
 873                 return audio_formats[-1]
 874         elif format_spec == 'worstaudio':
 875             audio_formats = [
 876                 f for f in available_formats
 877                 if f.get('vcodec') == 'none']
 878             if audio_formats:
 879                 return audio_formats[0]
 880         elif format_spec == 'bestvideo':
 881             video_formats = [
 882                 f for f in available_formats
 883                 if f.get('acodec') == 'none']
 884             if video_formats:
 885                 return video_formats[-1]
 886         elif format_spec == 'worstvideo':
 887             video_formats = [
 888                 f for f in available_formats
 889                 if f.get('acodec') == 'none']
 890             if video_formats:
 891                 return video_formats[0]
 892         else:
 893             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
 894             if format_spec in extensions:
 895                 filter_f = lambda f: f['ext'] == format_spec
 896             else:
 897                 filter_f = lambda f: f['format_id'] == format_spec
 898             matches = list(filter(filter_f, available_formats))
 899             if matches:
 900                 return matches[-1]
 901         return None
 902
 903     def _calc_headers(self, info_dict):
 904         res = std_headers.copy()
 905
 906         add_headers = info_dict.get('http_headers')
 907         if add_headers:
 908             res.update(add_headers)
 909
 910         cookies = self._calc_cookies(info_dict)
 911         if cookies:
 912             res['Cookie'] = cookies
 913
 914         return res
 915
 916     def _calc_cookies(self, info_dict):
 917         class _PseudoRequest(object):
 918             def __init__(self, url):
 919                 self.url = url
 920                 self.headers = {}
 921                 self.unverifiable = False
 922
 923             def add_unredirected_header(self, k, v):
 924                 self.headers[k] = v
 925
 926             def get_full_url(self):
 927                 return self.url
 928
 929             def is_unverifiable(self):
 930                 return self.unverifiable
 931
 932             def has_header(self, h):
 933                 return h in self.headers
 934
 935         pr = _PseudoRequest(info_dict['url'])
 936         self.cookiejar.add_cookie_header(pr)
 937         return pr.headers.get('Cookie')
 938
 939     def process_video_result(self, info_dict, download=True):
 940         assert info_dict.get('_type', 'video') == 'video'
 941
 942         if 'id' not in info_dict:
 943             raise ExtractorError('Missing "id" field in extractor result')
 944         if 'title' not in info_dict:
 945             raise ExtractorError('Missing "title" field in extractor result')
 946
 947         if 'playlist' not in info_dict:
 948             # It isn't part of a playlist
 949             info_dict['playlist'] = None
 950             info_dict['playlist_index'] = None
 951
 952         thumbnails = info_dict.get('thumbnails')
 953         if thumbnails is None:
 954             thumbnail = info_dict.get('thumbnail')
 955             if thumbnail:
 956                 thumbnails = [{'url': thumbnail}]
 957         if thumbnails:
 958             thumbnails.sort(key=lambda t: (
 959                 t.get('preference'), t.get('width'), t.get('height'),
 960                 t.get('id'), t.get('url')))
 961             for t in thumbnails:
 962                 if 'width' in t and 'height' in t:
 963                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
 964
 965         if thumbnails and 'thumbnail' not in info_dict:
 966             info_dict['thumbnail'] = thumbnails[-1]['url']
 967
 968         if 'display_id' not in info_dict and 'id' in info_dict:
 969             info_dict['display_id'] = info_dict['id']
 970
 971         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
 972             # Working around negative timestamps in Windows
 973             # (see http://bugs.python.org/issue1646728)
 974             if info_dict['timestamp'] < 0 and os.name == 'nt':
 975                 info_dict['timestamp'] = 0
 976             upload_date = datetime.datetime.utcfromtimestamp(
 977                 info_dict['timestamp'])
 978             info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
 979
 980         # This extractors handle format selection themselves
 981         if info_dict['extractor'] in ['Youku']:
 982             if download:
 983                 self.process_info(info_dict)
 984             return info_dict
 985
 986         # We now pick which formats have to be downloaded
 987         if info_dict.get('formats') is None:
 988             # There's only one format available
 989             formats = [info_dict]
 990         else:
 991             formats = info_dict['formats']
 992
 993         if not formats:
 994             raise ExtractorError('No video formats found!')
 995
 996         # We check that all the formats have the format and format_id fields
 997         for i, format in enumerate(formats):
 998             if 'url' not in format:
 999                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1000
1001             if format.get('format_id') is None:
1002                 format['format_id'] = compat_str(i)
1003             if format.get('format') is None:
1004                 format['format'] = '{id} - {res}{note}'.format(
1005                     id=format['format_id'],
1006                     res=self.format_resolution(format),
1007                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1008                 )
1009             # Automatically determine file extension if missing
1010             if 'ext' not in format:
1011                 format['ext'] = determine_ext(format['url']).lower()
1012             # Add HTTP headers, so that external programs can use them from the
1013             # json output
1014             full_format_info = info_dict.copy()
1015             full_format_info.update(format)
1016             format['http_headers'] = self._calc_headers(full_format_info)
1017
1018         format_limit = self.params.get('format_limit', None)
1019         if format_limit:
1020             formats = list(takewhile_inclusive(
1021                 lambda f: f['format_id'] != format_limit, formats
1022             ))
1023
1024         # TODO Central sorting goes here
1025
1026         if formats[0] is not info_dict:
1027             # only set the 'formats' fields if the original info_dict list them
1028             # otherwise we end up with a circular reference, the first (and unique)
1029             # element in the 'formats' field in info_dict is info_dict itself,
1030             # wich can't be exported to json
1031             info_dict['formats'] = formats
1032         if self.params.get('listformats'):
1033             self.list_formats(info_dict)
1034             return
1035         if self.params.get('list_thumbnails'):
1036             self.list_thumbnails(info_dict)
1037             return
1038
1039         req_format = self.params.get('format')
1040         if req_format is None:
1041             req_format = 'best'
1042         formats_to_download = []
1043         # The -1 is for supporting YoutubeIE
1044         if req_format in ('-1', 'all'):
1045             formats_to_download = formats
1046         else:
1047             for rfstr in req_format.split(','):
1048                 # We can accept formats requested in the format: 34/5/best, we pick
1049                 # the first that is available, starting from left
1050                 req_formats = rfstr.split('/')
1051                 for rf in req_formats:
1052                     if re.match(r'.+?\+.+?', rf) is not None:
1053                         # Two formats have been requested like '137+139'
1054                         format_1, format_2 = rf.split('+')
1055                         formats_info = (self.select_format(format_1, formats),
1056                                         self.select_format(format_2, formats))
1057                         if all(formats_info):
1058                             # The first format must contain the video and the
1059                             # second the audio
1060                             if formats_info[0].get('vcodec') == 'none':
1061                                 self.report_error('The first format must '
1062                                                   'contain the video, try using '
1063                                                   '"-f %s+%s"' % (format_2, format_1))
1064                                 return
1065                             output_ext = (
1066                                 formats_info[0]['ext']
1067                                 if self.params.get('merge_output_format') is None
1068                                 else self.params['merge_output_format'])
1069                             selected_format = {
1070                                 'requested_formats': formats_info,
1071                                 'format': rf,
1072                                 'ext': formats_info[0]['ext'],
1073                                 'width': formats_info[0].get('width'),
1074                                 'height': formats_info[0].get('height'),
1075                                 'resolution': formats_info[0].get('resolution'),
1076                                 'fps': formats_info[0].get('fps'),
1077                                 'vcodec': formats_info[0].get('vcodec'),
1078                                 'vbr': formats_info[0].get('vbr'),
1079                                 'stretched_ratio': formats_info[0].get('stretched_ratio'),
1080                                 'acodec': formats_info[1].get('acodec'),
1081                                 'abr': formats_info[1].get('abr'),
1082                                 'ext': output_ext,
1083                             }
1084                         else:
1085                             selected_format = None
1086                     else:
1087                         selected_format = self.select_format(rf, formats)
1088                     if selected_format is not None:
1089                         formats_to_download.append(selected_format)
1090                         break
1091         if not formats_to_download:
1092             raise ExtractorError('requested format not available',
1093                                  expected=True)
1094
1095         if download:
1096             if len(formats_to_download) > 1:
1097                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1098             for format in formats_to_download:
1099                 new_info = dict(info_dict)
1100                 new_info.update(format)
1101                 self.process_info(new_info)
1102         # We update the info dict with the best quality format (backwards compatibility)
1103         info_dict.update(formats_to_download[-1])
1104         return info_dict
1105
1106     def process_info(self, info_dict):
1107         """Process a single resolved IE result."""
1108
1109         assert info_dict.get('_type', 'video') == 'video'
1110
1111         max_downloads = self.params.get('max_downloads')
1112         if max_downloads is not None:
1113             if self._num_downloads >= int(max_downloads):
1114                 raise MaxDownloadsReached()
1115
1116         info_dict['fulltitle'] = info_dict['title']
1117         if len(info_dict['title']) > 200:
1118             info_dict['title'] = info_dict['title'][:197] + '...'
1119
1120         # Keep for backwards compatibility
1121         info_dict['stitle'] = info_dict['title']
1122
1123         if 'format' not in info_dict:
1124             info_dict['format'] = info_dict['ext']
1125
1126         reason = self._match_entry(info_dict)
1127         if reason is not None:
1128             self.to_screen('[download] ' + reason)
1129             return
1130
1131         self._num_downloads += 1
1132
1133         filename = self.prepare_filename(info_dict)
1134
1135         # Forced printings
1136         if self.params.get('forcetitle', False):
1137             self.to_stdout(info_dict['fulltitle'])
1138         if self.params.get('forceid', False):
1139             self.to_stdout(info_dict['id'])
1140         if self.params.get('forceurl', False):
1141             if info_dict.get('requested_formats') is not None:
1142                 for f in info_dict['requested_formats']:
1143                     self.to_stdout(f['url'] + f.get('play_path', ''))
1144             else:
1145                 # For RTMP URLs, also include the playpath
1146                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1147         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1148             self.to_stdout(info_dict['thumbnail'])
1149         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1150             self.to_stdout(info_dict['description'])
1151         if self.params.get('forcefilename', False) and filename is not None:
1152             self.to_stdout(filename)
1153         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1154             self.to_stdout(formatSeconds(info_dict['duration']))
1155         if self.params.get('forceformat', False):
1156             self.to_stdout(info_dict['format'])
1157         if self.params.get('forcejson', False):
1158             info_dict['_filename'] = filename
1159             self.to_stdout(json.dumps(info_dict))
1160         if self.params.get('dump_single_json', False):
1161             info_dict['_filename'] = filename
1162
1163         # Do nothing else if in simulate mode
1164         if self.params.get('simulate', False):
1165             return
1166
1167         if filename is None:
1168             return
1169
1170         try:
1171             dn = os.path.dirname(encodeFilename(filename))
1172             if dn and not os.path.exists(dn):
1173                 os.makedirs(dn)
1174         except (OSError, IOError) as err:
1175             self.report_error('unable to create directory ' + compat_str(err))
1176             return
1177
1178         if self.params.get('writedescription', False):
1179             descfn = filename + '.description'
1180             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1181                 self.to_screen('[info] Video description is already present')
1182             elif info_dict.get('description') is None:
1183                 self.report_warning('There\'s no description to write.')
1184             else:
1185                 try:
1186                     self.to_screen('[info] Writing video description to: ' + descfn)
1187                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1188                         descfile.write(info_dict['description'])
1189                 except (OSError, IOError):
1190                     self.report_error('Cannot write description file ' + descfn)
1191                     return
1192
1193         if self.params.get('writeannotations', False):
1194             annofn = filename + '.annotations.xml'
1195             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1196                 self.to_screen('[info] Video annotations are already present')
1197             else:
1198                 try:
1199                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1200                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1201                         annofile.write(info_dict['annotations'])
1202                 except (KeyError, TypeError):
1203                     self.report_warning('There are no annotations to write.')
1204                 except (OSError, IOError):
1205                     self.report_error('Cannot write annotations file: ' + annofn)
1206                     return
1207
1208         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1209                                        self.params.get('writeautomaticsub')])
1210
1211         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
1212             # subtitles download errors are already managed as troubles in relevant IE
1213             # that way it will silently go on when used with unsupporting IE
1214             subtitles = info_dict['subtitles']
1215             sub_format = self.params.get('subtitlesformat', 'srt')
1216             for sub_lang in subtitles.keys():
1217                 sub = subtitles[sub_lang]
1218                 if sub is None:
1219                     continue
1220                 try:
1221                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1222                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1223                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1224                     else:
1225                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1226                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1227                             subfile.write(sub)
1228                 except (OSError, IOError):
1229                     self.report_error('Cannot write subtitles file ' + sub_filename)
1230                     return
1231
1232         if self.params.get('writeinfojson', False):
1233             infofn = os.path.splitext(filename)[0] + '.info.json'
1234             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1235                 self.to_screen('[info] Video description metadata is already present')
1236             else:
1237                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1238                 try:
1239                     write_json_file(info_dict, infofn)
1240                 except (OSError, IOError):
1241                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1242                     return
1243
1244         self._write_thumbnails(info_dict, filename)
1245
1246         if not self.params.get('skip_download', False):
1247             try:
1248                 def dl(name, info):
1249                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1250                     for ph in self._progress_hooks:
1251                         fd.add_progress_hook(ph)
1252                     if self.params.get('verbose'):
1253                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1254                     return fd.download(name, info)
1255
1256                 if info_dict.get('requested_formats') is not None:
1257                     downloaded = []
1258                     success = True
1259                     merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1260                     if not merger._executable:
1261                         postprocessors = []
1262                         self.report_warning('You have requested multiple '
1263                                             'formats but ffmpeg or avconv are not installed.'
1264                                             ' The formats won\'t be merged')
1265                     else:
1266                         postprocessors = [merger]
1267                     for f in info_dict['requested_formats']:
1268                         new_info = dict(info_dict)
1269                         new_info.update(f)
1270                         fname = self.prepare_filename(new_info)
1271                         fname = prepend_extension(fname, 'f%s' % f['format_id'])
1272                         downloaded.append(fname)
1273                         partial_success = dl(fname, new_info)
1274                         success = success and partial_success
1275                     info_dict['__postprocessors'] = postprocessors
1276                     info_dict['__files_to_merge'] = downloaded
1277                 else:
1278                     # Just a single file
1279                     success = dl(filename, info_dict)
1280             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1281                 self.report_error('unable to download video data: %s' % str(err))
1282                 return
1283             except (OSError, IOError) as err:
1284                 raise UnavailableVideoError(err)
1285             except (ContentTooShortError, ) as err:
1286                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1287                 return
1288
1289             if success:
1290                 # Fixup content
1291                 fixup_policy = self.params.get('fixup')
1292                 if fixup_policy is None:
1293                     fixup_policy = 'detect_or_warn'
1294
1295                 stretched_ratio = info_dict.get('stretched_ratio')
1296                 if stretched_ratio is not None and stretched_ratio != 1:
1297                     if fixup_policy == 'warn':
1298                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1299                             info_dict['id'], stretched_ratio))
1300                     elif fixup_policy == 'detect_or_warn':
1301                         stretched_pp = FFmpegFixupStretchedPP(self)
1302                         if stretched_pp.available:
1303                             info_dict.setdefault('__postprocessors', [])
1304                             info_dict['__postprocessors'].append(stretched_pp)
1305                         else:
1306                             self.report_warning(
1307                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1308                                     info_dict['id'], stretched_ratio))
1309                     else:
1310                         assert fixup_policy in ('ignore', 'never')
1311
1312                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1313                     if fixup_policy == 'warn':
1314                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1315                             info_dict['id']))
1316                     elif fixup_policy == 'detect_or_warn':
1317                         fixup_pp = FFmpegFixupM4aPP(self)
1318                         if fixup_pp.available:
1319                             info_dict.setdefault('__postprocessors', [])
1320                             info_dict['__postprocessors'].append(fixup_pp)
1321                         else:
1322                             self.report_warning(
1323                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1324                                     info_dict['id']))
1325                     else:
1326                         assert fixup_policy in ('ignore', 'never')
1327
1328                 try:
1329                     self.post_process(filename, info_dict)
1330                 except (PostProcessingError) as err:
1331                     self.report_error('postprocessing: %s' % str(err))
1332                     return
1333                 self.record_download_archive(info_dict)
1334
1335     def download(self, url_list):
1336         """Download a given list of URLs."""
1337         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1338         if (len(url_list) > 1 and
1339                 '%' not in outtmpl
1340                 and self.params.get('max_downloads') != 1):
1341             raise SameFileError(outtmpl)
1342
1343         for url in url_list:
1344             try:
1345                 # It also downloads the videos
1346                 res = self.extract_info(url)
1347             except UnavailableVideoError:
1348                 self.report_error('unable to download video')
1349             except MaxDownloadsReached:
1350                 self.to_screen('[info] Maximum number of downloaded files reached.')
1351                 raise
1352             else:
1353                 if self.params.get('dump_single_json', False):
1354                     self.to_stdout(json.dumps(res))
1355
1356         return self._download_retcode
1357
1358     def download_with_info_file(self, info_filename):
1359         with io.open(info_filename, 'r', encoding='utf-8') as f:
1360             info = json.load(f)
1361         try:
1362             self.process_ie_result(info, download=True)
1363         except DownloadError:
1364             webpage_url = info.get('webpage_url')
1365             if webpage_url is not None:
1366                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1367                 return self.download([webpage_url])
1368             else:
1369                 raise
1370         return self._download_retcode
1371
1372     def post_process(self, filename, ie_info):
1373         """Run all the postprocessors on the given file."""
1374         info = dict(ie_info)
1375         info['filepath'] = filename
1376         pps_chain = []
1377         if ie_info.get('__postprocessors') is not None:
1378             pps_chain.extend(ie_info['__postprocessors'])
1379         pps_chain.extend(self._pps)
1380         for pp in pps_chain:
1381             keep_video = None
1382             old_filename = info['filepath']
1383             try:
1384                 keep_video_wish, info = pp.run(info)
1385                 if keep_video_wish is not None:
1386                     if keep_video_wish:
1387                         keep_video = keep_video_wish
1388                     elif keep_video is None:
1389                         # No clear decision yet, let IE decide
1390                         keep_video = keep_video_wish
1391             except PostProcessingError as e:
1392                 self.report_error(e.msg)
1393             if keep_video is False and not self.params.get('keepvideo', False):
1394                 try:
1395                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1396                     os.remove(encodeFilename(old_filename))
1397                 except (IOError, OSError):
1398                     self.report_warning('Unable to remove downloaded video file')
1399
1400     def _make_archive_id(self, info_dict):
1401         # Future-proof against any change in case
1402         # and backwards compatibility with prior versions
1403         extractor = info_dict.get('extractor_key')
1404         if extractor is None:
1405             if 'id' in info_dict:
1406                 extractor = info_dict.get('ie_key')  # key in a playlist
1407         if extractor is None:
1408             return None  # Incomplete video information
1409         return extractor.lower() + ' ' + info_dict['id']
1410
1411     def in_download_archive(self, info_dict):
1412         fn = self.params.get('download_archive')
1413         if fn is None:
1414             return False
1415
1416         vid_id = self._make_archive_id(info_dict)
1417         if vid_id is None:
1418             return False  # Incomplete video information
1419
1420         try:
1421             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1422                 for line in archive_file:
1423                     if line.strip() == vid_id:
1424                         return True
1425         except IOError as ioe:
1426             if ioe.errno != errno.ENOENT:
1427                 raise
1428         return False
1429
1430     def record_download_archive(self, info_dict):
1431         fn = self.params.get('download_archive')
1432         if fn is None:
1433             return
1434         vid_id = self._make_archive_id(info_dict)
1435         assert vid_id
1436         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1437             archive_file.write(vid_id + '\n')
1438
1439     @staticmethod
1440     def format_resolution(format, default='unknown'):
1441         if format.get('vcodec') == 'none':
1442             return 'audio only'
1443         if format.get('resolution') is not None:
1444             return format['resolution']
1445         if format.get('height') is not None:
1446             if format.get('width') is not None:
1447                 res = '%sx%s' % (format['width'], format['height'])
1448             else:
1449                 res = '%sp' % format['height']
1450         elif format.get('width') is not None:
1451             res = '?x%d' % format['width']
1452         else:
1453             res = default
1454         return res
1455
1456     def _format_note(self, fdict):
1457         res = ''
1458         if fdict.get('ext') in ['f4f', 'f4m']:
1459             res += '(unsupported) '
1460         if fdict.get('format_note') is not None:
1461             res += fdict['format_note'] + ' '
1462         if fdict.get('tbr') is not None:
1463             res += '%4dk ' % fdict['tbr']
1464         if fdict.get('container') is not None:
1465             if res:
1466                 res += ', '
1467             res += '%s container' % fdict['container']
1468         if (fdict.get('vcodec') is not None and
1469                 fdict.get('vcodec') != 'none'):
1470             if res:
1471                 res += ', '
1472             res += fdict['vcodec']
1473             if fdict.get('vbr') is not None:
1474                 res += '@'
1475         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1476             res += 'video@'
1477         if fdict.get('vbr') is not None:
1478             res += '%4dk' % fdict['vbr']
1479         if fdict.get('fps') is not None:
1480             res += ', %sfps' % fdict['fps']
1481         if fdict.get('acodec') is not None:
1482             if res:
1483                 res += ', '
1484             if fdict['acodec'] == 'none':
1485                 res += 'video only'
1486             else:
1487                 res += '%-5s' % fdict['acodec']
1488         elif fdict.get('abr') is not None:
1489             if res:
1490                 res += ', '
1491             res += 'audio'
1492         if fdict.get('abr') is not None:
1493             res += '@%3dk' % fdict['abr']
1494         if fdict.get('asr') is not None:
1495             res += ' (%5dHz)' % fdict['asr']
1496         if fdict.get('filesize') is not None:
1497             if res:
1498                 res += ', '
1499             res += format_bytes(fdict['filesize'])
1500         elif fdict.get('filesize_approx') is not None:
1501             if res:
1502                 res += ', '
1503             res += '~' + format_bytes(fdict['filesize_approx'])
1504         return res
1505
1506     def list_formats(self, info_dict):
1507         def line(format, idlen=20):
1508             return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % (
1509                 format['format_id'],
1510                 format['ext'],
1511                 self.format_resolution(format),
1512                 self._format_note(format),
1513             ))
1514
1515         formats = info_dict.get('formats', [info_dict])
1516         idlen = max(len('format code'),
1517                     max(len(f['format_id']) for f in formats))
1518         formats_s = [
1519             line(f, idlen) for f in formats
1520             if f.get('preference') is None or f['preference'] >= -1000]
1521         if len(formats) > 1:
1522             formats_s[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
1523             formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'
1524
1525         header_line = line({
1526             'format_id': 'format code', 'ext': 'extension',
1527             'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
1528         self.to_screen(
1529             '[info] Available formats for %s:\n%s\n%s' %
1530             (info_dict['id'], header_line, '\n'.join(formats_s)))
1531
1532     def list_thumbnails(self, info_dict):
1533         thumbnails = info_dict.get('thumbnails')
1534         if not thumbnails:
1535             tn_url = info_dict.get('thumbnail')
1536             if tn_url:
1537                 thumbnails = [{'id': '0', 'url': tn_url}]
1538             else:
1539                 self.to_screen(
1540                     '[info] No thumbnails present for %s' % info_dict['id'])
1541                 return
1542
1543         self.to_screen(
1544             '[info] Thumbnails for %s:' % info_dict['id'])
1545         self.to_screen(render_table(
1546             ['ID', 'width', 'height', 'URL'],
1547             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1548
1549     def urlopen(self, req):
1550         """ Start an HTTP download """
1551
1552         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1553         # always respected by websites, some tend to give out URLs with non percent-encoded
1554         # non-ASCII characters (see telemb.py, ard.py [#3412])
1555         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1556         # To work around aforementioned issue we will replace request's original URL with
1557         # percent-encoded one
1558         req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str)
1559         url = req if req_is_string else req.get_full_url()
1560         url_escaped = escape_url(url)
1561
1562         # Substitute URL if any change after escaping
1563         if url != url_escaped:
1564             if req_is_string:
1565                 req = url_escaped
1566             else:
1567                 req = compat_urllib_request.Request(
1568                     url_escaped, data=req.data, headers=req.headers,
1569                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1570
1571         return self._opener.open(req, timeout=self._socket_timeout)
1572
1573     def print_debug_header(self):
1574         if not self.params.get('verbose'):
1575             return
1576
1577         if type('') is not compat_str:
1578             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1579             self.report_warning(
1580                 'Your Python is broken! Update to a newer and supported version')
1581
1582         stdout_encoding = getattr(
1583             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1584         encoding_str = (
1585             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1586                 locale.getpreferredencoding(),
1587                 sys.getfilesystemencoding(),
1588                 stdout_encoding,
1589                 self.get_encoding()))
1590         write_string(encoding_str, encoding=None)
1591
1592         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1593         try:
1594             sp = subprocess.Popen(
1595                 ['git', 'rev-parse', '--short', 'HEAD'],
1596                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1597                 cwd=os.path.dirname(os.path.abspath(__file__)))
1598             out, err = sp.communicate()
1599             out = out.decode().strip()
1600             if re.match('[0-9a-f]+', out):
1601                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1602         except:
1603             try:
1604                 sys.exc_clear()
1605             except:
1606                 pass
1607         self._write_string('[debug] Python version %s - %s\n' % (
1608             platform.python_version(), platform_name()))
1609
1610         exe_versions = FFmpegPostProcessor.get_versions()
1611         exe_versions['rtmpdump'] = rtmpdump_version()
1612         exe_str = ', '.join(
1613             '%s %s' % (exe, v)
1614             for exe, v in sorted(exe_versions.items())
1615             if v
1616         )
1617         if not exe_str:
1618             exe_str = 'none'
1619         self._write_string('[debug] exe versions: %s\n' % exe_str)
1620
1621         proxy_map = {}
1622         for handler in self._opener.handlers:
1623             if hasattr(handler, 'proxies'):
1624                 proxy_map.update(handler.proxies)
1625         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1626
1627         if self.params.get('call_home', False):
1628             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1629             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1630             latest_version = self.urlopen(
1631                 'https://yt-dl.org/latest/version').read().decode('utf-8')
1632             if version_tuple(latest_version) > version_tuple(__version__):
1633                 self.report_warning(
1634                     'You are using an outdated version (newest version: %s)! '
1635                     'See https://yt-dl.org/update if you need help updating.' %
1636                     latest_version)
1637
1638     def _setup_opener(self):
1639         timeout_val = self.params.get('socket_timeout')
1640         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1641
1642         opts_cookiefile = self.params.get('cookiefile')
1643         opts_proxy = self.params.get('proxy')
1644
1645         if opts_cookiefile is None:
1646             self.cookiejar = compat_cookiejar.CookieJar()
1647         else:
1648             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1649                 opts_cookiefile)
1650             if os.access(opts_cookiefile, os.R_OK):
1651                 self.cookiejar.load()
1652
1653         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1654             self.cookiejar)
1655         if opts_proxy is not None:
1656             if opts_proxy == '':
1657                 proxies = {}
1658             else:
1659                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1660         else:
1661             proxies = compat_urllib_request.getproxies()
1662             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1663             if 'http' in proxies and 'https' not in proxies:
1664                 proxies['https'] = proxies['http']
1665         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1666
1667         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1668         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1669         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1670         opener = compat_urllib_request.build_opener(
1671             https_handler, proxy_handler, cookie_processor, ydlh)
1672         # Delete the default user-agent header, which would otherwise apply in
1673         # cases where our custom HTTP handler doesn't come into play
1674         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1675         opener.addheaders = []
1676         self._opener = opener
1677
1678     def encode(self, s):
1679         if isinstance(s, bytes):
1680             return s  # Already encoded
1681
1682         try:
1683             return s.encode(self.get_encoding())
1684         except UnicodeEncodeError as err:
1685             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1686             raise
1687
1688     def get_encoding(self):
1689         encoding = self.params.get('encoding')
1690         if encoding is None:
1691             encoding = preferredencoding()
1692         return encoding
1693
1694     def _write_thumbnails(self, info_dict, filename):
1695         if self.params.get('writethumbnail', False):
1696             thumbnails = info_dict.get('thumbnails')
1697             if thumbnails:
1698                 thumbnails = [thumbnails[-1]]
1699         elif self.params.get('write_all_thumbnails', False):
1700             thumbnails = info_dict.get('thumbnails')
1701         else:
1702             return
1703
1704         if not thumbnails:
1705             # No thumbnails present, so return immediately
1706             return
1707
1708         for t in thumbnails:
1709             thumb_ext = determine_ext(t['url'], 'jpg')
1710             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
1711             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
1712             thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
1713
1714             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1715                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
1716                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1717             else:
1718                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
1719                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1720                 try:
1721                     uf = self.urlopen(t['url'])
1722                     with open(thumb_filename, 'wb') as thumbf:
1723                         shutil.copyfileobj(uf, thumbf)
1724                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
1725                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
1726                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1727                     self.report_warning('Unable to download thumbnail "%s": %s' %
1728                                         (t['url'], compat_str(err)))