_ Git - youtube-dl/blob - youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import datetime
   9 import errno
  10 import fileinput
  11 import io
  12 import itertools
  13 import json
  14 import locale
  15 import operator
  16 import os
  17 import platform
  18 import re
  19 import shutil
  20 import subprocess
  21 import socket
  22 import sys
  23 import time
  24 import tokenize
  25 import traceback
  26
  27 if os.name == 'nt':
  28     import ctypes
  29
  30 from .compat import (
  31     compat_basestring,
  32     compat_cookiejar,
  33     compat_expanduser,
  34     compat_get_terminal_size,
  35     compat_http_client,
  36     compat_kwargs,
  37     compat_str,
  38     compat_tokenize_tokenize,
  39     compat_urllib_error,
  40     compat_urllib_request,
  41 )
  42 from .utils import (
  43     escape_url,
  44     ContentTooShortError,
  45     date_from_str,
  46     DateRange,
  47     DEFAULT_OUTTMPL,
  48     determine_ext,
  49     DownloadError,
  50     encodeFilename,
  51     ExtractorError,
  52     format_bytes,
  53     formatSeconds,
  54     HEADRequest,
  55     locked_file,
  56     make_HTTPS_handler,
  57     MaxDownloadsReached,
  58     PagedList,
  59     parse_filesize,
  60     PerRequestProxyHandler,
  61     PostProcessingError,
  62     platform_name,
  63     preferredencoding,
  64     render_table,
  65     SameFileError,
  66     sanitize_filename,
  67     sanitize_path,
  68     std_headers,
  69     subtitles_filename,
  70     UnavailableVideoError,
  71     url_basename,
  72     version_tuple,
  73     write_json_file,
  74     write_string,
  75     YoutubeDLHandler,
  76     prepend_extension,
  77     replace_extension,
  78     args_to_str,
  79     age_restricted,
  80 )
  81 from .cache import Cache
  82 from .extractor import get_info_extractor, gen_extractors
  83 from .downloader import get_suitable_downloader
  84 from .downloader.rtmp import rtmpdump_version
  85 from .postprocessor import (
  86     FFmpegFixupM4aPP,
  87     FFmpegFixupStretchedPP,
  88     FFmpegMergerPP,
  89     FFmpegPostProcessor,
  90     get_postprocessor,
  91 )
  92 from .version import __version__
  93
  94
  95 class YoutubeDL(object):
  96     """YoutubeDL class.
  97
   98     YoutubeDL objects are the ones responsible for downloading the
  99     actual video file and writing it to disk if the user has requested
 100     it, among some other tasks. In most cases there should be one per
 101     program. As, given a video URL, the downloader doesn't know how to
 102     extract all the needed information, task that InfoExtractors do, it
 103     has to pass the URL to one of them.
 104
 105     For this, YoutubeDL objects have a method that allows
 106     InfoExtractors to be registered in a given order. When it is passed
  107     a URL, the YoutubeDL object hands it to the first InfoExtractor it
  108     finds that reports being able to handle it. The InfoExtractor extracts
  109     all the information about the video or videos the URL refers to, and
  110     YoutubeDL processes the extracted information, possibly using a File
 111     Downloader to download the video.
 112
 113     YoutubeDL objects accept a lot of parameters. In order not to saturate
 114     the object constructor with arguments, it receives a dictionary of
 115     options instead. These options are available through the params
 116     attribute for the InfoExtractors to use. The YoutubeDL also
 117     registers itself as the downloader in charge for the InfoExtractors
 118     that are added to it, so this is a "mutual registration".
 119
 120     Available options:
 121
 122     username:          Username for authentication purposes.
 123     password:          Password for authentication purposes.
 124     videopassword:     Password for accessing a video.
 125     usenetrc:          Use netrc for authentication instead.
 126     verbose:           Print additional info to stdout.
 127     quiet:             Do not print messages to stdout.
 128     no_warnings:       Do not print out anything for warnings.
 129     forceurl:          Force printing final URL.
 130     forcetitle:        Force printing title.
 131     forceid:           Force printing ID.
 132     forcethumbnail:    Force printing thumbnail URL.
 133     forcedescription:  Force printing description.
 134     forcefilename:     Force printing final filename.
 135     forceduration:     Force printing duration.
 136     forcejson:         Force printing info_dict as JSON.
 137     dump_single_json:  Force printing the info_dict of the whole playlist
 138                        (or video) as a single JSON line.
 139     simulate:          Do not download the video files.
 140     format:            Video format code. See options.py for more information.
 141     outtmpl:           Template for output names.
 142     restrictfilenames: Do not allow "&" and spaces in file names
 143     ignoreerrors:      Do not stop on download errors.
 144     force_generic_extractor: Force downloader to use the generic extractor
 145     nooverwrites:      Prevent overwriting files.
 146     playliststart:     Playlist item to start at.
 147     playlistend:       Playlist item to end at.
 148     playlist_items:    Specific indices of playlist to download.
 149     playlistreverse:   Download playlist items in reverse order.
 150     matchtitle:        Download only matching titles.
 151     rejecttitle:       Reject downloads for matching titles.
 152     logger:            Log messages to a logging.Logger instance.
 153     logtostderr:       Log messages to stderr instead of stdout.
 154     writedescription:  Write the video description to a .description file
  155     writeinfojson:     Write the video metadata to a .info.json file
 156     writeannotations:  Write the video annotations to a .annotations.xml file
 157     writethumbnail:    Write the thumbnail image to a file
 158     write_all_thumbnails:  Write all thumbnail formats to files
 159     writesubtitles:    Write the video subtitles to a file
 160     writeautomaticsub: Write the automatic subtitles to a file
 161     allsubtitles:      Downloads all the subtitles of the video
 162                        (requires writesubtitles or writeautomaticsub)
 163     listsubtitles:     Lists all available subtitles for the video
 164     subtitlesformat:   The format code for subtitles
 165     subtitleslangs:    List of languages of the subtitles to download
 166     keepvideo:         Keep the video file after post-processing
 167     daterange:         A DateRange object, download only if the upload_date is in the range.
 168     skip_download:     Skip the actual download of the video file
 169     cachedir:          Location of the cache files in the filesystem.
 170                        False to disable filesystem cache.
 171     noplaylist:        Download single video instead of a playlist if in doubt.
 172     age_limit:         An integer representing the user's age in years.
 173                        Unsuitable videos for the given age are skipped.
 174     min_views:         An integer representing the minimum view count the video
 175                        must have in order to not be skipped.
 176                        Videos without view count information are always
 177                        downloaded. None for no limit.
 178     max_views:         An integer representing the maximum view count.
 179                        Videos that are more popular than that are not
 180                        downloaded.
 181                        Videos without view count information are always
 182                        downloaded. None for no limit.
 183     download_archive:  File name of a file where all downloads are recorded.
 184                        Videos already present in the file are not downloaded
 185                        again.
 186     cookiefile:        File name where cookies should be read from and dumped to.
 187     nocheckcertificate:Do not verify SSL certificates
 188     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 189                        At the moment, this is only supported by YouTube.
 190     proxy:             URL of the proxy server to use
 191     cn_verification_proxy:  URL of the proxy to use for IP address verification
 192                        on Chinese sites. (Experimental)
 193     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 194     bidi_workaround:   Work around buggy terminals without bidirectional text
  195                        support, using fribidi
 196     debug_printtraffic:Print out sent and received HTTP traffic
 197     include_ads:       Download ads as well
 198     default_search:    Prepend this string if an input url is not valid.
 199                        'auto' for elaborate guessing
 200     encoding:          Use this encoding instead of the system-specified.
 201     extract_flat:      Do not resolve URLs, return the immediate result.
 202                        Pass in 'in_playlist' to only show this behavior for
 203                        playlist items.
 204     postprocessors:    A list of dictionaries, each with an entry
 205                        * key:  The name of the postprocessor. See
 206                                youtube_dl/postprocessor/__init__.py for a list.
 207                        as well as any further keyword arguments for the
 208                        postprocessor.
 209     progress_hooks:    A list of functions that get called on download
 210                        progress, with a dictionary with the entries
 211                        * status: One of "downloading", "error", or "finished".
 212                                  Check this first and ignore unknown values.
 213
 214                        If status is one of "downloading", or "finished", the
 215                        following properties may also be present:
 216                        * filename: The final filename (always present)
 217                        * tmpfilename: The filename we're currently writing to
 218                        * downloaded_bytes: Bytes on disk
 219                        * total_bytes: Size of the whole file, None if unknown
 220                        * total_bytes_estimate: Guess of the eventual file size,
 221                                                None if unavailable.
 222                        * elapsed: The number of seconds since download started.
 223                        * eta: The estimated time in seconds, None if unknown
 224                        * speed: The download speed in bytes/second, None if
 225                                 unknown
 226                        * fragment_index: The counter of the currently
 227                                          downloaded video fragment.
 228                        * fragment_count: The number of fragments (= individual
 229                                          files that will be merged)
 230
 231                        Progress hooks are guaranteed to be called at least once
 232                        (with status "finished") if the download is successful.
 233     merge_output_format: Extension to use when merging formats.
 234     fixup:             Automatically correct known faults of the file.
 235                        One of:
 236                        - "never": do nothing
 237                        - "warn": only emit a warning
 238                        - "detect_or_warn": check whether we can do anything
 239                                            about it, warn otherwise (default)
 240     source_address:    (Experimental) Client-side IP address to bind to.
 241     call_home:         Boolean, true iff we are allowed to contact the
 242                        youtube-dl servers for debugging.
 243     sleep_interval:    Number of seconds to sleep before each download.
 244     listformats:       Print an overview of available video formats and exit.
 245     list_thumbnails:   Print a table of all thumbnails and exit.
 246     match_filter:      A function that gets called with the info_dict of
 247                        every video.
 248                        If it returns a message, the video is ignored.
 249                        If it returns None, the video is downloaded.
 250                        match_filter_func in utils.py is one example for this.
 251     no_color:          Do not emit color codes in output.
 252
 253     The following options determine which downloader is picked:
 254     external_downloader: Executable of the external downloader to call.
 255                        None or unset for standard (built-in) downloader.
 256     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
 257
 258     The following parameters are not used by YoutubeDL itself, they are used by
 259     the downloader (see youtube_dl/downloader/common.py):
 260     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 261     noresizebuffer, retries, continuedl, noprogress, consoletitle,
 262     xattr_set_filesize, external_downloader_args.
 263
 264     The following options are used by the post processors:
 265     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
 266                        otherwise prefer avconv.
 267     """
 268
    # Class-level placeholders for per-instance state. __init__ rebinds every
    # one of these names on the instance (including the two lists), so the
    # values below only act as defaults on the class itself.
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None
 275
 276     def __init__(self, params=None, auto_init=True):
 277         """Create a FileDownloader object with the given options."""
 278         if params is None:
 279             params = {}
 280         self._ies = []
 281         self._ies_instances = {}
 282         self._pps = []
 283         self._progress_hooks = []
 284         self._download_retcode = 0
 285         self._num_downloads = 0
 286         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 287         self._err_file = sys.stderr
 288         self.params = params
 289         self.cache = Cache(self)
 290
 291         if params.get('bidi_workaround', False):
 292             try:
 293                 import pty
 294                 master, slave = pty.openpty()
 295                 width = compat_get_terminal_size().columns
 296                 if width is None:
 297                     width_args = []
 298                 else:
 299                     width_args = ['-w', str(width)]
 300                 sp_kwargs = dict(
 301                     stdin=subprocess.PIPE,
 302                     stdout=slave,
 303                     stderr=self._err_file)
 304                 try:
 305                     self._output_process = subprocess.Popen(
 306                         ['bidiv'] + width_args, **sp_kwargs
 307                     )
 308                 except OSError:
 309                     self._output_process = subprocess.Popen(
 310                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 311                 self._output_channel = os.fdopen(master, 'rb')
 312             except OSError as ose:
 313                 if ose.errno == 2:
 314                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 315                 else:
 316                     raise
 317
 318         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 319                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
 320                 not params.get('restrictfilenames', False)):
 321             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 322             self.report_warning(
 323                 'Assuming --restrict-filenames since file system encoding '
 324                 'cannot encode all characters. '
 325                 'Set the LC_ALL environment variable to fix this.')
 326             self.params['restrictfilenames'] = True
 327
 328         if isinstance(params.get('outtmpl'), bytes):
 329             self.report_warning(
 330                 'Parameter outtmpl is bytes, but should be a unicode string. '
 331                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 332
 333         self._setup_opener()
 334
 335         if auto_init:
 336             self.print_debug_header()
 337             self.add_default_info_extractors()
 338
 339         for pp_def_raw in self.params.get('postprocessors', []):
 340             pp_class = get_postprocessor(pp_def_raw['key'])
 341             pp_def = dict(pp_def_raw)
 342             del pp_def['key']
 343             pp = pp_class(self, **compat_kwargs(pp_def))
 344             self.add_post_processor(pp)
 345
 346         for ph in self.params.get('progress_hooks', []):
 347             self.add_progress_hook(ph)
 348
 349     def warn_if_short_id(self, argv):
 350         # short YouTube ID starting with dash?
 351         idxs = [
 352             i for i, a in enumerate(argv)
 353             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 354         if idxs:
 355             correct_argv = (
 356                 ['youtube-dl'] +
 357                 [a for i, a in enumerate(argv) if i not in idxs] +
 358                 ['--'] + [argv[i] for i in idxs]
 359             )
 360             self.report_warning(
 361                 'Long argument string detected. '
 362                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 363                 args_to_str(correct_argv))
 364
 365     def add_info_extractor(self, ie):
 366         """Add an InfoExtractor object to the end of the list."""
 367         self._ies.append(ie)
 368         self._ies_instances[ie.ie_key()] = ie
 369         ie.set_downloader(self)
 370
 371     def get_info_extractor(self, ie_key):
 372         """
 373         Get an instance of an IE with name ie_key, it will try to get one from
 374         the _ies list, if there's no instance it will create a new one and add
 375         it to the extractor list.
 376         """
 377         ie = self._ies_instances.get(ie_key)
 378         if ie is None:
 379             ie = get_info_extractor(ie_key)()
 380             self.add_info_extractor(ie)
 381         return ie
 382
 383     def add_default_info_extractors(self):
 384         """
 385         Add the InfoExtractors returned by gen_extractors to the end of the list
 386         """
 387         for ie in gen_extractors():
 388             self.add_info_extractor(ie)
 389
 390     def add_post_processor(self, pp):
 391         """Add a PostProcessor object to the end of the chain."""
 392         self._pps.append(pp)
 393         pp.set_downloader(self)
 394
 395     def add_progress_hook(self, ph):
 396         """Add the progress hook (currently only for the file downloader)"""
 397         self._progress_hooks.append(ph)
 398
 399     def _bidi_workaround(self, message):
 400         if not hasattr(self, '_output_channel'):
 401             return message
 402
 403         assert hasattr(self, '_output_process')
 404         assert isinstance(message, compat_str)
 405         line_count = message.count('\n') + 1
 406         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 407         self._output_process.stdin.flush()
 408         res = ''.join(self._output_channel.readline().decode('utf-8')
 409                       for _ in range(line_count))
 410         return res[:-len('\n')]
 411
 412     def to_screen(self, message, skip_eol=False):
 413         """Print message to stdout if not in quiet mode."""
 414         return self.to_stdout(message, skip_eol, check_quiet=True)
 415
 416     def _write_string(self, s, out=None):
 417         write_string(s, out=out, encoding=self.params.get('encoding'))
 418
 419     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 420         """Print message to stdout if not in quiet mode."""
 421         if self.params.get('logger'):
 422             self.params['logger'].debug(message)
 423         elif not check_quiet or not self.params.get('quiet', False):
 424             message = self._bidi_workaround(message)
 425             terminator = ['\n', ''][skip_eol]
 426             output = message + terminator
 427
 428             self._write_string(output, self._screen_file)
 429
 430     def to_stderr(self, message):
 431         """Print message to stderr."""
 432         assert isinstance(message, compat_str)
 433         if self.params.get('logger'):
 434             self.params['logger'].error(message)
 435         else:
 436             message = self._bidi_workaround(message)
 437             output = message + '\n'
 438             self._write_string(output, self._err_file)
 439
 440     def to_console_title(self, message):
 441         if not self.params.get('consoletitle', False):
 442             return
 443         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 444             # c_wchar_p() might not be necessary if `message` is
 445             # already of type unicode()
 446             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 447         elif 'TERM' in os.environ:
 448             self._write_string('\033]0;%s\007' % message, self._screen_file)
 449
 450     def save_console_title(self):
 451         if not self.params.get('consoletitle', False):
 452             return
 453         if 'TERM' in os.environ:
 454             # Save the title on stack
 455             self._write_string('\033[22;0t', self._screen_file)
 456
 457     def restore_console_title(self):
 458         if not self.params.get('consoletitle', False):
 459             return
 460         if 'TERM' in os.environ:
 461             # Restore the title from stack
 462             self._write_string('\033[23;0t', self._screen_file)
 463
 464     def __enter__(self):
 465         self.save_console_title()
 466         return self
 467
 468     def __exit__(self, *args):
 469         self.restore_console_title()
 470
 471         if self.params.get('cookiefile') is not None:
 472             self.cookiejar.save()
 473
 474     def trouble(self, message=None, tb=None):
 475         """Determine action to take when a download problem appears.
 476
 477         Depending on if the downloader has been configured to ignore
 478         download errors or not, this method may throw an exception or
 479         not when errors are found, after printing the message.
 480
 481         tb, if given, is additional traceback information.
 482         """
 483         if message is not None:
 484             self.to_stderr(message)
 485         if self.params.get('verbose'):
 486             if tb is None:
 487                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 488                     tb = ''
 489                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 490                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 491                     tb += compat_str(traceback.format_exc())
 492                 else:
 493                     tb_data = traceback.format_list(traceback.extract_stack())
 494                     tb = ''.join(tb_data)
 495             self.to_stderr(tb)
 496         if not self.params.get('ignoreerrors', False):
 497             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 498                 exc_info = sys.exc_info()[1].exc_info
 499             else:
 500                 exc_info = sys.exc_info()
 501             raise DownloadError(message, exc_info)
 502         self._download_retcode = 1
 503
 504     def report_warning(self, message):
 505         '''
 506         Print the message to stderr, it will be prefixed with 'WARNING:'
 507         If stderr is a tty file the 'WARNING:' will be colored
 508         '''
 509         if self.params.get('logger') is not None:
 510             self.params['logger'].warning(message)
 511         else:
 512             if self.params.get('no_warnings'):
 513                 return
 514             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
 515                 _msg_header = '\033[0;33mWARNING:\033[0m'
 516             else:
 517                 _msg_header = 'WARNING:'
 518             warning_message = '%s %s' % (_msg_header, message)
 519             self.to_stderr(warning_message)
 520
 521     def report_error(self, message, tb=None):
 522         '''
 523         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 524         in red if stderr is a tty file.
 525         '''
 526         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
 527             _msg_header = '\033[0;31mERROR:\033[0m'
 528         else:
 529             _msg_header = 'ERROR:'
 530         error_message = '%s %s' % (_msg_header, message)
 531         self.trouble(error_message, tb)
 532
 533     def report_file_already_downloaded(self, file_name):
 534         """Report file has already been fully downloaded."""
 535         try:
 536             self.to_screen('[download] %s has already been downloaded' % file_name)
 537         except UnicodeEncodeError:
 538             self.to_screen('[download] The file has already been downloaded')
 539
 540     def prepare_filename(self, info_dict):
 541         """Generate the output filename."""
 542         try:
 543             template_dict = dict(info_dict)
 544
 545             template_dict['epoch'] = int(time.time())
 546             autonumber_size = self.params.get('autonumber_size')
 547             if autonumber_size is None:
 548                 autonumber_size = 5
 549             autonumber_templ = '%0' + str(autonumber_size) + 'd'
 550             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 551             if template_dict.get('playlist_index') is not None:
 552                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
 553             if template_dict.get('resolution') is None:
 554                 if template_dict.get('width') and template_dict.get('height'):
 555                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 556                 elif template_dict.get('height'):
 557                     template_dict['resolution'] = '%sp' % template_dict['height']
 558                 elif template_dict.get('width'):
 559                     template_dict['resolution'] = '?x%d' % template_dict['width']
 560
 561             sanitize = lambda k, v: sanitize_filename(
 562                 compat_str(v),
 563                 restricted=self.params.get('restrictfilenames'),
 564                 is_id=(k == 'id'))
 565             template_dict = dict((k, sanitize(k, v))
 566                                  for k, v in template_dict.items()
 567                                  if v is not None)
 568             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 569
 570             outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
 571             tmpl = compat_expanduser(outtmpl)
 572             filename = tmpl % template_dict
 573             # Temporary fix for #4787
 574             # 'Treat' all problem characters by passing filename through preferredencoding
 575             # to workaround encoding issues with subprocess on python2 @ Windows
 576             if sys.version_info < (3, 0) and sys.platform == 'win32':
 577                 filename = encodeFilename(filename, True).decode(preferredencoding())
 578             return filename
 579         except ValueError as err:
 580             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 581             return None
 582
 583     def _match_entry(self, info_dict, incomplete):
 584         """ Returns None iff the file should be downloaded """
 585
 586         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 587         if 'title' in info_dict:
 588             # This can happen when we're just evaluating the playlist
 589             title = info_dict['title']
 590             matchtitle = self.params.get('matchtitle', False)
 591             if matchtitle:
 592                 if not re.search(matchtitle, title, re.IGNORECASE):
 593                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 594             rejecttitle = self.params.get('rejecttitle', False)
 595             if rejecttitle:
 596                 if re.search(rejecttitle, title, re.IGNORECASE):
 597                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 598         date = info_dict.get('upload_date', None)
 599         if date is not None:
 600             dateRange = self.params.get('daterange', DateRange())
 601             if date not in dateRange:
 602                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 603         view_count = info_dict.get('view_count', None)
 604         if view_count is not None:
 605             min_views = self.params.get('min_views')
 606             if min_views is not None and view_count < min_views:
 607                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 608             max_views = self.params.get('max_views')
 609             if max_views is not None and view_count > max_views:
 610                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 611         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
 612             return 'Skipping "%s" because it is age restricted' % video_title
 613         if self.in_download_archive(info_dict):
 614             return '%s has already been recorded in archive' % video_title
 615
 616         if not incomplete:
 617             match_filter = self.params.get('match_filter')
 618             if match_filter is not None:
 619                 ret = match_filter(info_dict)
 620                 if ret is not None:
 621                     return ret
 622
 623         return None
 624
 625     @staticmethod
 626     def add_extra_info(info_dict, extra_info):
 627         '''Set the keys from extra_info in info dict if they are missing'''
 628         for key, value in extra_info.items():
 629             info_dict.setdefault(key, value)
 630
 631     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 632                      process=True, force_generic_extractor=False):
 633         '''
 634         Returns a list with a dictionary for each video we find.
 635         If 'download', also downloads the videos.
 636         extra_info is a dict containing the extra values to add to each result
 637         '''
 638
 639         if not ie_key and force_generic_extractor:
 640             ie_key = 'Generic'
 641
 642         if ie_key:
 643             ies = [self.get_info_extractor(ie_key)]
 644         else:
 645             ies = self._ies
 646
 647         for ie in ies:
 648             if not ie.suitable(url):
 649                 continue
 650
 651             if not ie.working():
 652                 self.report_warning('The program functionality for this site has been marked as broken, '
 653                                     'and will probably not work.')
 654
 655             try:
 656                 ie_result = ie.extract(url)
 657                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
 658                     break
 659                 if isinstance(ie_result, list):
 660                     # Backwards compatibility: old IE result format
 661                     ie_result = {
 662                         '_type': 'compat_list',
 663                         'entries': ie_result,
 664                     }
 665                 self.add_default_extra_info(ie_result, ie, url)
 666                 if process:
 667                     return self.process_ie_result(ie_result, download, extra_info)
 668                 else:
 669                     return ie_result
 670             except ExtractorError as de:  # An error we somewhat expected
 671                 self.report_error(compat_str(de), de.format_traceback())
 672                 break
 673             except MaxDownloadsReached:
 674                 raise
 675             except Exception as e:
 676                 if self.params.get('ignoreerrors', False):
 677                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 678                     break
 679                 else:
 680                     raise
 681         else:
 682             self.report_error('no suitable InfoExtractor for URL %s' % url)
 683
 684     def add_default_extra_info(self, ie_result, ie, url):
 685         self.add_extra_info(ie_result, {
 686             'extractor': ie.IE_NAME,
 687             'webpage_url': url,
 688             'webpage_url_basename': url_basename(url),
 689             'extractor_key': ie.ie_key(),
 690         })
 691
 692     def process_ie_result(self, ie_result, download=True, extra_info={}):
 693         """
 694         Take the result of the ie(may be modified) and resolve all unresolved
 695         references (URLs, playlist items).
 696
 697         It will also download the videos if 'download'.
 698         Returns the resolved ie_result.
 699         """
 700
 701         result_type = ie_result.get('_type', 'video')
 702
 703         if result_type in ('url', 'url_transparent'):
 704             extract_flat = self.params.get('extract_flat', False)
 705             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
 706                     extract_flat is True):
 707                 if self.params.get('forcejson', False):
 708                     self.to_stdout(json.dumps(ie_result))
 709                 return ie_result
 710
 711         if result_type == 'video':
 712             self.add_extra_info(ie_result, extra_info)
 713             return self.process_video_result(ie_result, download=download)
 714         elif result_type == 'url':
 715             # We have to add extra_info to the results because it may be
 716             # contained in a playlist
 717             return self.extract_info(ie_result['url'],
 718                                      download,
 719                                      ie_key=ie_result.get('ie_key'),
 720                                      extra_info=extra_info)
 721         elif result_type == 'url_transparent':
 722             # Use the information from the embedding page
 723             info = self.extract_info(
 724                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 725                 extra_info=extra_info, download=False, process=False)
 726
 727             force_properties = dict(
 728                 (k, v) for k, v in ie_result.items() if v is not None)
 729             for f in ('_type', 'url'):
 730                 if f in force_properties:
 731                     del force_properties[f]
 732             new_result = info.copy()
 733             new_result.update(force_properties)
 734
 735             assert new_result.get('_type') != 'url_transparent'
 736
 737             return self.process_ie_result(
 738                 new_result, download=download, extra_info=extra_info)
 739         elif result_type == 'playlist' or result_type == 'multi_video':
 740             # We process each entry in the playlist
 741             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 742             self.to_screen('[download] Downloading playlist: %s' % playlist)
 743
 744             playlist_results = []
 745
 746             playliststart = self.params.get('playliststart', 1) - 1
 747             playlistend = self.params.get('playlistend', None)
 748             # For backwards compatibility, interpret -1 as whole list
 749             if playlistend == -1:
 750                 playlistend = None
 751
 752             playlistitems_str = self.params.get('playlist_items', None)
 753             playlistitems = None
 754             if playlistitems_str is not None:
 755                 def iter_playlistitems(format):
 756                     for string_segment in format.split(','):
 757                         if '-' in string_segment:
 758                             start, end = string_segment.split('-')
 759                             for item in range(int(start), int(end) + 1):
 760                                 yield int(item)
 761                         else:
 762                             yield int(string_segment)
 763                 playlistitems = iter_playlistitems(playlistitems_str)
 764
 765             ie_entries = ie_result['entries']
 766             if isinstance(ie_entries, list):
 767                 n_all_entries = len(ie_entries)
 768                 if playlistitems:
 769                     entries = [
 770                         ie_entries[i - 1] for i in playlistitems
 771                         if -n_all_entries <= i - 1 < n_all_entries]
 772                 else:
 773                     entries = ie_entries[playliststart:playlistend]
 774                 n_entries = len(entries)
 775                 self.to_screen(
 776                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
 777                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
 778             elif isinstance(ie_entries, PagedList):
 779                 if playlistitems:
 780                     entries = []
 781                     for item in playlistitems:
 782                         entries.extend(ie_entries.getslice(
 783                             item - 1, item
 784                         ))
 785                 else:
 786                     entries = ie_entries.getslice(
 787                         playliststart, playlistend)
 788                 n_entries = len(entries)
 789                 self.to_screen(
 790                     "[%s] playlist %s: Downloading %d videos" %
 791                     (ie_result['extractor'], playlist, n_entries))
 792             else:  # iterable
 793                 if playlistitems:
 794                     entry_list = list(ie_entries)
 795                     entries = [entry_list[i - 1] for i in playlistitems]
 796                 else:
 797                     entries = list(itertools.islice(
 798                         ie_entries, playliststart, playlistend))
 799                 n_entries = len(entries)
 800                 self.to_screen(
 801                     "[%s] playlist %s: Downloading %d videos" %
 802                     (ie_result['extractor'], playlist, n_entries))
 803
 804             if self.params.get('playlistreverse', False):
 805                 entries = entries[::-1]
 806
 807             for i, entry in enumerate(entries, 1):
 808                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
 809                 extra = {
 810                     'n_entries': n_entries,
 811                     'playlist': playlist,
 812                     'playlist_id': ie_result.get('id'),
 813                     'playlist_title': ie_result.get('title'),
 814                     'playlist_index': i + playliststart,
 815                     'extractor': ie_result['extractor'],
 816                     'webpage_url': ie_result['webpage_url'],
 817                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
 818                     'extractor_key': ie_result['extractor_key'],
 819                 }
 820
 821                 reason = self._match_entry(entry, incomplete=True)
 822                 if reason is not None:
 823                     self.to_screen('[download] ' + reason)
 824                     continue
 825
 826                 entry_result = self.process_ie_result(entry,
 827                                                       download=download,
 828                                                       extra_info=extra)
 829                 playlist_results.append(entry_result)
 830             ie_result['entries'] = playlist_results
 831             return ie_result
 832         elif result_type == 'compat_list':
 833             self.report_warning(
 834                 'Extractor %s returned a compat_list result. '
 835                 'It needs to be updated.' % ie_result.get('extractor'))
 836
 837             def _fixup(r):
 838                 self.add_extra_info(
 839                     r,
 840                     {
 841                         'extractor': ie_result['extractor'],
 842                         'webpage_url': ie_result['webpage_url'],
 843                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
 844                         'extractor_key': ie_result['extractor_key'],
 845                     }
 846                 )
 847                 return r
 848             ie_result['entries'] = [
 849                 self.process_ie_result(_fixup(r), download, extra_info)
 850                 for r in ie_result['entries']
 851             ]
 852             return ie_result
 853         else:
 854             raise Exception('Invalid result type: %s' % result_type)
 855
 856     def _build_format_filter(self, filter_spec):
 857         " Returns a function to filter the formats according to the filter_spec "
 858
 859         OPERATORS = {
 860             '<': operator.lt,
 861             '<=': operator.le,
 862             '>': operator.gt,
 863             '>=': operator.ge,
 864             '=': operator.eq,
 865             '!=': operator.ne,
 866         }
 867         operator_rex = re.compile(r'''(?x)\s*
 868             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
 869             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
 870             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
 871             $
 872             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
 873         m = operator_rex.search(filter_spec)
 874         if m:
 875             try:
 876                 comparison_value = int(m.group('value'))
 877             except ValueError:
 878                 comparison_value = parse_filesize(m.group('value'))
 879                 if comparison_value is None:
 880                     comparison_value = parse_filesize(m.group('value') + 'B')
 881                 if comparison_value is None:
 882                     raise ValueError(
 883                         'Invalid value %r in format specification %r' % (
 884                             m.group('value'), filter_spec))
 885             op = OPERATORS[m.group('op')]
 886
 887         if not m:
 888             STR_OPERATORS = {
 889                 '=': operator.eq,
 890                 '!=': operator.ne,
 891             }
 892             str_operator_rex = re.compile(r'''(?x)
 893                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
 894                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
 895                 \s*(?P<value>[a-zA-Z0-9_-]+)
 896                 \s*$
 897                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
 898             m = str_operator_rex.search(filter_spec)
 899             if m:
 900                 comparison_value = m.group('value')
 901                 op = STR_OPERATORS[m.group('op')]
 902
 903         if not m:
 904             raise ValueError('Invalid filter specification %r' % filter_spec)
 905
 906         def _filter(f):
 907             actual_value = f.get(m.group('key'))
 908             if actual_value is None:
 909                 return m.group('none_inclusive')
 910             return op(actual_value, comparison_value)
 911         return _filter
 912
 913     def build_format_selector(self, format_spec):
 914         def syntax_error(note, start):
 915             message = (
 916                 'Invalid format specification: '
 917                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
 918             return SyntaxError(message)
 919
 920         PICKFIRST = 'PICKFIRST'
 921         MERGE = 'MERGE'
 922         SINGLE = 'SINGLE'
 923         GROUP = 'GROUP'
 924         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
 925
 926         def _parse_filter(tokens):
 927             filter_parts = []
 928             for type, string, start, _, _ in tokens:
 929                 if type == tokenize.OP and string == ']':
 930                     return ''.join(filter_parts)
 931                 else:
 932                     filter_parts.append(string)
 933
 934         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
 935             selectors = []
 936             current_selector = None
 937             for type, string, start, _, _ in tokens:
 938                 # ENCODING is only defined in python 3.x
 939                 if type == getattr(tokenize, 'ENCODING', None):
 940                     continue
 941                 elif type in [tokenize.NAME, tokenize.NUMBER]:
 942                     current_selector = FormatSelector(SINGLE, string, [])
 943                 elif type == tokenize.OP:
 944                     if string == ')':
 945                         if not inside_group:
 946                             # ')' will be handled by the parentheses group
 947                             tokens.restore_last_token()
 948                         break
 949                     elif inside_merge and string in ['/', ',']:
 950                         tokens.restore_last_token()
 951                         break
 952                     elif inside_choice and string == ',':
 953                         tokens.restore_last_token()
 954                         break
 955                     elif string == ',':
 956                         selectors.append(current_selector)
 957                         current_selector = None
 958                     elif string == '/':
 959                         first_choice = current_selector
 960                         second_choice = _parse_format_selection(tokens, inside_choice=True)
 961                         current_selector = None
 962                         selectors.append(FormatSelector(PICKFIRST, (first_choice, second_choice), []))
 963                     elif string == '[':
 964                         if not current_selector:
 965                             current_selector = FormatSelector(SINGLE, 'best', [])
 966                         format_filter = _parse_filter(tokens)
 967                         current_selector.filters.append(format_filter)
 968                     elif string == '(':
 969                         if current_selector:
 970                             raise syntax_error('Unexpected "("', start)
 971                         group = _parse_format_selection(tokens, inside_group=True)
 972                         current_selector = FormatSelector(GROUP, group, [])
 973                     elif string == '+':
 974                         video_selector = current_selector
 975                         audio_selector = _parse_format_selection(tokens, inside_merge=True)
 976                         current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
 977                     else:
 978                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
 979                 elif type == tokenize.ENDMARKER:
 980                     break
 981             if current_selector:
 982                 selectors.append(current_selector)
 983             return selectors
 984
 985         def _build_selector_function(selector):
 986             if isinstance(selector, list):
 987                 fs = [_build_selector_function(s) for s in selector]
 988
 989                 def selector_function(formats):
 990                     for f in fs:
 991                         for format in f(formats):
 992                             yield format
 993                 return selector_function
 994             elif selector.type == GROUP:
 995                 selector_function = _build_selector_function(selector.selector)
 996             elif selector.type == PICKFIRST:
 997                 fs = [_build_selector_function(s) for s in selector.selector]
 998
 999                 def selector_function(formats):
1000                     for f in fs:
1001                         picked_formats = list(f(formats))
1002                         if picked_formats:
1003                             return picked_formats
1004                     return []
1005             elif selector.type == SINGLE:
1006                 format_spec = selector.selector
1007
1008                 def selector_function(formats):
1009                     if format_spec == 'all':
1010                         for f in formats:
1011                             yield f
1012                     elif format_spec in ['best', 'worst', None]:
1013                         format_idx = 0 if format_spec == 'worst' else -1
1014                         audiovideo_formats = [
1015                             f for f in formats
1016                             if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
1017                         if audiovideo_formats:
1018                             yield audiovideo_formats[format_idx]
1019                         # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
1020                         elif (all(f.get('acodec') != 'none' for f in formats) or
1021                               all(f.get('vcodec') != 'none' for f in formats)):
1022                             yield formats[format_idx]
1023                     elif format_spec == 'bestaudio':
1024                         audio_formats = [
1025                             f for f in formats
1026                             if f.get('vcodec') == 'none']
1027                         if audio_formats:
1028                             yield audio_formats[-1]
1029                     elif format_spec == 'worstaudio':
1030                         audio_formats = [
1031                             f for f in formats
1032                             if f.get('vcodec') == 'none']
1033                         if audio_formats:
1034                             yield audio_formats[0]
1035                     elif format_spec == 'bestvideo':
1036                         video_formats = [
1037                             f for f in formats
1038                             if f.get('acodec') == 'none']
1039                         if video_formats:
1040                             yield video_formats[-1]
1041                     elif format_spec == 'worstvideo':
1042                         video_formats = [
1043                             f for f in formats
1044                             if f.get('acodec') == 'none']
1045                         if video_formats:
1046                             yield video_formats[0]
1047                     else:
1048                         extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
1049                         if format_spec in extensions:
1050                             filter_f = lambda f: f['ext'] == format_spec
1051                         else:
1052                             filter_f = lambda f: f['format_id'] == format_spec
1053                         matches = list(filter(filter_f, formats))
1054                         if matches:
1055                             yield matches[-1]
1056             elif selector.type == MERGE:
1057                 def _merge(formats_info):
1058                     format_1, format_2 = [f['format_id'] for f in formats_info]
1059                     # The first format must contain the video and the
1060                     # second the audio
1061                     if formats_info[0].get('vcodec') == 'none':
1062                         self.report_error('The first format must '
1063                                           'contain the video, try using '
1064                                           '"-f %s+%s"' % (format_2, format_1))
1065                         return
1066                     output_ext = (
1067                         formats_info[0]['ext']
1068                         if self.params.get('merge_output_format') is None
1069                         else self.params['merge_output_format'])
1070                     return {
1071                         'requested_formats': formats_info,
1072                         'format': '%s+%s' % (formats_info[0].get('format'),
1073                                              formats_info[1].get('format')),
1074                         'format_id': '%s+%s' % (formats_info[0].get('format_id'),
1075                                                 formats_info[1].get('format_id')),
1076                         'width': formats_info[0].get('width'),
1077                         'height': formats_info[0].get('height'),
1078                         'resolution': formats_info[0].get('resolution'),
1079                         'fps': formats_info[0].get('fps'),
1080                         'vcodec': formats_info[0].get('vcodec'),
1081                         'vbr': formats_info[0].get('vbr'),
1082                         'stretched_ratio': formats_info[0].get('stretched_ratio'),
1083                         'acodec': formats_info[1].get('acodec'),
1084                         'abr': formats_info[1].get('abr'),
1085                         'ext': output_ext,
1086                     }
1087                 video_selector, audio_selector = map(_build_selector_function, selector.selector)
1088
1089                 def selector_function(formats):
1090                     formats = list(formats)
1091                     for pair in itertools.product(video_selector(formats), audio_selector(formats)):
1092                         yield _merge(pair)
1093
1094             filters = [self._build_format_filter(f) for f in selector.filters]
1095
1096             def final_selector(formats):
1097                 for _filter in filters:
1098                     formats = list(filter(_filter, formats))
1099                 return selector_function(formats)
1100             return final_selector
1101
1102         stream = io.BytesIO(format_spec.encode('utf-8'))
1103         try:
1104             tokens = list(compat_tokenize_tokenize(stream.readline))
1105         except tokenize.TokenError:
1106             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1107
1108         class TokenIterator(object):
1109             def __init__(self, tokens):
1110                 self.tokens = tokens
1111                 self.counter = 0
1112
1113             def __iter__(self):
1114                 return self
1115
1116             def __next__(self):
1117                 if self.counter >= len(self.tokens):
1118                     raise StopIteration()
1119                 value = self.tokens[self.counter]
1120                 self.counter += 1
1121                 return value
1122
1123             next = __next__
1124
1125             def restore_last_token(self):
1126                 self.counter -= 1
1127
1128         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1129         return _build_selector_function(parsed_selector)
1130
1131     def _calc_headers(self, info_dict):
1132         res = std_headers.copy()
1133
1134         add_headers = info_dict.get('http_headers')
1135         if add_headers:
1136             res.update(add_headers)
1137
1138         cookies = self._calc_cookies(info_dict)
1139         if cookies:
1140             res['Cookie'] = cookies
1141
1142         return res
1143
1144     def _calc_cookies(self, info_dict):
1145         pr = compat_urllib_request.Request(info_dict['url'])
1146         self.cookiejar.add_cookie_header(pr)
1147         return pr.get_header('Cookie')
1148
1149     def process_video_result(self, info_dict, download=True):
1150         assert info_dict.get('_type', 'video') == 'video'
1151
1152         if 'id' not in info_dict:
1153             raise ExtractorError('Missing "id" field in extractor result')
1154         if 'title' not in info_dict:
1155             raise ExtractorError('Missing "title" field in extractor result')
1156
1157         if 'playlist' not in info_dict:
1158             # It isn't part of a playlist
1159             info_dict['playlist'] = None
1160             info_dict['playlist_index'] = None
1161
1162         thumbnails = info_dict.get('thumbnails')
1163         if thumbnails is None:
1164             thumbnail = info_dict.get('thumbnail')
1165             if thumbnail:
1166                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1167         if thumbnails:
1168             thumbnails.sort(key=lambda t: (
1169                 t.get('preference'), t.get('width'), t.get('height'),
1170                 t.get('id'), t.get('url')))
1171             for i, t in enumerate(thumbnails):
1172                 if 'width' in t and 'height' in t:
1173                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1174                 if t.get('id') is None:
1175                     t['id'] = '%d' % i
1176
1177         if thumbnails and 'thumbnail' not in info_dict:
1178             info_dict['thumbnail'] = thumbnails[-1]['url']
1179
1180         if 'display_id' not in info_dict and 'id' in info_dict:
1181             info_dict['display_id'] = info_dict['id']
1182
1183         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1184             # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1185             # see http://bugs.python.org/issue1646728)
1186             try:
1187                 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1188                 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1189             except (ValueError, OverflowError, OSError):
1190                 pass
1191
1192         if self.params.get('listsubtitles', False):
1193             if 'automatic_captions' in info_dict:
1194                 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
1195             self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
1196             return
1197         info_dict['requested_subtitles'] = self.process_subtitles(
1198             info_dict['id'], info_dict.get('subtitles'),
1199             info_dict.get('automatic_captions'))
1200
1201         # We now pick which formats have to be downloaded
1202         if info_dict.get('formats') is None:
1203             # There's only one format available
1204             formats = [info_dict]
1205         else:
1206             formats = info_dict['formats']
1207
1208         if not formats:
1209             raise ExtractorError('No video formats found!')
1210
1211         formats_dict = {}
1212
1213         # We check that all the formats have the format and format_id fields
1214         for i, format in enumerate(formats):
1215             if 'url' not in format:
1216                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1217
1218             if format.get('format_id') is None:
1219                 format['format_id'] = compat_str(i)
1220             format_id = format['format_id']
1221             if format_id not in formats_dict:
1222                 formats_dict[format_id] = []
1223             formats_dict[format_id].append(format)
1224
1225         # Make sure all formats have unique format_id
1226         for format_id, ambiguous_formats in formats_dict.items():
1227             if len(ambiguous_formats) > 1:
1228                 for i, format in enumerate(ambiguous_formats):
1229                     format['format_id'] = '%s-%d' % (format_id, i)
1230
1231         for i, format in enumerate(formats):
1232             if format.get('format') is None:
1233                 format['format'] = '{id} - {res}{note}'.format(
1234                     id=format['format_id'],
1235                     res=self.format_resolution(format),
1236                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1237                 )
1238             # Automatically determine file extension if missing
1239             if 'ext' not in format:
1240                 format['ext'] = determine_ext(format['url']).lower()
1241             # Add HTTP headers, so that external programs can use them from the
1242             # json output
1243             full_format_info = info_dict.copy()
1244             full_format_info.update(format)
1245             format['http_headers'] = self._calc_headers(full_format_info)
1246
1247         # TODO Central sorting goes here
1248
1249         if formats[0] is not info_dict:
1250             # only set the 'formats' fields if the original info_dict list them
1251             # otherwise we end up with a circular reference, the first (and unique)
1252             # element in the 'formats' field in info_dict is info_dict itself,
1253             # wich can't be exported to json
1254             info_dict['formats'] = formats
1255         if self.params.get('listformats'):
1256             self.list_formats(info_dict)
1257             return
1258         if self.params.get('list_thumbnails'):
1259             self.list_thumbnails(info_dict)
1260             return
1261
1262         req_format = self.params.get('format')
1263         if req_format is None:
1264             req_format_list = []
1265             if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
1266                     info_dict['extractor'] in ['youtube', 'ted']):
1267                 merger = FFmpegMergerPP(self)
1268                 if merger.available and merger.can_merge():
1269                     req_format_list.append('bestvideo+bestaudio')
1270             req_format_list.append('best')
1271             req_format = '/'.join(req_format_list)
1272         format_selector = self.build_format_selector(req_format)
1273         formats_to_download = list(format_selector(formats))
1274         if not formats_to_download:
1275             raise ExtractorError('requested format not available',
1276                                  expected=True)
1277
1278         if download:
1279             if len(formats_to_download) > 1:
1280                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1281             for format in formats_to_download:
1282                 new_info = dict(info_dict)
1283                 new_info.update(format)
1284                 self.process_info(new_info)
1285         # We update the info dict with the best quality format (backwards compatibility)
1286         info_dict.update(formats_to_download[-1])
1287         return info_dict
1288
1289     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1290         """Select the requested subtitles and their format"""
1291         available_subs = {}
1292         if normal_subtitles and self.params.get('writesubtitles'):
1293             available_subs.update(normal_subtitles)
1294         if automatic_captions and self.params.get('writeautomaticsub'):
1295             for lang, cap_info in automatic_captions.items():
1296                 if lang not in available_subs:
1297                     available_subs[lang] = cap_info
1298
1299         if (not self.params.get('writesubtitles') and not
1300                 self.params.get('writeautomaticsub') or not
1301                 available_subs):
1302             return None
1303
1304         if self.params.get('allsubtitles', False):
1305             requested_langs = available_subs.keys()
1306         else:
1307             if self.params.get('subtitleslangs', False):
1308                 requested_langs = self.params.get('subtitleslangs')
1309             elif 'en' in available_subs:
1310                 requested_langs = ['en']
1311             else:
1312                 requested_langs = [list(available_subs.keys())[0]]
1313
1314         formats_query = self.params.get('subtitlesformat', 'best')
1315         formats_preference = formats_query.split('/') if formats_query else []
1316         subs = {}
1317         for lang in requested_langs:
1318             formats = available_subs.get(lang)
1319             if formats is None:
1320                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1321                 continue
1322             for ext in formats_preference:
1323                 if ext == 'best':
1324                     f = formats[-1]
1325                     break
1326                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1327                 if matches:
1328                     f = matches[-1]
1329                     break
1330             else:
1331                 f = formats[-1]
1332                 self.report_warning(
1333                     'No subtitle format found matching "%s" for language %s, '
1334                     'using %s' % (formats_query, lang, f['ext']))
1335             subs[lang] = f
1336         return subs
1337
1338     def process_info(self, info_dict):
1339         """Process a single resolved IE result."""
1340
1341         assert info_dict.get('_type', 'video') == 'video'
1342
1343         max_downloads = self.params.get('max_downloads')
1344         if max_downloads is not None:
1345             if self._num_downloads >= int(max_downloads):
1346                 raise MaxDownloadsReached()
1347
1348         info_dict['fulltitle'] = info_dict['title']
1349         if len(info_dict['title']) > 200:
1350             info_dict['title'] = info_dict['title'][:197] + '...'
1351
1352         if 'format' not in info_dict:
1353             info_dict['format'] = info_dict['ext']
1354
1355         reason = self._match_entry(info_dict, incomplete=False)
1356         if reason is not None:
1357             self.to_screen('[download] ' + reason)
1358             return
1359
1360         self._num_downloads += 1
1361
1362         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1363
1364         # Forced printings
1365         if self.params.get('forcetitle', False):
1366             self.to_stdout(info_dict['fulltitle'])
1367         if self.params.get('forceid', False):
1368             self.to_stdout(info_dict['id'])
1369         if self.params.get('forceurl', False):
1370             if info_dict.get('requested_formats') is not None:
1371                 for f in info_dict['requested_formats']:
1372                     self.to_stdout(f['url'] + f.get('play_path', ''))
1373             else:
1374                 # For RTMP URLs, also include the playpath
1375                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1376         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1377             self.to_stdout(info_dict['thumbnail'])
1378         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1379             self.to_stdout(info_dict['description'])
1380         if self.params.get('forcefilename', False) and filename is not None:
1381             self.to_stdout(filename)
1382         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1383             self.to_stdout(formatSeconds(info_dict['duration']))
1384         if self.params.get('forceformat', False):
1385             self.to_stdout(info_dict['format'])
1386         if self.params.get('forcejson', False):
1387             self.to_stdout(json.dumps(info_dict))
1388
1389         # Do nothing else if in simulate mode
1390         if self.params.get('simulate', False):
1391             return
1392
1393         if filename is None:
1394             return
1395
1396         try:
1397             dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1398             if dn and not os.path.exists(dn):
1399                 os.makedirs(dn)
1400         except (OSError, IOError) as err:
1401             self.report_error('unable to create directory ' + compat_str(err))
1402             return
1403
1404         if self.params.get('writedescription', False):
1405             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1406             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1407                 self.to_screen('[info] Video description is already present')
1408             elif info_dict.get('description') is None:
1409                 self.report_warning('There\'s no description to write.')
1410             else:
1411                 try:
1412                     self.to_screen('[info] Writing video description to: ' + descfn)
1413                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1414                         descfile.write(info_dict['description'])
1415                 except (OSError, IOError):
1416                     self.report_error('Cannot write description file ' + descfn)
1417                     return
1418
1419         if self.params.get('writeannotations', False):
1420             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1421             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1422                 self.to_screen('[info] Video annotations are already present')
1423             else:
1424                 try:
1425                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1426                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1427                         annofile.write(info_dict['annotations'])
1428                 except (KeyError, TypeError):
1429                     self.report_warning('There are no annotations to write.')
1430                 except (OSError, IOError):
1431                     self.report_error('Cannot write annotations file: ' + annofn)
1432                     return
1433
1434         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1435                                        self.params.get('writeautomaticsub')])
1436
1437         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1438             # subtitles download errors are already managed as troubles in relevant IE
1439             # that way it will silently go on when used with unsupporting IE
1440             subtitles = info_dict['requested_subtitles']
1441             ie = self.get_info_extractor(info_dict['extractor_key'])
1442             for sub_lang, sub_info in subtitles.items():
1443                 sub_format = sub_info['ext']
1444                 if sub_info.get('data') is not None:
1445                     sub_data = sub_info['data']
1446                 else:
1447                     try:
1448                         sub_data = ie._download_webpage(
1449                             sub_info['url'], info_dict['id'], note=False)
1450                     except ExtractorError as err:
1451                         self.report_warning('Unable to download subtitle for "%s": %s' %
1452                                             (sub_lang, compat_str(err.cause)))
1453                         continue
1454                 try:
1455                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1456                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1457                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1458                     else:
1459                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1460                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1461                             subfile.write(sub_data)
1462                 except (OSError, IOError):
1463                     self.report_error('Cannot write subtitles file ' + sub_filename)
1464                     return
1465
1466         if self.params.get('writeinfojson', False):
1467             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1468             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1469                 self.to_screen('[info] Video description metadata is already present')
1470             else:
1471                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1472                 try:
1473                     write_json_file(self.filter_requested_info(info_dict), infofn)
1474                 except (OSError, IOError):
1475                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1476                     return
1477
1478         self._write_thumbnails(info_dict, filename)
1479
1480         if not self.params.get('skip_download', False):
1481             try:
1482                 def dl(name, info):
1483                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1484                     for ph in self._progress_hooks:
1485                         fd.add_progress_hook(ph)
1486                     if self.params.get('verbose'):
1487                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1488                     return fd.download(name, info)
1489
1490                 if info_dict.get('requested_formats') is not None:
1491                     downloaded = []
1492                     success = True
1493                     merger = FFmpegMergerPP(self)
1494                     if not merger.available:
1495                         postprocessors = []
1496                         self.report_warning('You have requested multiple '
1497                                             'formats but ffmpeg or avconv are not installed.'
1498                                             ' The formats won\'t be merged.')
1499                     else:
1500                         postprocessors = [merger]
1501
1502                     def compatible_formats(formats):
1503                         video, audio = formats
1504                         # Check extension
1505                         video_ext, audio_ext = audio.get('ext'), video.get('ext')
1506                         if video_ext and audio_ext:
1507                             COMPATIBLE_EXTS = (
1508                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
1509                                 ('webm')
1510                             )
1511                             for exts in COMPATIBLE_EXTS:
1512                                 if video_ext in exts and audio_ext in exts:
1513                                     return True
1514                         # TODO: Check acodec/vcodec
1515                         return False
1516
1517                     filename_real_ext = os.path.splitext(filename)[1][1:]
1518                     filename_wo_ext = (
1519                         os.path.splitext(filename)[0]
1520                         if filename_real_ext == info_dict['ext']
1521                         else filename)
1522                     requested_formats = info_dict['requested_formats']
1523                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1524                         info_dict['ext'] = 'mkv'
1525                         self.report_warning(
1526                             'Requested formats are incompatible for merge and will be merged into mkv.')
1527                     # Ensure filename always has a correct extension for successful merge
1528                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1529                     if os.path.exists(encodeFilename(filename)):
1530                         self.to_screen(
1531                             '[download] %s has already been downloaded and '
1532                             'merged' % filename)
1533                     else:
1534                         for f in requested_formats:
1535                             new_info = dict(info_dict)
1536                             new_info.update(f)
1537                             fname = self.prepare_filename(new_info)
1538                             fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
1539                             downloaded.append(fname)
1540                             partial_success = dl(fname, new_info)
1541                             success = success and partial_success
1542                         info_dict['__postprocessors'] = postprocessors
1543                         info_dict['__files_to_merge'] = downloaded
1544                 else:
1545                     # Just a single file
1546                     success = dl(filename, info_dict)
1547             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1548                 self.report_error('unable to download video data: %s' % str(err))
1549                 return
1550             except (OSError, IOError) as err:
1551                 raise UnavailableVideoError(err)
1552             except (ContentTooShortError, ) as err:
1553                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1554                 return
1555
1556             if success:
1557                 # Fixup content
1558                 fixup_policy = self.params.get('fixup')
1559                 if fixup_policy is None:
1560                     fixup_policy = 'detect_or_warn'
1561
1562                 stretched_ratio = info_dict.get('stretched_ratio')
1563                 if stretched_ratio is not None and stretched_ratio != 1:
1564                     if fixup_policy == 'warn':
1565                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1566                             info_dict['id'], stretched_ratio))
1567                     elif fixup_policy == 'detect_or_warn':
1568                         stretched_pp = FFmpegFixupStretchedPP(self)
1569                         if stretched_pp.available:
1570                             info_dict.setdefault('__postprocessors', [])
1571                             info_dict['__postprocessors'].append(stretched_pp)
1572                         else:
1573                             self.report_warning(
1574                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1575                                     info_dict['id'], stretched_ratio))
1576                     else:
1577                         assert fixup_policy in ('ignore', 'never')
1578
1579                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1580                     if fixup_policy == 'warn':
1581                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1582                             info_dict['id']))
1583                     elif fixup_policy == 'detect_or_warn':
1584                         fixup_pp = FFmpegFixupM4aPP(self)
1585                         if fixup_pp.available:
1586                             info_dict.setdefault('__postprocessors', [])
1587                             info_dict['__postprocessors'].append(fixup_pp)
1588                         else:
1589                             self.report_warning(
1590                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1591                                     info_dict['id']))
1592                     else:
1593                         assert fixup_policy in ('ignore', 'never')
1594
1595                 try:
1596                     self.post_process(filename, info_dict)
1597                 except (PostProcessingError) as err:
1598                     self.report_error('postprocessing: %s' % str(err))
1599                     return
1600                 self.record_download_archive(info_dict)
1601
1602     def download(self, url_list):
1603         """Download a given list of URLs."""
1604         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1605         if (len(url_list) > 1 and
1606                 '%' not in outtmpl and
1607                 self.params.get('max_downloads') != 1):
1608             raise SameFileError(outtmpl)
1609
1610         for url in url_list:
1611             try:
1612                 # It also downloads the videos
1613                 res = self.extract_info(
1614                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
1615             except UnavailableVideoError:
1616                 self.report_error('unable to download video')
1617             except MaxDownloadsReached:
1618                 self.to_screen('[info] Maximum number of downloaded files reached.')
1619                 raise
1620             else:
1621                 if self.params.get('dump_single_json', False):
1622                     self.to_stdout(json.dumps(res))
1623
1624         return self._download_retcode
1625
1626     def download_with_info_file(self, info_filename):
1627         with contextlib.closing(fileinput.FileInput(
1628                 [info_filename], mode='r',
1629                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1630             # FileInput doesn't have a read method, we can't call json.load
1631             info = self.filter_requested_info(json.loads('\n'.join(f)))
1632         try:
1633             self.process_ie_result(info, download=True)
1634         except DownloadError:
1635             webpage_url = info.get('webpage_url')
1636             if webpage_url is not None:
1637                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1638                 return self.download([webpage_url])
1639             else:
1640                 raise
1641         return self._download_retcode
1642
1643     @staticmethod
1644     def filter_requested_info(info_dict):
1645         return dict(
1646             (k, v) for k, v in info_dict.items()
1647             if k not in ['requested_formats', 'requested_subtitles'])
1648
1649     def post_process(self, filename, ie_info):
1650         """Run all the postprocessors on the given file."""
1651         info = dict(ie_info)
1652         info['filepath'] = filename
1653         pps_chain = []
1654         if ie_info.get('__postprocessors') is not None:
1655             pps_chain.extend(ie_info['__postprocessors'])
1656         pps_chain.extend(self._pps)
1657         for pp in pps_chain:
1658             files_to_delete = []
1659             try:
1660                 files_to_delete, info = pp.run(info)
1661             except PostProcessingError as e:
1662                 self.report_error(e.msg)
1663             if files_to_delete and not self.params.get('keepvideo', False):
1664                 for old_filename in files_to_delete:
1665                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1666                     try:
1667                         os.remove(encodeFilename(old_filename))
1668                     except (IOError, OSError):
1669                         self.report_warning('Unable to remove downloaded original file')
1670
1671     def _make_archive_id(self, info_dict):
1672         # Future-proof against any change in case
1673         # and backwards compatibility with prior versions
1674         extractor = info_dict.get('extractor_key')
1675         if extractor is None:
1676             if 'id' in info_dict:
1677                 extractor = info_dict.get('ie_key')  # key in a playlist
1678         if extractor is None:
1679             return None  # Incomplete video information
1680         return extractor.lower() + ' ' + info_dict['id']
1681
1682     def in_download_archive(self, info_dict):
1683         fn = self.params.get('download_archive')
1684         if fn is None:
1685             return False
1686
1687         vid_id = self._make_archive_id(info_dict)
1688         if vid_id is None:
1689             return False  # Incomplete video information
1690
1691         try:
1692             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1693                 for line in archive_file:
1694                     if line.strip() == vid_id:
1695                         return True
1696         except IOError as ioe:
1697             if ioe.errno != errno.ENOENT:
1698                 raise
1699         return False
1700
1701     def record_download_archive(self, info_dict):
1702         fn = self.params.get('download_archive')
1703         if fn is None:
1704             return
1705         vid_id = self._make_archive_id(info_dict)
1706         assert vid_id
1707         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1708             archive_file.write(vid_id + '\n')
1709
1710     @staticmethod
1711     def format_resolution(format, default='unknown'):
1712         if format.get('vcodec') == 'none':
1713             return 'audio only'
1714         if format.get('resolution') is not None:
1715             return format['resolution']
1716         if format.get('height') is not None:
1717             if format.get('width') is not None:
1718                 res = '%sx%s' % (format['width'], format['height'])
1719             else:
1720                 res = '%sp' % format['height']
1721         elif format.get('width') is not None:
1722             res = '?x%d' % format['width']
1723         else:
1724             res = default
1725         return res
1726
1727     def _format_note(self, fdict):
1728         res = ''
1729         if fdict.get('ext') in ['f4f', 'f4m']:
1730             res += '(unsupported) '
1731         if fdict.get('format_note') is not None:
1732             res += fdict['format_note'] + ' '
1733         if fdict.get('tbr') is not None:
1734             res += '%4dk ' % fdict['tbr']
1735         if fdict.get('container') is not None:
1736             if res:
1737                 res += ', '
1738             res += '%s container' % fdict['container']
1739         if (fdict.get('vcodec') is not None and
1740                 fdict.get('vcodec') != 'none'):
1741             if res:
1742                 res += ', '
1743             res += fdict['vcodec']
1744             if fdict.get('vbr') is not None:
1745                 res += '@'
1746         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1747             res += 'video@'
1748         if fdict.get('vbr') is not None:
1749             res += '%4dk' % fdict['vbr']
1750         if fdict.get('fps') is not None:
1751             res += ', %sfps' % fdict['fps']
1752         if fdict.get('acodec') is not None:
1753             if res:
1754                 res += ', '
1755             if fdict['acodec'] == 'none':
1756                 res += 'video only'
1757             else:
1758                 res += '%-5s' % fdict['acodec']
1759         elif fdict.get('abr') is not None:
1760             if res:
1761                 res += ', '
1762             res += 'audio'
1763         if fdict.get('abr') is not None:
1764             res += '@%3dk' % fdict['abr']
1765         if fdict.get('asr') is not None:
1766             res += ' (%5dHz)' % fdict['asr']
1767         if fdict.get('filesize') is not None:
1768             if res:
1769                 res += ', '
1770             res += format_bytes(fdict['filesize'])
1771         elif fdict.get('filesize_approx') is not None:
1772             if res:
1773                 res += ', '
1774             res += '~' + format_bytes(fdict['filesize_approx'])
1775         return res
1776
1777     def list_formats(self, info_dict):
1778         formats = info_dict.get('formats', [info_dict])
1779         table = [
1780             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1781             for f in formats
1782             if f.get('preference') is None or f['preference'] >= -1000]
1783         if len(formats) > 1:
1784             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1785
1786         header_line = ['format code', 'extension', 'resolution', 'note']
1787         self.to_screen(
1788             '[info] Available formats for %s:\n%s' %
1789             (info_dict['id'], render_table(header_line, table)))
1790
1791     def list_thumbnails(self, info_dict):
1792         thumbnails = info_dict.get('thumbnails')
1793         if not thumbnails:
1794             tn_url = info_dict.get('thumbnail')
1795             if tn_url:
1796                 thumbnails = [{'id': '0', 'url': tn_url}]
1797             else:
1798                 self.to_screen(
1799                     '[info] No thumbnails present for %s' % info_dict['id'])
1800                 return
1801
1802         self.to_screen(
1803             '[info] Thumbnails for %s:' % info_dict['id'])
1804         self.to_screen(render_table(
1805             ['ID', 'width', 'height', 'URL'],
1806             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1807
1808     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1809         if not subtitles:
1810             self.to_screen('%s has no %s' % (video_id, name))
1811             return
1812         self.to_screen(
1813             'Available %s for %s:' % (name, video_id))
1814         self.to_screen(render_table(
1815             ['Language', 'formats'],
1816             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1817                 for lang, formats in subtitles.items()]))
1818
1819     def urlopen(self, req):
1820         """ Start an HTTP download """
1821
1822         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1823         # always respected by websites, some tend to give out URLs with non percent-encoded
1824         # non-ASCII characters (see telemb.py, ard.py [#3412])
1825         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1826         # To work around aforementioned issue we will replace request's original URL with
1827         # percent-encoded one
1828         req_is_string = isinstance(req, compat_basestring)
1829         url = req if req_is_string else req.get_full_url()
1830         url_escaped = escape_url(url)
1831
1832         # Substitute URL if any change after escaping
1833         if url != url_escaped:
1834             if req_is_string:
1835                 req = url_escaped
1836             else:
1837                 req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
1838                 req = req_type(
1839                     url_escaped, data=req.data, headers=req.headers,
1840                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1841
1842         return self._opener.open(req, timeout=self._socket_timeout)
1843
1844     def print_debug_header(self):
1845         if not self.params.get('verbose'):
1846             return
1847
1848         if type('') is not compat_str:
1849             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1850             self.report_warning(
1851                 'Your Python is broken! Update to a newer and supported version')
1852
1853         stdout_encoding = getattr(
1854             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1855         encoding_str = (
1856             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1857                 locale.getpreferredencoding(),
1858                 sys.getfilesystemencoding(),
1859                 stdout_encoding,
1860                 self.get_encoding()))
1861         write_string(encoding_str, encoding=None)
1862
1863         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1864         try:
1865             sp = subprocess.Popen(
1866                 ['git', 'rev-parse', '--short', 'HEAD'],
1867                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1868                 cwd=os.path.dirname(os.path.abspath(__file__)))
1869             out, err = sp.communicate()
1870             out = out.decode().strip()
1871             if re.match('[0-9a-f]+', out):
1872                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1873         except Exception:
1874             try:
1875                 sys.exc_clear()
1876             except Exception:
1877                 pass
1878         self._write_string('[debug] Python version %s - %s\n' % (
1879             platform.python_version(), platform_name()))
1880
1881         exe_versions = FFmpegPostProcessor.get_versions(self)
1882         exe_versions['rtmpdump'] = rtmpdump_version()
1883         exe_str = ', '.join(
1884             '%s %s' % (exe, v)
1885             for exe, v in sorted(exe_versions.items())
1886             if v
1887         )
1888         if not exe_str:
1889             exe_str = 'none'
1890         self._write_string('[debug] exe versions: %s\n' % exe_str)
1891
1892         proxy_map = {}
1893         for handler in self._opener.handlers:
1894             if hasattr(handler, 'proxies'):
1895                 proxy_map.update(handler.proxies)
1896         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1897
1898         if self.params.get('call_home', False):
1899             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1900             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1901             latest_version = self.urlopen(
1902                 'https://yt-dl.org/latest/version').read().decode('utf-8')
1903             if version_tuple(latest_version) > version_tuple(__version__):
1904                 self.report_warning(
1905                     'You are using an outdated version (newest version: %s)! '
1906                     'See https://yt-dl.org/update if you need help updating.' %
1907                     latest_version)
1908
1909     def _setup_opener(self):
1910         timeout_val = self.params.get('socket_timeout')
1911         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1912
1913         opts_cookiefile = self.params.get('cookiefile')
1914         opts_proxy = self.params.get('proxy')
1915
1916         if opts_cookiefile is None:
1917             self.cookiejar = compat_cookiejar.CookieJar()
1918         else:
1919             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1920                 opts_cookiefile)
1921             if os.access(opts_cookiefile, os.R_OK):
1922                 self.cookiejar.load()
1923
1924         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1925             self.cookiejar)
1926         if opts_proxy is not None:
1927             if opts_proxy == '':
1928                 proxies = {}
1929             else:
1930                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1931         else:
1932             proxies = compat_urllib_request.getproxies()
1933             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1934             if 'http' in proxies and 'https' not in proxies:
1935                 proxies['https'] = proxies['http']
1936         proxy_handler = PerRequestProxyHandler(proxies)
1937
1938         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1939         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1940         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1941         opener = compat_urllib_request.build_opener(
1942             proxy_handler, https_handler, cookie_processor, ydlh)
1943
1944         # Delete the default user-agent header, which would otherwise apply in
1945         # cases where our custom HTTP handler doesn't come into play
1946         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1947         opener.addheaders = []
1948         self._opener = opener
1949
1950     def encode(self, s):
1951         if isinstance(s, bytes):
1952             return s  # Already encoded
1953
1954         try:
1955             return s.encode(self.get_encoding())
1956         except UnicodeEncodeError as err:
1957             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1958             raise
1959
1960     def get_encoding(self):
1961         encoding = self.params.get('encoding')
1962         if encoding is None:
1963             encoding = preferredencoding()
1964         return encoding
1965
1966     def _write_thumbnails(self, info_dict, filename):
1967         if self.params.get('writethumbnail', False):
1968             thumbnails = info_dict.get('thumbnails')
1969             if thumbnails:
1970                 thumbnails = [thumbnails[-1]]
1971         elif self.params.get('write_all_thumbnails', False):
1972             thumbnails = info_dict.get('thumbnails')
1973         else:
1974             return
1975
1976         if not thumbnails:
1977             # No thumbnails present, so return immediately
1978             return
1979
1980         for t in thumbnails:
1981             thumb_ext = determine_ext(t['url'], 'jpg')
1982             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
1983             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
1984             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
1985
1986             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1987                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
1988                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1989             else:
1990                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
1991                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1992                 try:
1993                     uf = self.urlopen(t['url'])
1994                     with open(thumb_filename, 'wb') as thumbf:
1995                         shutil.copyfileobj(uf, thumbf)
1996                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
1997                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
1998                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1999                     self.report_warning('Unable to download thumbnail "%s": %s' %
2000                                         (t['url'], compat_str(err)))