_ Git - youtube-dl/blob - youtube_dl/YoutubeDL.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import, unicode_literals
   5
   6 import collections
   7 import contextlib
   8 import datetime
   9 import errno
  10 import fileinput
  11 import io
  12 import itertools
  13 import json
  14 import locale
  15 import operator
  16 import os
  17 import platform
  18 import re
  19 import shutil
  20 import subprocess
  21 import socket
  22 import sys
  23 import time
  24 import tokenize
  25 import traceback
  26
  27 if os.name == 'nt':
  28     import ctypes
  29
  30 from .compat import (
  31     compat_basestring,
  32     compat_cookiejar,
  33     compat_expanduser,
  34     compat_get_terminal_size,
  35     compat_http_client,
  36     compat_kwargs,
  37     compat_str,
  38     compat_tokenize_tokenize,
  39     compat_urllib_error,
  40     compat_urllib_request,
  41 )
  42 from .utils import (
  43     escape_url,
  44     ContentTooShortError,
  45     date_from_str,
  46     DateRange,
  47     DEFAULT_OUTTMPL,
  48     determine_ext,
  49     DownloadError,
  50     encodeFilename,
  51     ExtractorError,
  52     format_bytes,
  53     formatSeconds,
  54     HEADRequest,
  55     locked_file,
  56     make_HTTPS_handler,
  57     MaxDownloadsReached,
  58     PagedList,
  59     parse_filesize,
  60     PerRequestProxyHandler,
  61     PostProcessingError,
  62     platform_name,
  63     preferredencoding,
  64     render_table,
  65     SameFileError,
  66     sanitize_filename,
  67     sanitize_path,
  68     std_headers,
  69     subtitles_filename,
  70     UnavailableVideoError,
  71     url_basename,
  72     version_tuple,
  73     write_json_file,
  74     write_string,
  75     YoutubeDLHandler,
  76     prepend_extension,
  77     replace_extension,
  78     args_to_str,
  79     age_restricted,
  80 )
  81 from .cache import Cache
  82 from .extractor import get_info_extractor, gen_extractors
  83 from .downloader import get_suitable_downloader
  84 from .downloader.rtmp import rtmpdump_version
  85 from .postprocessor import (
  86     FFmpegFixupM4aPP,
  87     FFmpegFixupStretchedPP,
  88     FFmpegMergerPP,
  89     FFmpegPostProcessor,
  90     get_postprocessor,
  91 )
  92 from .version import __version__
  93
  94
  95 class YoutubeDL(object):
  96     """YoutubeDL class.
  97
  98     YoutubeDL objects are the ones responsible of downloading the
  99     actual video file and writing it to disk if the user has requested
 100     it, among some other tasks. In most cases there should be one per
 101     program. As, given a video URL, the downloader doesn't know how to
 102     extract all the needed information, task that InfoExtractors do, it
 103     has to pass the URL to one of them.
 104
 105     For this, YoutubeDL objects have a method that allows
 106     InfoExtractors to be registered in a given order. When it is passed
 107     a URL, the YoutubeDL object handles it to the first InfoExtractor it
 108     finds that reports being able to handle it. The InfoExtractor extracts
 109     all the information about the video or videos the URL refers to, and
 110     YoutubeDL process the extracted information, possibly using a File
 111     Downloader to download the video.
 112
 113     YoutubeDL objects accept a lot of parameters. In order not to saturate
 114     the object constructor with arguments, it receives a dictionary of
 115     options instead. These options are available through the params
 116     attribute for the InfoExtractors to use. The YoutubeDL also
 117     registers itself as the downloader in charge for the InfoExtractors
 118     that are added to it, so this is a "mutual registration".
 119
 120     Available options:
 121
 122     username:          Username for authentication purposes.
 123     password:          Password for authentication purposes.
 124     videopassword:     Password for accessing a video.
 125     usenetrc:          Use netrc for authentication instead.
 126     verbose:           Print additional info to stdout.
 127     quiet:             Do not print messages to stdout.
 128     no_warnings:       Do not print out anything for warnings.
 129     forceurl:          Force printing final URL.
 130     forcetitle:        Force printing title.
 131     forceid:           Force printing ID.
 132     forcethumbnail:    Force printing thumbnail URL.
 133     forcedescription:  Force printing description.
 134     forcefilename:     Force printing final filename.
 135     forceduration:     Force printing duration.
 136     forcejson:         Force printing info_dict as JSON.
 137     dump_single_json:  Force printing the info_dict of the whole playlist
 138                        (or video) as a single JSON line.
 139     simulate:          Do not download the video files.
 140     format:            Video format code. See options.py for more information.
 141     outtmpl:           Template for output names.
 142     restrictfilenames: Do not allow "&" and spaces in file names
 143     ignoreerrors:      Do not stop on download errors.
 144     force_generic_extractor: Force downloader to use the generic extractor
 145     nooverwrites:      Prevent overwriting files.
 146     playliststart:     Playlist item to start at.
 147     playlistend:       Playlist item to end at.
 148     playlist_items:    Specific indices of playlist to download.
 149     playlistreverse:   Download playlist items in reverse order.
 150     matchtitle:        Download only matching titles.
 151     rejecttitle:       Reject downloads for matching titles.
 152     logger:            Log messages to a logging.Logger instance.
 153     logtostderr:       Log messages to stderr instead of stdout.
 154     writedescription:  Write the video description to a .description file
 155     writeinfojson:     Write the video description to a .info.json file
 156     writeannotations:  Write the video annotations to a .annotations.xml file
 157     writethumbnail:    Write the thumbnail image to a file
 158     write_all_thumbnails:  Write all thumbnail formats to files
 159     writesubtitles:    Write the video subtitles to a file
 160     writeautomaticsub: Write the automatic subtitles to a file
 161     allsubtitles:      Downloads all the subtitles of the video
 162                        (requires writesubtitles or writeautomaticsub)
 163     listsubtitles:     Lists all available subtitles for the video
 164     subtitlesformat:   The format code for subtitles
 165     subtitleslangs:    List of languages of the subtitles to download
 166     keepvideo:         Keep the video file after post-processing
 167     daterange:         A DateRange object, download only if the upload_date is in the range.
 168     skip_download:     Skip the actual download of the video file
 169     cachedir:          Location of the cache files in the filesystem.
 170                        False to disable filesystem cache.
 171     noplaylist:        Download single video instead of a playlist if in doubt.
 172     age_limit:         An integer representing the user's age in years.
 173                        Unsuitable videos for the given age are skipped.
 174     min_views:         An integer representing the minimum view count the video
 175                        must have in order to not be skipped.
 176                        Videos without view count information are always
 177                        downloaded. None for no limit.
 178     max_views:         An integer representing the maximum view count.
 179                        Videos that are more popular than that are not
 180                        downloaded.
 181                        Videos without view count information are always
 182                        downloaded. None for no limit.
 183     download_archive:  File name of a file where all downloads are recorded.
 184                        Videos already present in the file are not downloaded
 185                        again.
 186     cookiefile:        File name where cookies should be read from and dumped to.
 187     nocheckcertificate:Do not verify SSL certificates
 188     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
 189                        At the moment, this is only supported by YouTube.
 190     proxy:             URL of the proxy server to use
 191     cn_verification_proxy:  URL of the proxy to use for IP address verification
 192                        on Chinese sites. (Experimental)
 193     socket_timeout:    Time to wait for unresponsive hosts, in seconds
 194     bidi_workaround:   Work around buggy terminals without bidirectional text
 195                        support, using fridibi
 196     debug_printtraffic:Print out sent and received HTTP traffic
 197     include_ads:       Download ads as well
 198     default_search:    Prepend this string if an input url is not valid.
 199                        'auto' for elaborate guessing
 200     encoding:          Use this encoding instead of the system-specified.
 201     extract_flat:      Do not resolve URLs, return the immediate result.
 202                        Pass in 'in_playlist' to only show this behavior for
 203                        playlist items.
 204     postprocessors:    A list of dictionaries, each with an entry
 205                        * key:  The name of the postprocessor. See
 206                                youtube_dl/postprocessor/__init__.py for a list.
 207                        as well as any further keyword arguments for the
 208                        postprocessor.
 209     progress_hooks:    A list of functions that get called on download
 210                        progress, with a dictionary with the entries
 211                        * status: One of "downloading", "error", or "finished".
 212                                  Check this first and ignore unknown values.
 213
 214                        If status is one of "downloading", or "finished", the
 215                        following properties may also be present:
 216                        * filename: The final filename (always present)
 217                        * tmpfilename: The filename we're currently writing to
 218                        * downloaded_bytes: Bytes on disk
 219                        * total_bytes: Size of the whole file, None if unknown
 220                        * total_bytes_estimate: Guess of the eventual file size,
 221                                                None if unavailable.
 222                        * elapsed: The number of seconds since download started.
 223                        * eta: The estimated time in seconds, None if unknown
 224                        * speed: The download speed in bytes/second, None if
 225                                 unknown
 226                        * fragment_index: The counter of the currently
 227                                          downloaded video fragment.
 228                        * fragment_count: The number of fragments (= individual
 229                                          files that will be merged)
 230
 231                        Progress hooks are guaranteed to be called at least once
 232                        (with status "finished") if the download is successful.
 233     merge_output_format: Extension to use when merging formats.
 234     fixup:             Automatically correct known faults of the file.
 235                        One of:
 236                        - "never": do nothing
 237                        - "warn": only emit a warning
 238                        - "detect_or_warn": check whether we can do anything
 239                                            about it, warn otherwise (default)
 240     source_address:    (Experimental) Client-side IP address to bind to.
 241     call_home:         Boolean, true iff we are allowed to contact the
 242                        youtube-dl servers for debugging.
 243     sleep_interval:    Number of seconds to sleep before each download.
 244     listformats:       Print an overview of available video formats and exit.
 245     list_thumbnails:   Print a table of all thumbnails and exit.
 246     match_filter:      A function that gets called with the info_dict of
 247                        every video.
 248                        If it returns a message, the video is ignored.
 249                        If it returns None, the video is downloaded.
 250                        match_filter_func in utils.py is one example for this.
 251     no_color:          Do not emit color codes in output.
 252
 253     The following options determine which downloader is picked:
 254     external_downloader: Executable of the external downloader to call.
 255                        None or unset for standard (built-in) downloader.
 256     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
 257
 258     The following parameters are not used by YoutubeDL itself, they are used by
 259     the downloader (see youtube_dl/downloader/common.py):
 260     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
 261     noresizebuffer, retries, continuedl, noprogress, consoletitle,
 262     xattr_set_filesize, external_downloader_args.
 263
 264     The following options are used by the post processors:
 265     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
 266                        otherwise prefer avconv.
 267     """
 268
 269     params = None
 270     _ies = []
 271     _pps = []
 272     _download_retcode = None
 273     _num_downloads = None
 274     _screen_file = None
 275
 276     def __init__(self, params=None, auto_init=True):
 277         """Create a FileDownloader object with the given options."""
 278         if params is None:
 279             params = {}
 280         self._ies = []
 281         self._ies_instances = {}
 282         self._pps = []
 283         self._progress_hooks = []
 284         self._download_retcode = 0
 285         self._num_downloads = 0
 286         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 287         self._err_file = sys.stderr
 288         self.params = params
 289         self.cache = Cache(self)
 290
 291         if params.get('bidi_workaround', False):
 292             try:
 293                 import pty
 294                 master, slave = pty.openpty()
 295                 width = compat_get_terminal_size().columns
 296                 if width is None:
 297                     width_args = []
 298                 else:
 299                     width_args = ['-w', str(width)]
 300                 sp_kwargs = dict(
 301                     stdin=subprocess.PIPE,
 302                     stdout=slave,
 303                     stderr=self._err_file)
 304                 try:
 305                     self._output_process = subprocess.Popen(
 306                         ['bidiv'] + width_args, **sp_kwargs
 307                     )
 308                 except OSError:
 309                     self._output_process = subprocess.Popen(
 310                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
 311                 self._output_channel = os.fdopen(master, 'rb')
 312             except OSError as ose:
 313                 if ose.errno == 2:
 314                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
 315                 else:
 316                     raise
 317
 318         if (sys.version_info >= (3,) and sys.platform != 'win32' and
 319                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
 320                 not params.get('restrictfilenames', False)):
 321             # On Python 3, the Unicode filesystem API will throw errors (#1474)
 322             self.report_warning(
 323                 'Assuming --restrict-filenames since file system encoding '
 324                 'cannot encode all characters. '
 325                 'Set the LC_ALL environment variable to fix this.')
 326             self.params['restrictfilenames'] = True
 327
 328         if isinstance(params.get('outtmpl'), bytes):
 329             self.report_warning(
 330                 'Parameter outtmpl is bytes, but should be a unicode string. '
 331                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
 332
 333         self._setup_opener()
 334
 335         if auto_init:
 336             self.print_debug_header()
 337             self.add_default_info_extractors()
 338
 339         for pp_def_raw in self.params.get('postprocessors', []):
 340             pp_class = get_postprocessor(pp_def_raw['key'])
 341             pp_def = dict(pp_def_raw)
 342             del pp_def['key']
 343             pp = pp_class(self, **compat_kwargs(pp_def))
 344             self.add_post_processor(pp)
 345
 346         for ph in self.params.get('progress_hooks', []):
 347             self.add_progress_hook(ph)
 348
 349     def warn_if_short_id(self, argv):
 350         # short YouTube ID starting with dash?
 351         idxs = [
 352             i for i, a in enumerate(argv)
 353             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
 354         if idxs:
 355             correct_argv = (
 356                 ['youtube-dl'] +
 357                 [a for i, a in enumerate(argv) if i not in idxs] +
 358                 ['--'] + [argv[i] for i in idxs]
 359             )
 360             self.report_warning(
 361                 'Long argument string detected. '
 362                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
 363                 args_to_str(correct_argv))
 364
 365     def add_info_extractor(self, ie):
 366         """Add an InfoExtractor object to the end of the list."""
 367         self._ies.append(ie)
 368         self._ies_instances[ie.ie_key()] = ie
 369         ie.set_downloader(self)
 370
 371     def get_info_extractor(self, ie_key):
 372         """
 373         Get an instance of an IE with name ie_key, it will try to get one from
 374         the _ies list, if there's no instance it will create a new one and add
 375         it to the extractor list.
 376         """
 377         ie = self._ies_instances.get(ie_key)
 378         if ie is None:
 379             ie = get_info_extractor(ie_key)()
 380             self.add_info_extractor(ie)
 381         return ie
 382
 383     def add_default_info_extractors(self):
 384         """
 385         Add the InfoExtractors returned by gen_extractors to the end of the list
 386         """
 387         for ie in gen_extractors():
 388             self.add_info_extractor(ie)
 389
 390     def add_post_processor(self, pp):
 391         """Add a PostProcessor object to the end of the chain."""
 392         self._pps.append(pp)
 393         pp.set_downloader(self)
 394
 395     def add_progress_hook(self, ph):
 396         """Add the progress hook (currently only for the file downloader)"""
 397         self._progress_hooks.append(ph)
 398
 399     def _bidi_workaround(self, message):
 400         if not hasattr(self, '_output_channel'):
 401             return message
 402
 403         assert hasattr(self, '_output_process')
 404         assert isinstance(message, compat_str)
 405         line_count = message.count('\n') + 1
 406         self._output_process.stdin.write((message + '\n').encode('utf-8'))
 407         self._output_process.stdin.flush()
 408         res = ''.join(self._output_channel.readline().decode('utf-8')
 409                       for _ in range(line_count))
 410         return res[:-len('\n')]
 411
 412     def to_screen(self, message, skip_eol=False):
 413         """Print message to stdout if not in quiet mode."""
 414         return self.to_stdout(message, skip_eol, check_quiet=True)
 415
 416     def _write_string(self, s, out=None):
 417         write_string(s, out=out, encoding=self.params.get('encoding'))
 418
 419     def to_stdout(self, message, skip_eol=False, check_quiet=False):
 420         """Print message to stdout if not in quiet mode."""
 421         if self.params.get('logger'):
 422             self.params['logger'].debug(message)
 423         elif not check_quiet or not self.params.get('quiet', False):
 424             message = self._bidi_workaround(message)
 425             terminator = ['\n', ''][skip_eol]
 426             output = message + terminator
 427
 428             self._write_string(output, self._screen_file)
 429
 430     def to_stderr(self, message):
 431         """Print message to stderr."""
 432         assert isinstance(message, compat_str)
 433         if self.params.get('logger'):
 434             self.params['logger'].error(message)
 435         else:
 436             message = self._bidi_workaround(message)
 437             output = message + '\n'
 438             self._write_string(output, self._err_file)
 439
 440     def to_console_title(self, message):
 441         if not self.params.get('consoletitle', False):
 442             return
 443         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 444             # c_wchar_p() might not be necessary if `message` is
 445             # already of type unicode()
 446             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 447         elif 'TERM' in os.environ:
 448             self._write_string('\033]0;%s\007' % message, self._screen_file)
 449
 450     def save_console_title(self):
 451         if not self.params.get('consoletitle', False):
 452             return
 453         if 'TERM' in os.environ:
 454             # Save the title on stack
 455             self._write_string('\033[22;0t', self._screen_file)
 456
 457     def restore_console_title(self):
 458         if not self.params.get('consoletitle', False):
 459             return
 460         if 'TERM' in os.environ:
 461             # Restore the title from stack
 462             self._write_string('\033[23;0t', self._screen_file)
 463
 464     def __enter__(self):
 465         self.save_console_title()
 466         return self
 467
 468     def __exit__(self, *args):
 469         self.restore_console_title()
 470
 471         if self.params.get('cookiefile') is not None:
 472             self.cookiejar.save()
 473
 474     def trouble(self, message=None, tb=None):
 475         """Determine action to take when a download problem appears.
 476
 477         Depending on if the downloader has been configured to ignore
 478         download errors or not, this method may throw an exception or
 479         not when errors are found, after printing the message.
 480
 481         tb, if given, is additional traceback information.
 482         """
 483         if message is not None:
 484             self.to_stderr(message)
 485         if self.params.get('verbose'):
 486             if tb is None:
 487                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
 488                     tb = ''
 489                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 490                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
 491                     tb += compat_str(traceback.format_exc())
 492                 else:
 493                     tb_data = traceback.format_list(traceback.extract_stack())
 494                     tb = ''.join(tb_data)
 495             self.to_stderr(tb)
 496         if not self.params.get('ignoreerrors', False):
 497             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
 498                 exc_info = sys.exc_info()[1].exc_info
 499             else:
 500                 exc_info = sys.exc_info()
 501             raise DownloadError(message, exc_info)
 502         self._download_retcode = 1
 503
 504     def report_warning(self, message):
 505         '''
 506         Print the message to stderr, it will be prefixed with 'WARNING:'
 507         If stderr is a tty file the 'WARNING:' will be colored
 508         '''
 509         if self.params.get('logger') is not None:
 510             self.params['logger'].warning(message)
 511         else:
 512             if self.params.get('no_warnings'):
 513                 return
 514             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
 515                 _msg_header = '\033[0;33mWARNING:\033[0m'
 516             else:
 517                 _msg_header = 'WARNING:'
 518             warning_message = '%s %s' % (_msg_header, message)
 519             self.to_stderr(warning_message)
 520
 521     def report_error(self, message, tb=None):
 522         '''
 523         Do the same as trouble, but prefixes the message with 'ERROR:', colored
 524         in red if stderr is a tty file.
 525         '''
 526         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
 527             _msg_header = '\033[0;31mERROR:\033[0m'
 528         else:
 529             _msg_header = 'ERROR:'
 530         error_message = '%s %s' % (_msg_header, message)
 531         self.trouble(error_message, tb)
 532
 533     def report_file_already_downloaded(self, file_name):
 534         """Report file has already been fully downloaded."""
 535         try:
 536             self.to_screen('[download] %s has already been downloaded' % file_name)
 537         except UnicodeEncodeError:
 538             self.to_screen('[download] The file has already been downloaded')
 539
 540     def prepare_filename(self, info_dict):
 541         """Generate the output filename."""
 542         try:
 543             template_dict = dict(info_dict)
 544
 545             template_dict['epoch'] = int(time.time())
 546             autonumber_size = self.params.get('autonumber_size')
 547             if autonumber_size is None:
 548                 autonumber_size = 5
 549             autonumber_templ = '%0' + str(autonumber_size) + 'd'
 550             template_dict['autonumber'] = autonumber_templ % self._num_downloads
 551             if template_dict.get('playlist_index') is not None:
 552                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
 553             if template_dict.get('resolution') is None:
 554                 if template_dict.get('width') and template_dict.get('height'):
 555                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
 556                 elif template_dict.get('height'):
 557                     template_dict['resolution'] = '%sp' % template_dict['height']
 558                 elif template_dict.get('width'):
 559                     template_dict['resolution'] = '?x%d' % template_dict['width']
 560
 561             sanitize = lambda k, v: sanitize_filename(
 562                 compat_str(v),
 563                 restricted=self.params.get('restrictfilenames'),
 564                 is_id=(k == 'id'))
 565             template_dict = dict((k, sanitize(k, v))
 566                                  for k, v in template_dict.items()
 567                                  if v is not None)
 568             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
 569
 570             outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
 571             tmpl = compat_expanduser(outtmpl)
 572             filename = tmpl % template_dict
 573             # Temporary fix for #4787
 574             # 'Treat' all problem characters by passing filename through preferredencoding
 575             # to workaround encoding issues with subprocess on python2 @ Windows
 576             if sys.version_info < (3, 0) and sys.platform == 'win32':
 577                 filename = encodeFilename(filename, True).decode(preferredencoding())
 578             return filename
 579         except ValueError as err:
 580             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
 581             return None
 582
 583     def _match_entry(self, info_dict, incomplete):
 584         """ Returns None iff the file should be downloaded """
 585
 586         video_title = info_dict.get('title', info_dict.get('id', 'video'))
 587         if 'title' in info_dict:
 588             # This can happen when we're just evaluating the playlist
 589             title = info_dict['title']
 590             matchtitle = self.params.get('matchtitle', False)
 591             if matchtitle:
 592                 if not re.search(matchtitle, title, re.IGNORECASE):
 593                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
 594             rejecttitle = self.params.get('rejecttitle', False)
 595             if rejecttitle:
 596                 if re.search(rejecttitle, title, re.IGNORECASE):
 597                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 598         date = info_dict.get('upload_date', None)
 599         if date is not None:
 600             dateRange = self.params.get('daterange', DateRange())
 601             if date not in dateRange:
 602                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
 603         view_count = info_dict.get('view_count', None)
 604         if view_count is not None:
 605             min_views = self.params.get('min_views')
 606             if min_views is not None and view_count < min_views:
 607                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
 608             max_views = self.params.get('max_views')
 609             if max_views is not None and view_count > max_views:
 610                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
 611         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
 612             return 'Skipping "%s" because it is age restricted' % video_title
 613         if self.in_download_archive(info_dict):
 614             return '%s has already been recorded in archive' % video_title
 615
 616         if not incomplete:
 617             match_filter = self.params.get('match_filter')
 618             if match_filter is not None:
 619                 ret = match_filter(info_dict)
 620                 if ret is not None:
 621                     return ret
 622
 623         return None
 624
 625     @staticmethod
 626     def add_extra_info(info_dict, extra_info):
 627         '''Set the keys from extra_info in info dict if they are missing'''
 628         for key, value in extra_info.items():
 629             info_dict.setdefault(key, value)
 630
 631     def extract_info(self, url, download=True, ie_key=None, extra_info={},
 632                      process=True, force_generic_extractor=False):
 633         '''
 634         Returns a list with a dictionary for each video we find.
 635         If 'download', also downloads the videos.
 636         extra_info is a dict containing the extra values to add to each result
 637         '''
 638
 639         if not ie_key and force_generic_extractor:
 640             ie_key = 'Generic'
 641
 642         if ie_key:
 643             ies = [self.get_info_extractor(ie_key)]
 644         else:
 645             ies = self._ies
 646
 647         for ie in ies:
 648             if not ie.suitable(url):
 649                 continue
 650
 651             if not ie.working():
 652                 self.report_warning('The program functionality for this site has been marked as broken, '
 653                                     'and will probably not work.')
 654
 655             try:
 656                 ie_result = ie.extract(url)
 657                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
 658                     break
 659                 if isinstance(ie_result, list):
 660                     # Backwards compatibility: old IE result format
 661                     ie_result = {
 662                         '_type': 'compat_list',
 663                         'entries': ie_result,
 664                     }
 665                 self.add_default_extra_info(ie_result, ie, url)
 666                 if process:
 667                     return self.process_ie_result(ie_result, download, extra_info)
 668                 else:
 669                     return ie_result
 670             except ExtractorError as de:  # An error we somewhat expected
 671                 self.report_error(compat_str(de), de.format_traceback())
 672                 break
 673             except MaxDownloadsReached:
 674                 raise
 675             except Exception as e:
 676                 if self.params.get('ignoreerrors', False):
 677                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
 678                     break
 679                 else:
 680                     raise
 681         else:
 682             self.report_error('no suitable InfoExtractor for URL %s' % url)
 683
 684     def add_default_extra_info(self, ie_result, ie, url):
 685         self.add_extra_info(ie_result, {
 686             'extractor': ie.IE_NAME,
 687             'webpage_url': url,
 688             'webpage_url_basename': url_basename(url),
 689             'extractor_key': ie.ie_key(),
 690         })
 691
 692     def process_ie_result(self, ie_result, download=True, extra_info={}):
 693         """
 694         Take the result of the ie(may be modified) and resolve all unresolved
 695         references (URLs, playlist items).
 696
 697         It will also download the videos if 'download'.
 698         Returns the resolved ie_result.
 699         """
 700
 701         result_type = ie_result.get('_type', 'video')
 702
 703         if result_type in ('url', 'url_transparent'):
 704             extract_flat = self.params.get('extract_flat', False)
 705             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
 706                     extract_flat is True):
 707                 if self.params.get('forcejson', False):
 708                     self.to_stdout(json.dumps(ie_result))
 709                 return ie_result
 710
 711         if result_type == 'video':
 712             self.add_extra_info(ie_result, extra_info)
 713             return self.process_video_result(ie_result, download=download)
 714         elif result_type == 'url':
 715             # We have to add extra_info to the results because it may be
 716             # contained in a playlist
 717             return self.extract_info(ie_result['url'],
 718                                      download,
 719                                      ie_key=ie_result.get('ie_key'),
 720                                      extra_info=extra_info)
 721         elif result_type == 'url_transparent':
 722             # Use the information from the embedding page
 723             info = self.extract_info(
 724                 ie_result['url'], ie_key=ie_result.get('ie_key'),
 725                 extra_info=extra_info, download=False, process=False)
 726
 727             force_properties = dict(
 728                 (k, v) for k, v in ie_result.items() if v is not None)
 729             for f in ('_type', 'url'):
 730                 if f in force_properties:
 731                     del force_properties[f]
 732             new_result = info.copy()
 733             new_result.update(force_properties)
 734
 735             assert new_result.get('_type') != 'url_transparent'
 736
 737             return self.process_ie_result(
 738                 new_result, download=download, extra_info=extra_info)
 739         elif result_type == 'playlist' or result_type == 'multi_video':
 740             # We process each entry in the playlist
 741             playlist = ie_result.get('title', None) or ie_result.get('id', None)
 742             self.to_screen('[download] Downloading playlist: %s' % playlist)
 743
 744             playlist_results = []
 745
 746             playliststart = self.params.get('playliststart', 1) - 1
 747             playlistend = self.params.get('playlistend', None)
 748             # For backwards compatibility, interpret -1 as whole list
 749             if playlistend == -1:
 750                 playlistend = None
 751
 752             playlistitems_str = self.params.get('playlist_items', None)
 753             playlistitems = None
 754             if playlistitems_str is not None:
 755                 def iter_playlistitems(format):
 756                     for string_segment in format.split(','):
 757                         if '-' in string_segment:
 758                             start, end = string_segment.split('-')
 759                             for item in range(int(start), int(end) + 1):
 760                                 yield int(item)
 761                         else:
 762                             yield int(string_segment)
 763                 playlistitems = iter_playlistitems(playlistitems_str)
 764
 765             ie_entries = ie_result['entries']
 766             if isinstance(ie_entries, list):
 767                 n_all_entries = len(ie_entries)
 768                 if playlistitems:
 769                     entries = [
 770                         ie_entries[i - 1] for i in playlistitems
 771                         if -n_all_entries <= i - 1 < n_all_entries]
 772                 else:
 773                     entries = ie_entries[playliststart:playlistend]
 774                 n_entries = len(entries)
 775                 self.to_screen(
 776                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
 777                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
 778             elif isinstance(ie_entries, PagedList):
 779                 if playlistitems:
 780                     entries = []
 781                     for item in playlistitems:
 782                         entries.extend(ie_entries.getslice(
 783                             item - 1, item
 784                         ))
 785                 else:
 786                     entries = ie_entries.getslice(
 787                         playliststart, playlistend)
 788                 n_entries = len(entries)
 789                 self.to_screen(
 790                     "[%s] playlist %s: Downloading %d videos" %
 791                     (ie_result['extractor'], playlist, n_entries))
 792             else:  # iterable
 793                 if playlistitems:
 794                     entry_list = list(ie_entries)
 795                     entries = [entry_list[i - 1] for i in playlistitems]
 796                 else:
 797                     entries = list(itertools.islice(
 798                         ie_entries, playliststart, playlistend))
 799                 n_entries = len(entries)
 800                 self.to_screen(
 801                     "[%s] playlist %s: Downloading %d videos" %
 802                     (ie_result['extractor'], playlist, n_entries))
 803
 804             if self.params.get('playlistreverse', False):
 805                 entries = entries[::-1]
 806
 807             for i, entry in enumerate(entries, 1):
 808                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
 809                 extra = {
 810                     'n_entries': n_entries,
 811                     'playlist': playlist,
 812                     'playlist_id': ie_result.get('id'),
 813                     'playlist_title': ie_result.get('title'),
 814                     'playlist_index': i + playliststart,
 815                     'extractor': ie_result['extractor'],
 816                     'webpage_url': ie_result['webpage_url'],
 817                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
 818                     'extractor_key': ie_result['extractor_key'],
 819                 }
 820
 821                 reason = self._match_entry(entry, incomplete=True)
 822                 if reason is not None:
 823                     self.to_screen('[download] ' + reason)
 824                     continue
 825
 826                 entry_result = self.process_ie_result(entry,
 827                                                       download=download,
 828                                                       extra_info=extra)
 829                 playlist_results.append(entry_result)
 830             ie_result['entries'] = playlist_results
 831             return ie_result
 832         elif result_type == 'compat_list':
 833             self.report_warning(
 834                 'Extractor %s returned a compat_list result. '
 835                 'It needs to be updated.' % ie_result.get('extractor'))
 836
 837             def _fixup(r):
 838                 self.add_extra_info(
 839                     r,
 840                     {
 841                         'extractor': ie_result['extractor'],
 842                         'webpage_url': ie_result['webpage_url'],
 843                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
 844                         'extractor_key': ie_result['extractor_key'],
 845                     }
 846                 )
 847                 return r
 848             ie_result['entries'] = [
 849                 self.process_ie_result(_fixup(r), download, extra_info)
 850                 for r in ie_result['entries']
 851             ]
 852             return ie_result
 853         else:
 854             raise Exception('Invalid result type: %s' % result_type)
 855
 856     def _build_format_filter(self, filter_spec):
 857         " Returns a function to filter the formats according to the filter_spec "
 858
 859         OPERATORS = {
 860             '<': operator.lt,
 861             '<=': operator.le,
 862             '>': operator.gt,
 863             '>=': operator.ge,
 864             '=': operator.eq,
 865             '!=': operator.ne,
 866         }
 867         operator_rex = re.compile(r'''(?x)\s*
 868             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
 869             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
 870             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
 871             $
 872             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
 873         m = operator_rex.search(filter_spec)
 874         if m:
 875             try:
 876                 comparison_value = int(m.group('value'))
 877             except ValueError:
 878                 comparison_value = parse_filesize(m.group('value'))
 879                 if comparison_value is None:
 880                     comparison_value = parse_filesize(m.group('value') + 'B')
 881                 if comparison_value is None:
 882                     raise ValueError(
 883                         'Invalid value %r in format specification %r' % (
 884                             m.group('value'), filter_spec))
 885             op = OPERATORS[m.group('op')]
 886
 887         if not m:
 888             STR_OPERATORS = {
 889                 '=': operator.eq,
 890                 '!=': operator.ne,
 891             }
 892             str_operator_rex = re.compile(r'''(?x)
 893                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
 894                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
 895                 \s*(?P<value>[a-zA-Z0-9_-]+)
 896                 \s*$
 897                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
 898             m = str_operator_rex.search(filter_spec)
 899             if m:
 900                 comparison_value = m.group('value')
 901                 op = STR_OPERATORS[m.group('op')]
 902
 903         if not m:
 904             raise ValueError('Invalid filter specification %r' % filter_spec)
 905
 906         def _filter(f):
 907             actual_value = f.get(m.group('key'))
 908             if actual_value is None:
 909                 return m.group('none_inclusive')
 910             return op(actual_value, comparison_value)
 911         return _filter
 912
 913     def build_format_selector(self, format_spec):
 914         def syntax_error(note, start):
 915             message = (
 916                 'Invalid format specification: '
 917                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
 918             return SyntaxError(message)
 919
 920         PICKFIRST = 'PICKFIRST'
 921         MERGE = 'MERGE'
 922         SINGLE = 'SINGLE'
 923         GROUP = 'GROUP'
 924         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
 925
 926         def _parse_filter(tokens):
 927             filter_parts = []
 928             for type, string, start, _, _ in tokens:
 929                 if type == tokenize.OP and string == ']':
 930                     return ''.join(filter_parts)
 931                 else:
 932                     filter_parts.append(string)
 933
 934         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
 935             selectors = []
 936             current_selector = None
 937             for type, string, start, _, _ in tokens:
 938                 # ENCODING is only defined in python 3.x
 939                 if type == getattr(tokenize, 'ENCODING', None):
 940                     continue
 941                 elif type in [tokenize.NAME, tokenize.NUMBER]:
 942                     current_selector = FormatSelector(SINGLE, string, [])
 943                 elif type == tokenize.OP:
 944                     if string == ')':
 945                         if not inside_group:
 946                             # ')' will be handled by the parentheses group
 947                             tokens.restore_last_token()
 948                         break
 949                     elif inside_merge and string in ['/', ',']:
 950                         tokens.restore_last_token()
 951                         break
 952                     elif inside_choice and string == ',':
 953                         tokens.restore_last_token()
 954                         break
 955                     elif string == ',':
 956                         selectors.append(current_selector)
 957                         current_selector = None
 958                     elif string == '/':
 959                         first_choice = current_selector
 960                         second_choice = _parse_format_selection(tokens, inside_choice=True)
 961                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
 962                     elif string == '[':
 963                         if not current_selector:
 964                             current_selector = FormatSelector(SINGLE, 'best', [])
 965                         format_filter = _parse_filter(tokens)
 966                         current_selector.filters.append(format_filter)
 967                     elif string == '(':
 968                         if current_selector:
 969                             raise syntax_error('Unexpected "("', start)
 970                         group = _parse_format_selection(tokens, inside_group=True)
 971                         current_selector = FormatSelector(GROUP, group, [])
 972                     elif string == '+':
 973                         video_selector = current_selector
 974                         audio_selector = _parse_format_selection(tokens, inside_merge=True)
 975                         current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
 976                     else:
 977                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
 978                 elif type == tokenize.ENDMARKER:
 979                     break
 980             if current_selector:
 981                 selectors.append(current_selector)
 982             return selectors
 983
 984         def _build_selector_function(selector):
 985             if isinstance(selector, list):
 986                 fs = [_build_selector_function(s) for s in selector]
 987
 988                 def selector_function(formats):
 989                     for f in fs:
 990                         for format in f(formats):
 991                             yield format
 992                 return selector_function
 993             elif selector.type == GROUP:
 994                 selector_function = _build_selector_function(selector.selector)
 995             elif selector.type == PICKFIRST:
 996                 fs = [_build_selector_function(s) for s in selector.selector]
 997
 998                 def selector_function(formats):
 999                     for f in fs:
1000                         picked_formats = list(f(formats))
1001                         if picked_formats:
1002                             return picked_formats
1003                     return []
1004             elif selector.type == SINGLE:
1005                 format_spec = selector.selector
1006
1007                 def selector_function(formats):
1008                     formats = list(formats)
1009                     if not formats:
1010                         return
1011                     if format_spec == 'all':
1012                         for f in formats:
1013                             yield f
1014                     elif format_spec in ['best', 'worst', None]:
1015                         format_idx = 0 if format_spec == 'worst' else -1
1016                         audiovideo_formats = [
1017                             f for f in formats
1018                             if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
1019                         if audiovideo_formats:
1020                             yield audiovideo_formats[format_idx]
1021                         # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
1022                         elif (all(f.get('acodec') != 'none' for f in formats) or
1023                               all(f.get('vcodec') != 'none' for f in formats)):
1024                             yield formats[format_idx]
1025                     elif format_spec == 'bestaudio':
1026                         audio_formats = [
1027                             f for f in formats
1028                             if f.get('vcodec') == 'none']
1029                         if audio_formats:
1030                             yield audio_formats[-1]
1031                     elif format_spec == 'worstaudio':
1032                         audio_formats = [
1033                             f for f in formats
1034                             if f.get('vcodec') == 'none']
1035                         if audio_formats:
1036                             yield audio_formats[0]
1037                     elif format_spec == 'bestvideo':
1038                         video_formats = [
1039                             f for f in formats
1040                             if f.get('acodec') == 'none']
1041                         if video_formats:
1042                             yield video_formats[-1]
1043                     elif format_spec == 'worstvideo':
1044                         video_formats = [
1045                             f for f in formats
1046                             if f.get('acodec') == 'none']
1047                         if video_formats:
1048                             yield video_formats[0]
1049                     else:
1050                         extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
1051                         if format_spec in extensions:
1052                             filter_f = lambda f: f['ext'] == format_spec
1053                         else:
1054                             filter_f = lambda f: f['format_id'] == format_spec
1055                         matches = list(filter(filter_f, formats))
1056                         if matches:
1057                             yield matches[-1]
1058             elif selector.type == MERGE:
1059                 def _merge(formats_info):
1060                     format_1, format_2 = [f['format_id'] for f in formats_info]
1061                     # The first format must contain the video and the
1062                     # second the audio
1063                     if formats_info[0].get('vcodec') == 'none':
1064                         self.report_error('The first format must '
1065                                           'contain the video, try using '
1066                                           '"-f %s+%s"' % (format_2, format_1))
1067                         return
1068                     output_ext = (
1069                         formats_info[0]['ext']
1070                         if self.params.get('merge_output_format') is None
1071                         else self.params['merge_output_format'])
1072                     return {
1073                         'requested_formats': formats_info,
1074                         'format': '%s+%s' % (formats_info[0].get('format'),
1075                                              formats_info[1].get('format')),
1076                         'format_id': '%s+%s' % (formats_info[0].get('format_id'),
1077                                                 formats_info[1].get('format_id')),
1078                         'width': formats_info[0].get('width'),
1079                         'height': formats_info[0].get('height'),
1080                         'resolution': formats_info[0].get('resolution'),
1081                         'fps': formats_info[0].get('fps'),
1082                         'vcodec': formats_info[0].get('vcodec'),
1083                         'vbr': formats_info[0].get('vbr'),
1084                         'stretched_ratio': formats_info[0].get('stretched_ratio'),
1085                         'acodec': formats_info[1].get('acodec'),
1086                         'abr': formats_info[1].get('abr'),
1087                         'ext': output_ext,
1088                     }
1089                 video_selector, audio_selector = map(_build_selector_function, selector.selector)
1090
1091                 def selector_function(formats):
1092                     formats = list(formats)
1093                     for pair in itertools.product(video_selector(formats), audio_selector(formats)):
1094                         yield _merge(pair)
1095
1096             filters = [self._build_format_filter(f) for f in selector.filters]
1097
1098             def final_selector(formats):
1099                 for _filter in filters:
1100                     formats = list(filter(_filter, formats))
1101                 return selector_function(formats)
1102             return final_selector
1103
1104         stream = io.BytesIO(format_spec.encode('utf-8'))
1105         try:
1106             tokens = list(compat_tokenize_tokenize(stream.readline))
1107         except tokenize.TokenError:
1108             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1109
1110         class TokenIterator(object):
1111             def __init__(self, tokens):
1112                 self.tokens = tokens
1113                 self.counter = 0
1114
1115             def __iter__(self):
1116                 return self
1117
1118             def __next__(self):
1119                 if self.counter >= len(self.tokens):
1120                     raise StopIteration()
1121                 value = self.tokens[self.counter]
1122                 self.counter += 1
1123                 return value
1124
1125             next = __next__
1126
1127             def restore_last_token(self):
1128                 self.counter -= 1
1129
1130         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1131         return _build_selector_function(parsed_selector)
1132
1133     def _calc_headers(self, info_dict):
1134         res = std_headers.copy()
1135
1136         add_headers = info_dict.get('http_headers')
1137         if add_headers:
1138             res.update(add_headers)
1139
1140         cookies = self._calc_cookies(info_dict)
1141         if cookies:
1142             res['Cookie'] = cookies
1143
1144         return res
1145
1146     def _calc_cookies(self, info_dict):
1147         pr = compat_urllib_request.Request(info_dict['url'])
1148         self.cookiejar.add_cookie_header(pr)
1149         return pr.get_header('Cookie')
1150
1151     def process_video_result(self, info_dict, download=True):
1152         assert info_dict.get('_type', 'video') == 'video'
1153
1154         if 'id' not in info_dict:
1155             raise ExtractorError('Missing "id" field in extractor result')
1156         if 'title' not in info_dict:
1157             raise ExtractorError('Missing "title" field in extractor result')
1158
1159         if 'playlist' not in info_dict:
1160             # It isn't part of a playlist
1161             info_dict['playlist'] = None
1162             info_dict['playlist_index'] = None
1163
1164         thumbnails = info_dict.get('thumbnails')
1165         if thumbnails is None:
1166             thumbnail = info_dict.get('thumbnail')
1167             if thumbnail:
1168                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1169         if thumbnails:
1170             thumbnails.sort(key=lambda t: (
1171                 t.get('preference'), t.get('width'), t.get('height'),
1172                 t.get('id'), t.get('url')))
1173             for i, t in enumerate(thumbnails):
1174                 if 'width' in t and 'height' in t:
1175                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1176                 if t.get('id') is None:
1177                     t['id'] = '%d' % i
1178
1179         if thumbnails and 'thumbnail' not in info_dict:
1180             info_dict['thumbnail'] = thumbnails[-1]['url']
1181
1182         if 'display_id' not in info_dict and 'id' in info_dict:
1183             info_dict['display_id'] = info_dict['id']
1184
1185         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1186             # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1187             # see http://bugs.python.org/issue1646728)
1188             try:
1189                 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1190                 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1191             except (ValueError, OverflowError, OSError):
1192                 pass
1193
1194         if self.params.get('listsubtitles', False):
1195             if 'automatic_captions' in info_dict:
1196                 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
1197             self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
1198             return
1199         info_dict['requested_subtitles'] = self.process_subtitles(
1200             info_dict['id'], info_dict.get('subtitles'),
1201             info_dict.get('automatic_captions'))
1202
1203         # We now pick which formats have to be downloaded
1204         if info_dict.get('formats') is None:
1205             # There's only one format available
1206             formats = [info_dict]
1207         else:
1208             formats = info_dict['formats']
1209
1210         if not formats:
1211             raise ExtractorError('No video formats found!')
1212
1213         formats_dict = {}
1214
1215         # We check that all the formats have the format and format_id fields
1216         for i, format in enumerate(formats):
1217             if 'url' not in format:
1218                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1219
1220             if format.get('format_id') is None:
1221                 format['format_id'] = compat_str(i)
1222             format_id = format['format_id']
1223             if format_id not in formats_dict:
1224                 formats_dict[format_id] = []
1225             formats_dict[format_id].append(format)
1226
1227         # Make sure all formats have unique format_id
1228         for format_id, ambiguous_formats in formats_dict.items():
1229             if len(ambiguous_formats) > 1:
1230                 for i, format in enumerate(ambiguous_formats):
1231                     format['format_id'] = '%s-%d' % (format_id, i)
1232
1233         for i, format in enumerate(formats):
1234             if format.get('format') is None:
1235                 format['format'] = '{id} - {res}{note}'.format(
1236                     id=format['format_id'],
1237                     res=self.format_resolution(format),
1238                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1239                 )
1240             # Automatically determine file extension if missing
1241             if 'ext' not in format:
1242                 format['ext'] = determine_ext(format['url']).lower()
1243             # Add HTTP headers, so that external programs can use them from the
1244             # json output
1245             full_format_info = info_dict.copy()
1246             full_format_info.update(format)
1247             format['http_headers'] = self._calc_headers(full_format_info)
1248
1249         # TODO Central sorting goes here
1250
1251         if formats[0] is not info_dict:
1252             # only set the 'formats' fields if the original info_dict list them
1253             # otherwise we end up with a circular reference, the first (and unique)
1254             # element in the 'formats' field in info_dict is info_dict itself,
1255             # wich can't be exported to json
1256             info_dict['formats'] = formats
1257         if self.params.get('listformats'):
1258             self.list_formats(info_dict)
1259             return
1260         if self.params.get('list_thumbnails'):
1261             self.list_thumbnails(info_dict)
1262             return
1263
1264         req_format = self.params.get('format')
1265         if req_format is None:
1266             req_format_list = []
1267             if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
1268                     info_dict['extractor'] in ['youtube', 'ted']):
1269                 merger = FFmpegMergerPP(self)
1270                 if merger.available and merger.can_merge():
1271                     req_format_list.append('bestvideo+bestaudio')
1272             req_format_list.append('best')
1273             req_format = '/'.join(req_format_list)
1274         format_selector = self.build_format_selector(req_format)
1275         formats_to_download = list(format_selector(formats))
1276         if not formats_to_download:
1277             raise ExtractorError('requested format not available',
1278                                  expected=True)
1279
1280         if download:
1281             if len(formats_to_download) > 1:
1282                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1283             for format in formats_to_download:
1284                 new_info = dict(info_dict)
1285                 new_info.update(format)
1286                 self.process_info(new_info)
1287         # We update the info dict with the best quality format (backwards compatibility)
1288         info_dict.update(formats_to_download[-1])
1289         return info_dict
1290
1291     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1292         """Select the requested subtitles and their format"""
1293         available_subs = {}
1294         if normal_subtitles and self.params.get('writesubtitles'):
1295             available_subs.update(normal_subtitles)
1296         if automatic_captions and self.params.get('writeautomaticsub'):
1297             for lang, cap_info in automatic_captions.items():
1298                 if lang not in available_subs:
1299                     available_subs[lang] = cap_info
1300
1301         if (not self.params.get('writesubtitles') and not
1302                 self.params.get('writeautomaticsub') or not
1303                 available_subs):
1304             return None
1305
1306         if self.params.get('allsubtitles', False):
1307             requested_langs = available_subs.keys()
1308         else:
1309             if self.params.get('subtitleslangs', False):
1310                 requested_langs = self.params.get('subtitleslangs')
1311             elif 'en' in available_subs:
1312                 requested_langs = ['en']
1313             else:
1314                 requested_langs = [list(available_subs.keys())[0]]
1315
1316         formats_query = self.params.get('subtitlesformat', 'best')
1317         formats_preference = formats_query.split('/') if formats_query else []
1318         subs = {}
1319         for lang in requested_langs:
1320             formats = available_subs.get(lang)
1321             if formats is None:
1322                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1323                 continue
1324             for ext in formats_preference:
1325                 if ext == 'best':
1326                     f = formats[-1]
1327                     break
1328                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1329                 if matches:
1330                     f = matches[-1]
1331                     break
1332             else:
1333                 f = formats[-1]
1334                 self.report_warning(
1335                     'No subtitle format found matching "%s" for language %s, '
1336                     'using %s' % (formats_query, lang, f['ext']))
1337             subs[lang] = f
1338         return subs
1339
1340     def process_info(self, info_dict):
1341         """Process a single resolved IE result."""
1342
1343         assert info_dict.get('_type', 'video') == 'video'
1344
1345         max_downloads = self.params.get('max_downloads')
1346         if max_downloads is not None:
1347             if self._num_downloads >= int(max_downloads):
1348                 raise MaxDownloadsReached()
1349
1350         info_dict['fulltitle'] = info_dict['title']
1351         if len(info_dict['title']) > 200:
1352             info_dict['title'] = info_dict['title'][:197] + '...'
1353
1354         if 'format' not in info_dict:
1355             info_dict['format'] = info_dict['ext']
1356
1357         reason = self._match_entry(info_dict, incomplete=False)
1358         if reason is not None:
1359             self.to_screen('[download] ' + reason)
1360             return
1361
1362         self._num_downloads += 1
1363
1364         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1365
1366         # Forced printings
1367         if self.params.get('forcetitle', False):
1368             self.to_stdout(info_dict['fulltitle'])
1369         if self.params.get('forceid', False):
1370             self.to_stdout(info_dict['id'])
1371         if self.params.get('forceurl', False):
1372             if info_dict.get('requested_formats') is not None:
1373                 for f in info_dict['requested_formats']:
1374                     self.to_stdout(f['url'] + f.get('play_path', ''))
1375             else:
1376                 # For RTMP URLs, also include the playpath
1377                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1378         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1379             self.to_stdout(info_dict['thumbnail'])
1380         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1381             self.to_stdout(info_dict['description'])
1382         if self.params.get('forcefilename', False) and filename is not None:
1383             self.to_stdout(filename)
1384         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1385             self.to_stdout(formatSeconds(info_dict['duration']))
1386         if self.params.get('forceformat', False):
1387             self.to_stdout(info_dict['format'])
1388         if self.params.get('forcejson', False):
1389             self.to_stdout(json.dumps(info_dict))
1390
1391         # Do nothing else if in simulate mode
1392         if self.params.get('simulate', False):
1393             return
1394
1395         if filename is None:
1396             return
1397
1398         try:
1399             dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1400             if dn and not os.path.exists(dn):
1401                 os.makedirs(dn)
1402         except (OSError, IOError) as err:
1403             self.report_error('unable to create directory ' + compat_str(err))
1404             return
1405
1406         if self.params.get('writedescription', False):
1407             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1408             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1409                 self.to_screen('[info] Video description is already present')
1410             elif info_dict.get('description') is None:
1411                 self.report_warning('There\'s no description to write.')
1412             else:
1413                 try:
1414                     self.to_screen('[info] Writing video description to: ' + descfn)
1415                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1416                         descfile.write(info_dict['description'])
1417                 except (OSError, IOError):
1418                     self.report_error('Cannot write description file ' + descfn)
1419                     return
1420
1421         if self.params.get('writeannotations', False):
1422             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1423             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1424                 self.to_screen('[info] Video annotations are already present')
1425             else:
1426                 try:
1427                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1428                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1429                         annofile.write(info_dict['annotations'])
1430                 except (KeyError, TypeError):
1431                     self.report_warning('There are no annotations to write.')
1432                 except (OSError, IOError):
1433                     self.report_error('Cannot write annotations file: ' + annofn)
1434                     return
1435
1436         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1437                                        self.params.get('writeautomaticsub')])
1438
1439         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1440             # subtitles download errors are already managed as troubles in relevant IE
1441             # that way it will silently go on when used with unsupporting IE
1442             subtitles = info_dict['requested_subtitles']
1443             ie = self.get_info_extractor(info_dict['extractor_key'])
1444             for sub_lang, sub_info in subtitles.items():
1445                 sub_format = sub_info['ext']
1446                 if sub_info.get('data') is not None:
1447                     sub_data = sub_info['data']
1448                 else:
1449                     try:
1450                         sub_data = ie._download_webpage(
1451                             sub_info['url'], info_dict['id'], note=False)
1452                     except ExtractorError as err:
1453                         self.report_warning('Unable to download subtitle for "%s": %s' %
1454                                             (sub_lang, compat_str(err.cause)))
1455                         continue
1456                 try:
1457                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1458                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1459                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1460                     else:
1461                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1462                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1463                             subfile.write(sub_data)
1464                 except (OSError, IOError):
1465                     self.report_error('Cannot write subtitles file ' + sub_filename)
1466                     return
1467
1468         if self.params.get('writeinfojson', False):
1469             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1470             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1471                 self.to_screen('[info] Video description metadata is already present')
1472             else:
1473                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1474                 try:
1475                     write_json_file(self.filter_requested_info(info_dict), infofn)
1476                 except (OSError, IOError):
1477                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1478                     return
1479
1480         self._write_thumbnails(info_dict, filename)
1481
1482         if not self.params.get('skip_download', False):
1483             try:
1484                 def dl(name, info):
1485                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1486                     for ph in self._progress_hooks:
1487                         fd.add_progress_hook(ph)
1488                     if self.params.get('verbose'):
1489                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1490                     return fd.download(name, info)
1491
1492                 if info_dict.get('requested_formats') is not None:
1493                     downloaded = []
1494                     success = True
1495                     merger = FFmpegMergerPP(self)
1496                     if not merger.available:
1497                         postprocessors = []
1498                         self.report_warning('You have requested multiple '
1499                                             'formats but ffmpeg or avconv are not installed.'
1500                                             ' The formats won\'t be merged.')
1501                     else:
1502                         postprocessors = [merger]
1503
1504                     def compatible_formats(formats):
1505                         video, audio = formats
1506                         # Check extension
1507                         video_ext, audio_ext = audio.get('ext'), video.get('ext')
1508                         if video_ext and audio_ext:
1509                             COMPATIBLE_EXTS = (
1510                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
1511                                 ('webm')
1512                             )
1513                             for exts in COMPATIBLE_EXTS:
1514                                 if video_ext in exts and audio_ext in exts:
1515                                     return True
1516                         # TODO: Check acodec/vcodec
1517                         return False
1518
1519                     filename_real_ext = os.path.splitext(filename)[1][1:]
1520                     filename_wo_ext = (
1521                         os.path.splitext(filename)[0]
1522                         if filename_real_ext == info_dict['ext']
1523                         else filename)
1524                     requested_formats = info_dict['requested_formats']
1525                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1526                         info_dict['ext'] = 'mkv'
1527                         self.report_warning(
1528                             'Requested formats are incompatible for merge and will be merged into mkv.')
1529                     # Ensure filename always has a correct extension for successful merge
1530                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1531                     if os.path.exists(encodeFilename(filename)):
1532                         self.to_screen(
1533                             '[download] %s has already been downloaded and '
1534                             'merged' % filename)
1535                     else:
1536                         for f in requested_formats:
1537                             new_info = dict(info_dict)
1538                             new_info.update(f)
1539                             fname = self.prepare_filename(new_info)
1540                             fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
1541                             downloaded.append(fname)
1542                             partial_success = dl(fname, new_info)
1543                             success = success and partial_success
1544                         info_dict['__postprocessors'] = postprocessors
1545                         info_dict['__files_to_merge'] = downloaded
1546                 else:
1547                     # Just a single file
1548                     success = dl(filename, info_dict)
1549             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1550                 self.report_error('unable to download video data: %s' % str(err))
1551                 return
1552             except (OSError, IOError) as err:
1553                 raise UnavailableVideoError(err)
1554             except (ContentTooShortError, ) as err:
1555                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1556                 return
1557
1558             if success:
1559                 # Fixup content
1560                 fixup_policy = self.params.get('fixup')
1561                 if fixup_policy is None:
1562                     fixup_policy = 'detect_or_warn'
1563
1564                 stretched_ratio = info_dict.get('stretched_ratio')
1565                 if stretched_ratio is not None and stretched_ratio != 1:
1566                     if fixup_policy == 'warn':
1567                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1568                             info_dict['id'], stretched_ratio))
1569                     elif fixup_policy == 'detect_or_warn':
1570                         stretched_pp = FFmpegFixupStretchedPP(self)
1571                         if stretched_pp.available:
1572                             info_dict.setdefault('__postprocessors', [])
1573                             info_dict['__postprocessors'].append(stretched_pp)
1574                         else:
1575                             self.report_warning(
1576                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1577                                     info_dict['id'], stretched_ratio))
1578                     else:
1579                         assert fixup_policy in ('ignore', 'never')
1580
1581                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1582                     if fixup_policy == 'warn':
1583                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1584                             info_dict['id']))
1585                     elif fixup_policy == 'detect_or_warn':
1586                         fixup_pp = FFmpegFixupM4aPP(self)
1587                         if fixup_pp.available:
1588                             info_dict.setdefault('__postprocessors', [])
1589                             info_dict['__postprocessors'].append(fixup_pp)
1590                         else:
1591                             self.report_warning(
1592                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1593                                     info_dict['id']))
1594                     else:
1595                         assert fixup_policy in ('ignore', 'never')
1596
1597                 try:
1598                     self.post_process(filename, info_dict)
1599                 except (PostProcessingError) as err:
1600                     self.report_error('postprocessing: %s' % str(err))
1601                     return
1602                 self.record_download_archive(info_dict)
1603
1604     def download(self, url_list):
1605         """Download a given list of URLs."""
1606         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1607         if (len(url_list) > 1 and
1608                 '%' not in outtmpl and
1609                 self.params.get('max_downloads') != 1):
1610             raise SameFileError(outtmpl)
1611
1612         for url in url_list:
1613             try:
1614                 # It also downloads the videos
1615                 res = self.extract_info(
1616                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
1617             except UnavailableVideoError:
1618                 self.report_error('unable to download video')
1619             except MaxDownloadsReached:
1620                 self.to_screen('[info] Maximum number of downloaded files reached.')
1621                 raise
1622             else:
1623                 if self.params.get('dump_single_json', False):
1624                     self.to_stdout(json.dumps(res))
1625
1626         return self._download_retcode
1627
1628     def download_with_info_file(self, info_filename):
1629         with contextlib.closing(fileinput.FileInput(
1630                 [info_filename], mode='r',
1631                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1632             # FileInput doesn't have a read method, we can't call json.load
1633             info = self.filter_requested_info(json.loads('\n'.join(f)))
1634         try:
1635             self.process_ie_result(info, download=True)
1636         except DownloadError:
1637             webpage_url = info.get('webpage_url')
1638             if webpage_url is not None:
1639                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1640                 return self.download([webpage_url])
1641             else:
1642                 raise
1643         return self._download_retcode
1644
1645     @staticmethod
1646     def filter_requested_info(info_dict):
1647         return dict(
1648             (k, v) for k, v in info_dict.items()
1649             if k not in ['requested_formats', 'requested_subtitles'])
1650
1651     def post_process(self, filename, ie_info):
1652         """Run all the postprocessors on the given file."""
1653         info = dict(ie_info)
1654         info['filepath'] = filename
1655         pps_chain = []
1656         if ie_info.get('__postprocessors') is not None:
1657             pps_chain.extend(ie_info['__postprocessors'])
1658         pps_chain.extend(self._pps)
1659         for pp in pps_chain:
1660             files_to_delete = []
1661             try:
1662                 files_to_delete, info = pp.run(info)
1663             except PostProcessingError as e:
1664                 self.report_error(e.msg)
1665             if files_to_delete and not self.params.get('keepvideo', False):
1666                 for old_filename in files_to_delete:
1667                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1668                     try:
1669                         os.remove(encodeFilename(old_filename))
1670                     except (IOError, OSError):
1671                         self.report_warning('Unable to remove downloaded original file')
1672
1673     def _make_archive_id(self, info_dict):
1674         # Future-proof against any change in case
1675         # and backwards compatibility with prior versions
1676         extractor = info_dict.get('extractor_key')
1677         if extractor is None:
1678             if 'id' in info_dict:
1679                 extractor = info_dict.get('ie_key')  # key in a playlist
1680         if extractor is None:
1681             return None  # Incomplete video information
1682         return extractor.lower() + ' ' + info_dict['id']
1683
1684     def in_download_archive(self, info_dict):
1685         fn = self.params.get('download_archive')
1686         if fn is None:
1687             return False
1688
1689         vid_id = self._make_archive_id(info_dict)
1690         if vid_id is None:
1691             return False  # Incomplete video information
1692
1693         try:
1694             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1695                 for line in archive_file:
1696                     if line.strip() == vid_id:
1697                         return True
1698         except IOError as ioe:
1699             if ioe.errno != errno.ENOENT:
1700                 raise
1701         return False
1702
1703     def record_download_archive(self, info_dict):
1704         fn = self.params.get('download_archive')
1705         if fn is None:
1706             return
1707         vid_id = self._make_archive_id(info_dict)
1708         assert vid_id
1709         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1710             archive_file.write(vid_id + '\n')
1711
1712     @staticmethod
1713     def format_resolution(format, default='unknown'):
1714         if format.get('vcodec') == 'none':
1715             return 'audio only'
1716         if format.get('resolution') is not None:
1717             return format['resolution']
1718         if format.get('height') is not None:
1719             if format.get('width') is not None:
1720                 res = '%sx%s' % (format['width'], format['height'])
1721             else:
1722                 res = '%sp' % format['height']
1723         elif format.get('width') is not None:
1724             res = '?x%d' % format['width']
1725         else:
1726             res = default
1727         return res
1728
1729     def _format_note(self, fdict):
1730         res = ''
1731         if fdict.get('ext') in ['f4f', 'f4m']:
1732             res += '(unsupported) '
1733         if fdict.get('format_note') is not None:
1734             res += fdict['format_note'] + ' '
1735         if fdict.get('tbr') is not None:
1736             res += '%4dk ' % fdict['tbr']
1737         if fdict.get('container') is not None:
1738             if res:
1739                 res += ', '
1740             res += '%s container' % fdict['container']
1741         if (fdict.get('vcodec') is not None and
1742                 fdict.get('vcodec') != 'none'):
1743             if res:
1744                 res += ', '
1745             res += fdict['vcodec']
1746             if fdict.get('vbr') is not None:
1747                 res += '@'
1748         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1749             res += 'video@'
1750         if fdict.get('vbr') is not None:
1751             res += '%4dk' % fdict['vbr']
1752         if fdict.get('fps') is not None:
1753             res += ', %sfps' % fdict['fps']
1754         if fdict.get('acodec') is not None:
1755             if res:
1756                 res += ', '
1757             if fdict['acodec'] == 'none':
1758                 res += 'video only'
1759             else:
1760                 res += '%-5s' % fdict['acodec']
1761         elif fdict.get('abr') is not None:
1762             if res:
1763                 res += ', '
1764             res += 'audio'
1765         if fdict.get('abr') is not None:
1766             res += '@%3dk' % fdict['abr']
1767         if fdict.get('asr') is not None:
1768             res += ' (%5dHz)' % fdict['asr']
1769         if fdict.get('filesize') is not None:
1770             if res:
1771                 res += ', '
1772             res += format_bytes(fdict['filesize'])
1773         elif fdict.get('filesize_approx') is not None:
1774             if res:
1775                 res += ', '
1776             res += '~' + format_bytes(fdict['filesize_approx'])
1777         return res
1778
1779     def list_formats(self, info_dict):
1780         formats = info_dict.get('formats', [info_dict])
1781         table = [
1782             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1783             for f in formats
1784             if f.get('preference') is None or f['preference'] >= -1000]
1785         if len(formats) > 1:
1786             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1787
1788         header_line = ['format code', 'extension', 'resolution', 'note']
1789         self.to_screen(
1790             '[info] Available formats for %s:\n%s' %
1791             (info_dict['id'], render_table(header_line, table)))
1792
1793     def list_thumbnails(self, info_dict):
1794         thumbnails = info_dict.get('thumbnails')
1795         if not thumbnails:
1796             tn_url = info_dict.get('thumbnail')
1797             if tn_url:
1798                 thumbnails = [{'id': '0', 'url': tn_url}]
1799             else:
1800                 self.to_screen(
1801                     '[info] No thumbnails present for %s' % info_dict['id'])
1802                 return
1803
1804         self.to_screen(
1805             '[info] Thumbnails for %s:' % info_dict['id'])
1806         self.to_screen(render_table(
1807             ['ID', 'width', 'height', 'URL'],
1808             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1809
1810     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1811         if not subtitles:
1812             self.to_screen('%s has no %s' % (video_id, name))
1813             return
1814         self.to_screen(
1815             'Available %s for %s:' % (name, video_id))
1816         self.to_screen(render_table(
1817             ['Language', 'formats'],
1818             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1819                 for lang, formats in subtitles.items()]))
1820
1821     def urlopen(self, req):
1822         """ Start an HTTP download """
1823
1824         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1825         # always respected by websites, some tend to give out URLs with non percent-encoded
1826         # non-ASCII characters (see telemb.py, ard.py [#3412])
1827         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1828         # To work around aforementioned issue we will replace request's original URL with
1829         # percent-encoded one
1830         req_is_string = isinstance(req, compat_basestring)
1831         url = req if req_is_string else req.get_full_url()
1832         url_escaped = escape_url(url)
1833
1834         # Substitute URL if any change after escaping
1835         if url != url_escaped:
1836             if req_is_string:
1837                 req = url_escaped
1838             else:
1839                 req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
1840                 req = req_type(
1841                     url_escaped, data=req.data, headers=req.headers,
1842                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1843
1844         return self._opener.open(req, timeout=self._socket_timeout)
1845
1846     def print_debug_header(self):
1847         if not self.params.get('verbose'):
1848             return
1849
1850         if type('') is not compat_str:
1851             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1852             self.report_warning(
1853                 'Your Python is broken! Update to a newer and supported version')
1854
1855         stdout_encoding = getattr(
1856             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1857         encoding_str = (
1858             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1859                 locale.getpreferredencoding(),
1860                 sys.getfilesystemencoding(),
1861                 stdout_encoding,
1862                 self.get_encoding()))
1863         write_string(encoding_str, encoding=None)
1864
1865         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1866         try:
1867             sp = subprocess.Popen(
1868                 ['git', 'rev-parse', '--short', 'HEAD'],
1869                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1870                 cwd=os.path.dirname(os.path.abspath(__file__)))
1871             out, err = sp.communicate()
1872             out = out.decode().strip()
1873             if re.match('[0-9a-f]+', out):
1874                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1875         except Exception:
1876             try:
1877                 sys.exc_clear()
1878             except Exception:
1879                 pass
1880         self._write_string('[debug] Python version %s - %s\n' % (
1881             platform.python_version(), platform_name()))
1882
1883         exe_versions = FFmpegPostProcessor.get_versions(self)
1884         exe_versions['rtmpdump'] = rtmpdump_version()
1885         exe_str = ', '.join(
1886             '%s %s' % (exe, v)
1887             for exe, v in sorted(exe_versions.items())
1888             if v
1889         )
1890         if not exe_str:
1891             exe_str = 'none'
1892         self._write_string('[debug] exe versions: %s\n' % exe_str)
1893
1894         proxy_map = {}
1895         for handler in self._opener.handlers:
1896             if hasattr(handler, 'proxies'):
1897                 proxy_map.update(handler.proxies)
1898         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1899
1900         if self.params.get('call_home', False):
1901             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1902             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1903             latest_version = self.urlopen(
1904                 'https://yt-dl.org/latest/version').read().decode('utf-8')
1905             if version_tuple(latest_version) > version_tuple(__version__):
1906                 self.report_warning(
1907                     'You are using an outdated version (newest version: %s)! '
1908                     'See https://yt-dl.org/update if you need help updating.' %
1909                     latest_version)
1910
1911     def _setup_opener(self):
1912         timeout_val = self.params.get('socket_timeout')
1913         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1914
1915         opts_cookiefile = self.params.get('cookiefile')
1916         opts_proxy = self.params.get('proxy')
1917
1918         if opts_cookiefile is None:
1919             self.cookiejar = compat_cookiejar.CookieJar()
1920         else:
1921             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1922                 opts_cookiefile)
1923             if os.access(opts_cookiefile, os.R_OK):
1924                 self.cookiejar.load()
1925
1926         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1927             self.cookiejar)
1928         if opts_proxy is not None:
1929             if opts_proxy == '':
1930                 proxies = {}
1931             else:
1932                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1933         else:
1934             proxies = compat_urllib_request.getproxies()
1935             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1936             if 'http' in proxies and 'https' not in proxies:
1937                 proxies['https'] = proxies['http']
1938         proxy_handler = PerRequestProxyHandler(proxies)
1939
1940         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1941         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1942         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1943         opener = compat_urllib_request.build_opener(
1944             proxy_handler, https_handler, cookie_processor, ydlh)
1945
1946         # Delete the default user-agent header, which would otherwise apply in
1947         # cases where our custom HTTP handler doesn't come into play
1948         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1949         opener.addheaders = []
1950         self._opener = opener
1951
1952     def encode(self, s):
1953         if isinstance(s, bytes):
1954             return s  # Already encoded
1955
1956         try:
1957             return s.encode(self.get_encoding())
1958         except UnicodeEncodeError as err:
1959             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1960             raise
1961
1962     def get_encoding(self):
1963         encoding = self.params.get('encoding')
1964         if encoding is None:
1965             encoding = preferredencoding()
1966         return encoding
1967
1968     def _write_thumbnails(self, info_dict, filename):
1969         if self.params.get('writethumbnail', False):
1970             thumbnails = info_dict.get('thumbnails')
1971             if thumbnails:
1972                 thumbnails = [thumbnails[-1]]
1973         elif self.params.get('write_all_thumbnails', False):
1974             thumbnails = info_dict.get('thumbnails')
1975         else:
1976             return
1977
1978         if not thumbnails:
1979             # No thumbnails present, so return immediately
1980             return
1981
1982         for t in thumbnails:
1983             thumb_ext = determine_ext(t['url'], 'jpg')
1984             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
1985             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
1986             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
1987
1988             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1989                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
1990                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1991             else:
1992                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
1993                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1994                 try:
1995                     uf = self.urlopen(t['url'])
1996                     with open(thumb_filename, 'wb') as thumbf:
1997                         shutil.copyfileobj(uf, thumbf)
1998                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
1999                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2000                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2001                     self.report_warning('Unable to download thumbnail "%s": %s' %
2002                                         (t['url'], compat_str(err)))