[YoutubeDL] Add more numeric fields for NA substitution in outtmpl
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # coding: utf-8
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import copy
9 import datetime
10 import errno
11 import fileinput
12 import io
13 import itertools
14 import json
15 import locale
16 import operator
17 import os
18 import platform
19 import re
20 import shutil
21 import subprocess
22 import socket
23 import sys
24 import time
25 import tokenize
26 import traceback
27 import random
28
29 from .compat import (
30     compat_basestring,
31     compat_cookiejar,
32     compat_expanduser,
33     compat_get_terminal_size,
34     compat_http_client,
35     compat_kwargs,
36     compat_numeric_types,
37     compat_os_name,
38     compat_str,
39     compat_tokenize_tokenize,
40     compat_urllib_error,
41     compat_urllib_request,
42     compat_urllib_request_DataHandler,
43 )
44 from .utils import (
45     age_restricted,
46     args_to_str,
47     ContentTooShortError,
48     date_from_str,
49     DateRange,
50     DEFAULT_OUTTMPL,
51     determine_ext,
52     determine_protocol,
53     DownloadError,
54     encode_compat_str,
55     encodeFilename,
56     error_to_compat_str,
57     ExtractorError,
58     format_bytes,
59     formatSeconds,
60     GeoRestrictedError,
61     ISO3166Utils,
62     locked_file,
63     make_HTTPS_handler,
64     MaxDownloadsReached,
65     PagedList,
66     parse_filesize,
67     PerRequestProxyHandler,
68     platform_name,
69     PostProcessingError,
70     preferredencoding,
71     prepend_extension,
72     register_socks_protocols,
73     render_table,
74     replace_extension,
75     SameFileError,
76     sanitize_filename,
77     sanitize_path,
78     sanitize_url,
79     sanitized_Request,
80     std_headers,
81     subtitles_filename,
82     UnavailableVideoError,
83     url_basename,
84     version_tuple,
85     write_json_file,
86     write_string,
87     YoutubeDLCookieProcessor,
88     YoutubeDLHandler,
89 )
90 from .cache import Cache
91 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
92 from .downloader import get_suitable_downloader
93 from .downloader.rtmp import rtmpdump_version
94 from .postprocessor import (
95     FFmpegFixupM3u8PP,
96     FFmpegFixupM4aPP,
97     FFmpegFixupStretchedPP,
98     FFmpegMergerPP,
99     FFmpegPostProcessor,
100     get_postprocessor,
101 )
102 from .version import __version__
103
104 if compat_os_name == 'nt':
105     import ctypes
106
107
108 class YoutubeDL(object):
109     """YoutubeDL class.
110
    YoutubeDL objects are the ones responsible for downloading the
112     actual video file and writing it to disk if the user has requested
113     it, among some other tasks. In most cases there should be one per
114     program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information (a task that InfoExtractors do), it
116     has to pass the URL to one of them.
117
118     For this, YoutubeDL objects have a method that allows
119     InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
121     finds that reports being able to handle it. The InfoExtractor extracts
122     all the information about the video or videos the URL refers to, and
123     YoutubeDL process the extracted information, possibly using a File
124     Downloader to download the video.
125
126     YoutubeDL objects accept a lot of parameters. In order not to saturate
127     the object constructor with arguments, it receives a dictionary of
128     options instead. These options are available through the params
129     attribute for the InfoExtractors to use. The YoutubeDL also
130     registers itself as the downloader in charge for the InfoExtractors
131     that are added to it, so this is a "mutual registration".
132
133     Available options:
134
135     username:          Username for authentication purposes.
136     password:          Password for authentication purposes.
137     videopassword:     Password for accessing a video.
138     ap_mso:            Adobe Pass multiple-system operator identifier.
139     ap_username:       Multiple-system operator account username.
140     ap_password:       Multiple-system operator account password.
141     usenetrc:          Use netrc for authentication instead.
142     verbose:           Print additional info to stdout.
143     quiet:             Do not print messages to stdout.
144     no_warnings:       Do not print out anything for warnings.
145     forceurl:          Force printing final URL.
146     forcetitle:        Force printing title.
147     forceid:           Force printing ID.
148     forcethumbnail:    Force printing thumbnail URL.
149     forcedescription:  Force printing description.
150     forcefilename:     Force printing final filename.
151     forceduration:     Force printing duration.
152     forcejson:         Force printing info_dict as JSON.
153     dump_single_json:  Force printing the info_dict of the whole playlist
154                        (or video) as a single JSON line.
155     simulate:          Do not download the video files.
156     format:            Video format code. See options.py for more information.
157     outtmpl:           Template for output names.
158     restrictfilenames: Do not allow "&" and spaces in file names
159     ignoreerrors:      Do not stop on download errors.
160     force_generic_extractor: Force downloader to use the generic extractor
161     nooverwrites:      Prevent overwriting files.
162     playliststart:     Playlist item to start at.
163     playlistend:       Playlist item to end at.
164     playlist_items:    Specific indices of playlist to download.
165     playlistreverse:   Download playlist items in reverse order.
166     playlistrandom:    Download playlist items in random order.
167     matchtitle:        Download only matching titles.
168     rejecttitle:       Reject downloads for matching titles.
169     logger:            Log messages to a logging.Logger instance.
170     logtostderr:       Log messages to stderr instead of stdout.
171     writedescription:  Write the video description to a .description file
172     writeinfojson:     Write the video description to a .info.json file
173     writeannotations:  Write the video annotations to a .annotations.xml file
174     writethumbnail:    Write the thumbnail image to a file
175     write_all_thumbnails:  Write all thumbnail formats to files
176     writesubtitles:    Write the video subtitles to a file
177     writeautomaticsub: Write the automatically generated subtitles to a file
178     allsubtitles:      Downloads all the subtitles of the video
179                        (requires writesubtitles or writeautomaticsub)
180     listsubtitles:     Lists all available subtitles for the video
181     subtitlesformat:   The format code for subtitles
182     subtitleslangs:    List of languages of the subtitles to download
183     keepvideo:         Keep the video file after post-processing
184     daterange:         A DateRange object, download only if the upload_date is in the range.
185     skip_download:     Skip the actual download of the video file
186     cachedir:          Location of the cache files in the filesystem.
187                        False to disable filesystem cache.
188     noplaylist:        Download single video instead of a playlist if in doubt.
189     age_limit:         An integer representing the user's age in years.
190                        Unsuitable videos for the given age are skipped.
191     min_views:         An integer representing the minimum view count the video
192                        must have in order to not be skipped.
193                        Videos without view count information are always
194                        downloaded. None for no limit.
195     max_views:         An integer representing the maximum view count.
196                        Videos that are more popular than that are not
197                        downloaded.
198                        Videos without view count information are always
199                        downloaded. None for no limit.
200     download_archive:  File name of a file where all downloads are recorded.
201                        Videos already present in the file are not downloaded
202                        again.
203     cookiefile:        File name where cookies should be read from and dumped to.
204     nocheckcertificate:Do not verify SSL certificates
205     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
206                        At the moment, this is only supported by YouTube.
207     proxy:             URL of the proxy server to use
208     geo_verification_proxy:  URL of the proxy to use for IP address verification
209                        on geo-restricted sites. (Experimental)
210     socket_timeout:    Time to wait for unresponsive hosts, in seconds
211     bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
213     debug_printtraffic:Print out sent and received HTTP traffic
214     include_ads:       Download ads as well
215     default_search:    Prepend this string if an input url is not valid.
216                        'auto' for elaborate guessing
217     encoding:          Use this encoding instead of the system-specified.
218     extract_flat:      Do not resolve URLs, return the immediate result.
219                        Pass in 'in_playlist' to only show this behavior for
220                        playlist items.
221     postprocessors:    A list of dictionaries, each with an entry
222                        * key:  The name of the postprocessor. See
223                                youtube_dl/postprocessor/__init__.py for a list.
224                        as well as any further keyword arguments for the
225                        postprocessor.
226     progress_hooks:    A list of functions that get called on download
227                        progress, with a dictionary with the entries
228                        * status: One of "downloading", "error", or "finished".
229                                  Check this first and ignore unknown values.
230
231                        If status is one of "downloading", or "finished", the
232                        following properties may also be present:
233                        * filename: The final filename (always present)
234                        * tmpfilename: The filename we're currently writing to
235                        * downloaded_bytes: Bytes on disk
236                        * total_bytes: Size of the whole file, None if unknown
237                        * total_bytes_estimate: Guess of the eventual file size,
238                                                None if unavailable.
239                        * elapsed: The number of seconds since download started.
240                        * eta: The estimated time in seconds, None if unknown
241                        * speed: The download speed in bytes/second, None if
242                                 unknown
243                        * fragment_index: The counter of the currently
244                                          downloaded video fragment.
245                        * fragment_count: The number of fragments (= individual
246                                          files that will be merged)
247
248                        Progress hooks are guaranteed to be called at least once
249                        (with status "finished") if the download is successful.
250     merge_output_format: Extension to use when merging formats.
251     fixup:             Automatically correct known faults of the file.
252                        One of:
253                        - "never": do nothing
254                        - "warn": only emit a warning
255                        - "detect_or_warn": check whether we can do anything
256                                            about it, warn otherwise (default)
257     source_address:    (Experimental) Client-side IP address to bind to.
258     call_home:         Boolean, true iff we are allowed to contact the
259                        youtube-dl servers for debugging.
260     sleep_interval:    Number of seconds to sleep before each download when
261                        used alone or a lower bound of a range for randomized
262                        sleep before each download (minimum possible number
263                        of seconds to sleep) when used along with
264                        max_sleep_interval.
265     max_sleep_interval:Upper bound of a range for randomized sleep before each
266                        download (maximum possible number of seconds to sleep).
267                        Must only be used along with sleep_interval.
268                        Actual sleep time will be a random float from range
269                        [sleep_interval; max_sleep_interval].
270     listformats:       Print an overview of available video formats and exit.
271     list_thumbnails:   Print a table of all thumbnails and exit.
272     match_filter:      A function that gets called with the info_dict of
273                        every video.
274                        If it returns a message, the video is ignored.
275                        If it returns None, the video is downloaded.
276                        match_filter_func in utils.py is one example for this.
277     no_color:          Do not emit color codes in output.
278     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
279                        HTTP header (experimental)
280     geo_bypass_country:
281                        Two-letter ISO 3166-2 country code that will be used for
282                        explicit geographic restriction bypassing via faking
283                        X-Forwarded-For HTTP header (experimental)
284
285     The following options determine which downloader is picked:
286     external_downloader: Executable of the external downloader to call.
287                        None or unset for standard (built-in) downloader.
288     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
289                        if True, otherwise use ffmpeg/avconv if False, otherwise
290                        use downloader suggested by extractor if None.
291
292     The following parameters are not used by YoutubeDL itself, they are used by
293     the downloader (see youtube_dl/downloader/common.py):
294     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
295     noresizebuffer, retries, continuedl, noprogress, consoletitle,
296     xattr_set_filesize, external_downloader_args, hls_use_mpegts.
297
298     The following options are used by the post processors:
299     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
300                        otherwise prefer avconv.
301     postprocessor_args: A list of additional command-line arguments for the
302                         postprocessor.
303     """
304
    # Class-level defaults; real values are assigned per instance in __init__.
    params = None  # options dictionary driving all downloader behavior
    _ies = []  # registered InfoExtractors (classes or instances)
    _pps = []  # registered PostProcessor chain
    _download_retcode = None  # return code accumulated across downloads
    _num_downloads = None  # number of files downloaded by this instance
    _screen_file = None  # stream used for normal (non-error) output
311
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.

        params:    dictionary of options (see the class docstring); merged
                   over a small set of defaults.
        auto_init: when True, print the debug header and register the
                   default info extractors immediately.
        """
        if params is None:
            params = {}
        self._ies = []
        self._ies_instances = {}
        self._pps = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Boolean indexing: logtostderr selects sys.stderr, else sys.stdout
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = {
            # Default parameters
            'nocheckcertificate': False,
        }
        self.params.update(params)
        self.cache = Cache(self)

        # Legacy option migration: cn_verification_proxy was superseded by
        # geo_verification_proxy; honor the old name if the new one is unset.
        if self.params.get('cn_verification_proxy') is not None:
            self.report_warning('--cn-verification-proxy is deprecated. Use --geo-verification-proxy instead.')
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        if params.get('bidi_workaround', False):
            # Route output through an external bidi filter (bidiv or fribidi)
            # connected via a pseudo-terminal, so RTL text renders correctly.
            try:
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    # bidiv not available; fall back to fribidi
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.version_info >= (3,) and sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
                not params.get('restrictfilenames', False)):
            # On Python 3, the Unicode filesystem API will throw errors (#1474)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if isinstance(params.get('outtmpl'), bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')

        # Build the urllib opener (proxies, cookies, custom handlers).
        self._setup_opener()

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        # Instantiate configured postprocessors; 'key' names the PP class,
        # remaining entries become its keyword arguments.
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            del pp_def['key']
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()
395
396     def warn_if_short_id(self, argv):
397         # short YouTube ID starting with dash?
398         idxs = [
399             i for i, a in enumerate(argv)
400             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
401         if idxs:
402             correct_argv = (
403                 ['youtube-dl'] +
404                 [a for i, a in enumerate(argv) if i not in idxs] +
405                 ['--'] + [argv[i] for i in idxs]
406             )
407             self.report_warning(
408                 'Long argument string detected. '
409                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
410                 args_to_str(correct_argv))
411
412     def add_info_extractor(self, ie):
413         """Add an InfoExtractor object to the end of the list."""
414         self._ies.append(ie)
415         if not isinstance(ie, type):
416             self._ies_instances[ie.ie_key()] = ie
417             ie.set_downloader(self)
418
419     def get_info_extractor(self, ie_key):
420         """
421         Get an instance of an IE with name ie_key, it will try to get one from
422         the _ies list, if there's no instance it will create a new one and add
423         it to the extractor list.
424         """
425         ie = self._ies_instances.get(ie_key)
426         if ie is None:
427             ie = get_info_extractor(ie_key)()
428             self.add_info_extractor(ie)
429         return ie
430
431     def add_default_info_extractors(self):
432         """
433         Add the InfoExtractors returned by gen_extractors to the end of the list
434         """
435         for ie in gen_extractor_classes():
436             self.add_info_extractor(ie)
437
438     def add_post_processor(self, pp):
439         """Add a PostProcessor object to the end of the chain."""
440         self._pps.append(pp)
441         pp.set_downloader(self)
442
443     def add_progress_hook(self, ph):
444         """Add the progress hook (currently only for the file downloader)"""
445         self._progress_hooks.append(ph)
446
447     def _bidi_workaround(self, message):
448         if not hasattr(self, '_output_channel'):
449             return message
450
451         assert hasattr(self, '_output_process')
452         assert isinstance(message, compat_str)
453         line_count = message.count('\n') + 1
454         self._output_process.stdin.write((message + '\n').encode('utf-8'))
455         self._output_process.stdin.flush()
456         res = ''.join(self._output_channel.readline().decode('utf-8')
457                       for _ in range(line_count))
458         return res[:-len('\n')]
459
460     def to_screen(self, message, skip_eol=False):
461         """Print message to stdout if not in quiet mode."""
462         return self.to_stdout(message, skip_eol, check_quiet=True)
463
464     def _write_string(self, s, out=None):
465         write_string(s, out=out, encoding=self.params.get('encoding'))
466
467     def to_stdout(self, message, skip_eol=False, check_quiet=False):
468         """Print message to stdout if not in quiet mode."""
469         if self.params.get('logger'):
470             self.params['logger'].debug(message)
471         elif not check_quiet or not self.params.get('quiet', False):
472             message = self._bidi_workaround(message)
473             terminator = ['\n', ''][skip_eol]
474             output = message + terminator
475
476             self._write_string(output, self._screen_file)
477
478     def to_stderr(self, message):
479         """Print message to stderr."""
480         assert isinstance(message, compat_str)
481         if self.params.get('logger'):
482             self.params['logger'].error(message)
483         else:
484             message = self._bidi_workaround(message)
485             output = message + '\n'
486             self._write_string(output, self._err_file)
487
488     def to_console_title(self, message):
489         if not self.params.get('consoletitle', False):
490             return
491         if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
492             # c_wchar_p() might not be necessary if `message` is
493             # already of type unicode()
494             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
495         elif 'TERM' in os.environ:
496             self._write_string('\033]0;%s\007' % message, self._screen_file)
497
498     def save_console_title(self):
499         if not self.params.get('consoletitle', False):
500             return
501         if 'TERM' in os.environ:
502             # Save the title on stack
503             self._write_string('\033[22;0t', self._screen_file)
504
505     def restore_console_title(self):
506         if not self.params.get('consoletitle', False):
507             return
508         if 'TERM' in os.environ:
509             # Restore the title from stack
510             self._write_string('\033[23;0t', self._screen_file)
511
512     def __enter__(self):
513         self.save_console_title()
514         return self
515
516     def __exit__(self, *args):
517         self.restore_console_title()
518
519         if self.params.get('cookiefile') is not None:
520             self.cookiejar.save()
521
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            # In verbose mode, always print a traceback; synthesize one when
            # the caller did not supply it.
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # Some exceptions (e.g. ExtractorError) carry the original
                    # exc_info of an inner failure; include it first.
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: show the current call stack.
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Re-raise as DownloadError, preferring the wrapped inner
            # exc_info when the active exception provides one.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # ignoreerrors: record the failure in the process return code instead.
        self._download_retcode = 1
551
552     def report_warning(self, message):
553         '''
554         Print the message to stderr, it will be prefixed with 'WARNING:'
555         If stderr is a tty file the 'WARNING:' will be colored
556         '''
557         if self.params.get('logger') is not None:
558             self.params['logger'].warning(message)
559         else:
560             if self.params.get('no_warnings'):
561                 return
562             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
563                 _msg_header = '\033[0;33mWARNING:\033[0m'
564             else:
565                 _msg_header = 'WARNING:'
566             warning_message = '%s %s' % (_msg_header, message)
567             self.to_stderr(warning_message)
568
569     def report_error(self, message, tb=None):
570         '''
571         Do the same as trouble, but prefixes the message with 'ERROR:', colored
572         in red if stderr is a tty file.
573         '''
574         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
575             _msg_header = '\033[0;31mERROR:\033[0m'
576         else:
577             _msg_header = 'ERROR:'
578         error_message = '%s %s' % (_msg_header, message)
579         self.trouble(error_message, tb)
580
581     def report_file_already_downloaded(self, file_name):
582         """Report file has already been fully downloaded."""
583         try:
584             self.to_screen('[download] %s has already been downloaded' % file_name)
585         except UnicodeEncodeError:
586             self.to_screen('[download] The file has already been downloaded')
587
    def prepare_filename(self, info_dict):
        """Generate the output filename by rendering the outtmpl template
        against a sanitized copy of *info_dict*.

        Returns the sanitized path, or None if the template is invalid.
        """
        try:
            template_dict = dict(info_dict)

            # Synthesized fields not provided by extractors.
            template_dict['epoch'] = int(time.time())
            autonumber_size = self.params.get('autonumber_size')
            if autonumber_size is None:
                autonumber_size = 5
            template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
            # Derive a 'resolution' string from width/height when missing.
            if template_dict.get('resolution') is None:
                if template_dict.get('width') and template_dict.get('height'):
                    template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
                elif template_dict.get('height'):
                    template_dict['resolution'] = '%sp' % template_dict['height']
                elif template_dict.get('width'):
                    template_dict['resolution'] = '%dx?' % template_dict['width']

            # Sanitize every non-numeric value for filesystem use; numeric
            # values are kept as-is so numeric format specifiers still work.
            # Containers (list/tuple/dict) and Nones are dropped entirely.
            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id'))
            template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
                                 for k, v in template_dict.items()
                                 if v is not None and not isinstance(v, (list, tuple, dict)))
            # Any field the template references but we don't have renders as 'NA'.
            template_dict = collections.defaultdict(lambda: 'NA', template_dict)

            outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)

            # For fields playlist_index and autonumber convert all occurrences
            # of %(field)s to %(field)0Nd for backward compatibility
            field_size_compat_map = {
                'playlist_index': len(str(template_dict['n_entries'])),
                'autonumber': autonumber_size,
            }
            FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
            mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
            if mobj:
                outtmpl = re.sub(
                    FIELD_SIZE_COMPAT_RE,
                    r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
                    outtmpl)

            # Fields that may legitimately appear with numeric format
            # specifiers (%(field)d etc.) in the output template.
            NUMERIC_FIELDS = set((
                'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
                'upload_year', 'upload_month', 'upload_day',
                'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
                'average_rating', 'comment_count', 'age_limit',
                'start_time', 'end_time',
                'chapter_number', 'season_number', 'episode_number',
                'track_number', 'disc_number', 'release_year',
                'playlist_index',
            ))

            # Missing numeric fields used together with integer presentation types
            # in format specification will break the argument substitution since
            # string 'NA' is returned for missing fields. We will patch output
            # template for missing fields to meet string presentation type.
            for numeric_field in NUMERIC_FIELDS:
                if numeric_field not in template_dict:
                    # As of [1] format syntax is:
                    #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
                    # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
                    FORMAT_RE = r'''(?x)
                        (?<!%)
                        %
                        \({0}\)  # mapping key
                        (?:[#0\-+ ]+)?  # conversion flags (optional)
                        (?:\d+)?  # minimum field width (optional)
                        (?:\.\d+)?  # precision (optional)
                        [hlL]?  # length modifier (optional)
                        [diouxXeEfFgGcrs%]  # conversion type
                    '''
                    outtmpl = re.sub(
                        FORMAT_RE.format(numeric_field),
                        r'%({0})s'.format(numeric_field), outtmpl)

            tmpl = compat_expanduser(outtmpl)
            filename = tmpl % template_dict
            # Temporary fix for #4787
            # 'Treat' all problem characters by passing filename through preferredencoding
            # to workaround encoding issues with subprocess on python2 @ Windows
            if sys.version_info < (3, 0) and sys.platform == 'win32':
                filename = encodeFilename(filename, True).decode(preferredencoding())
            return sanitize_path(filename)
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None
676
677     def _match_entry(self, info_dict, incomplete):
678         """ Returns None iff the file should be downloaded """
679
680         video_title = info_dict.get('title', info_dict.get('id', 'video'))
681         if 'title' in info_dict:
682             # This can happen when we're just evaluating the playlist
683             title = info_dict['title']
684             matchtitle = self.params.get('matchtitle', False)
685             if matchtitle:
686                 if not re.search(matchtitle, title, re.IGNORECASE):
687                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
688             rejecttitle = self.params.get('rejecttitle', False)
689             if rejecttitle:
690                 if re.search(rejecttitle, title, re.IGNORECASE):
691                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
692         date = info_dict.get('upload_date')
693         if date is not None:
694             dateRange = self.params.get('daterange', DateRange())
695             if date not in dateRange:
696                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
697         view_count = info_dict.get('view_count')
698         if view_count is not None:
699             min_views = self.params.get('min_views')
700             if min_views is not None and view_count < min_views:
701                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
702             max_views = self.params.get('max_views')
703             if max_views is not None and view_count > max_views:
704                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
705         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
706             return 'Skipping "%s" because it is age restricted' % video_title
707         if self.in_download_archive(info_dict):
708             return '%s has already been recorded in archive' % video_title
709
710         if not incomplete:
711             match_filter = self.params.get('match_filter')
712             if match_filter is not None:
713                 ret = match_filter(info_dict)
714                 if ret is not None:
715                     return ret
716
717         return None
718
719     @staticmethod
720     def add_extra_info(info_dict, extra_info):
721         '''Set the keys from extra_info in info dict if they are missing'''
722         for key, value in extra_info.items():
723             info_dict.setdefault(key, value)
724
725     def extract_info(self, url, download=True, ie_key=None, extra_info={},
726                      process=True, force_generic_extractor=False):
727         '''
728         Returns a list with a dictionary for each video we find.
729         If 'download', also downloads the videos.
730         extra_info is a dict containing the extra values to add to each result
731         '''
732
733         if not ie_key and force_generic_extractor:
734             ie_key = 'Generic'
735
736         if ie_key:
737             ies = [self.get_info_extractor(ie_key)]
738         else:
739             ies = self._ies
740
741         for ie in ies:
742             if not ie.suitable(url):
743                 continue
744
745             ie = self.get_info_extractor(ie.ie_key())
746             if not ie.working():
747                 self.report_warning('The program functionality for this site has been marked as broken, '
748                                     'and will probably not work.')
749
750             try:
751                 ie_result = ie.extract(url)
752                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
753                     break
754                 if isinstance(ie_result, list):
755                     # Backwards compatibility: old IE result format
756                     ie_result = {
757                         '_type': 'compat_list',
758                         'entries': ie_result,
759                     }
760                 self.add_default_extra_info(ie_result, ie, url)
761                 if process:
762                     return self.process_ie_result(ie_result, download, extra_info)
763                 else:
764                     return ie_result
765             except GeoRestrictedError as e:
766                 msg = e.msg
767                 if e.countries:
768                     msg += '\nThis video is available in %s.' % ', '.join(
769                         map(ISO3166Utils.short2full, e.countries))
770                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
771                 self.report_error(msg)
772                 break
773             except ExtractorError as e:  # An error we somewhat expected
774                 self.report_error(compat_str(e), e.format_traceback())
775                 break
776             except MaxDownloadsReached:
777                 raise
778             except Exception as e:
779                 if self.params.get('ignoreerrors', False):
780                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
781                     break
782                 else:
783                     raise
784         else:
785             self.report_error('no suitable InfoExtractor for URL %s' % url)
786
787     def add_default_extra_info(self, ie_result, ie, url):
788         self.add_extra_info(ie_result, {
789             'extractor': ie.IE_NAME,
790             'webpage_url': url,
791             'webpage_url_basename': url_basename(url),
792             'extractor_key': ie.ie_key(),
793         })
794
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """
        # Dispatch on the extractor result type: 'video', 'url',
        # 'url_transparent', 'playlist'/'multi_video' or legacy 'compat_list'.
        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            ie_result['url'] = sanitize_url(ie_result['url'])
            extract_flat = self.params.get('extract_flat', False)
            # With --flat-playlist the bare url result is returned as-is
            # instead of being resolved into a full video result.
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                    extract_flat is True):
                if self.params.get('forcejson', False):
                    self.to_stdout(json.dumps(ie_result))
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # Non-None fields from the embedding page override the target's,
            # except _type/url/ie_key which describe the reference itself.
            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url', 'ie_key'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            assert new_result.get('_type') != 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist' or result_type == 'multi_video':
            # We process each entry in the playlist
            playlist = ie_result.get('title') or ie_result.get('id')
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # --playlist-start is 1-based on the command line; 0-based here
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend')
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

            playlistitems_str = self.params.get('playlist_items')
            playlistitems = None
            if playlistitems_str is not None:
                def iter_playlistitems(format):
                    # Expand a "1-3,7" style spec into individual 1-based indices
                    for string_segment in format.split(','):
                        if '-' in string_segment:
                            start, end = string_segment.split('-')
                            for item in range(int(start), int(end) + 1):
                                yield int(item)
                        else:
                            yield int(string_segment)
                playlistitems = iter_playlistitems(playlistitems_str)

            # Entries may be a plain list, a lazy PagedList or any iterable;
            # each case is sliced according to playlistitems/start/end.
            ie_entries = ie_result['entries']
            if isinstance(ie_entries, list):
                n_all_entries = len(ie_entries)
                if playlistitems:
                    entries = [
                        ie_entries[i - 1] for i in playlistitems
                        if -n_all_entries <= i - 1 < n_all_entries]
                else:
                    entries = ie_entries[playliststart:playlistend]
                n_entries = len(entries)
                self.to_screen(
                    '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            elif isinstance(ie_entries, PagedList):
                if playlistitems:
                    entries = []
                    for item in playlistitems:
                        entries.extend(ie_entries.getslice(
                            item - 1, item
                        ))
                else:
                    entries = ie_entries.getslice(
                        playliststart, playlistend)
                n_entries = len(entries)
                self.to_screen(
                    '[%s] playlist %s: Downloading %d videos' %
                    (ie_result['extractor'], playlist, n_entries))
            else:  # iterable
                if playlistitems:
                    entry_list = list(ie_entries)
                    entries = [entry_list[i - 1] for i in playlistitems]
                else:
                    entries = list(itertools.islice(
                        ie_entries, playliststart, playlistend))
                n_entries = len(entries)
                self.to_screen(
                    '[%s] playlist %s: Downloading %d videos' %
                    (ie_result['extractor'], playlist, n_entries))

            if self.params.get('playlistreverse', False):
                entries = entries[::-1]

            if self.params.get('playlistrandom', False):
                random.shuffle(entries)

            x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                # This __x_forwarded_for_ip thing is a bit ugly but requires
                # minimal changes
                if x_forwarded_for:
                    entry['__x_forwarded_for_ip'] = x_forwarded_for
                # Playlist-level metadata propagated to every entry
                extra = {
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_id': ie_result.get('id'),
                    'playlist_title': ie_result.get('title'),
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

                reason = self._match_entry(entry, incomplete=True)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                    continue

                # Recurse: each entry may itself be a url/playlist/video result
                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            self.to_screen('[download] Finished downloading playlist: %s' % playlist)
            return ie_result
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            def _fixup(r):
                # Copy the top-level extractor metadata onto each entry
                self.add_extra_info(
                    r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    }
                )
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
968
969     def _build_format_filter(self, filter_spec):
970         " Returns a function to filter the formats according to the filter_spec "
971
972         OPERATORS = {
973             '<': operator.lt,
974             '<=': operator.le,
975             '>': operator.gt,
976             '>=': operator.ge,
977             '=': operator.eq,
978             '!=': operator.ne,
979         }
980         operator_rex = re.compile(r'''(?x)\s*
981             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
982             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
983             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
984             $
985             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
986         m = operator_rex.search(filter_spec)
987         if m:
988             try:
989                 comparison_value = int(m.group('value'))
990             except ValueError:
991                 comparison_value = parse_filesize(m.group('value'))
992                 if comparison_value is None:
993                     comparison_value = parse_filesize(m.group('value') + 'B')
994                 if comparison_value is None:
995                     raise ValueError(
996                         'Invalid value %r in format specification %r' % (
997                             m.group('value'), filter_spec))
998             op = OPERATORS[m.group('op')]
999
1000         if not m:
1001             STR_OPERATORS = {
1002                 '=': operator.eq,
1003                 '!=': operator.ne,
1004                 '^=': lambda attr, value: attr.startswith(value),
1005                 '$=': lambda attr, value: attr.endswith(value),
1006                 '*=': lambda attr, value: value in attr,
1007             }
1008             str_operator_rex = re.compile(r'''(?x)
1009                 \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
1010                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
1011                 \s*(?P<value>[a-zA-Z0-9._-]+)
1012                 \s*$
1013                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1014             m = str_operator_rex.search(filter_spec)
1015             if m:
1016                 comparison_value = m.group('value')
1017                 op = STR_OPERATORS[m.group('op')]
1018
1019         if not m:
1020             raise ValueError('Invalid filter specification %r' % filter_spec)
1021
1022         def _filter(f):
1023             actual_value = f.get(m.group('key'))
1024             if actual_value is None:
1025                 return m.group('none_inclusive')
1026             return op(actual_value, comparison_value)
1027         return _filter
1028
    def build_format_selector(self, format_spec):
        """Compile a --format specification string into a selector function.

        The returned callable takes a ctx dict (with 'formats' and
        'incomplete_formats' keys) and yields the selected format dicts.
        """
        def syntax_error(note, start):
            # Build (not raise) a SyntaxError pointing at column start[1]
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        # Selector node types for the parsed format-spec AST
        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        def _parse_filter(tokens):
            # Consume tokens up to the closing ']' and return the raw
            # filter string (the '[' has already been consumed by the caller).
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    # Accumulate adjacent tokens into one NAME token
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Recursive-descent parse of a (sub)expression into FormatSelector
            # nodes; the inside_* flags control which tokens end the recursion.
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        video_selector = current_selector
                        audio_selector = _parse_format_selection(tokens, inside_merge=True)
                        if not video_selector or not audio_selector:
                            raise syntax_error('"+" must be between two format selectors', start)
                        current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _build_selector_function(selector):
            # Turn a FormatSelector node (or list of them) into a generator
            # function over ctx['formats'].
            if isinstance(selector, list):
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    for f in fs:
                        for format in f(ctx):
                            yield format
                return selector_function
            elif selector.type == GROUP:
                selector_function = _build_selector_function(selector.selector)
            elif selector.type == PICKFIRST:
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    # Return the first alternative that yields any formats
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []
            elif selector.type == SINGLE:
                format_spec = selector.selector

                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    if not formats:
                        return
                    if format_spec == 'all':
                        for f in formats:
                            yield f
                    elif format_spec in ['best', 'worst', None]:
                        # Formats are assumed sorted worst-to-best, so the
                        # last entry is the best one
                        format_idx = 0 if format_spec == 'worst' else -1
                        audiovideo_formats = [
                            f for f in formats
                            if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                        if audiovideo_formats:
                            yield audiovideo_formats[format_idx]
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) we will fallback to best/worst
                        # {video,audio}-only format
                        elif ctx['incomplete_formats']:
                            yield formats[format_idx]
                    elif format_spec == 'bestaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[-1]
                    elif format_spec == 'worstaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[0]
                    elif format_spec == 'bestvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[-1]
                    elif format_spec == 'worstvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[0]
                    else:
                        # Otherwise the spec names an extension or a format_id
                        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                        if format_spec in extensions:
                            filter_f = lambda f: f['ext'] == format_spec
                        else:
                            filter_f = lambda f: f['format_id'] == format_spec
                        matches = list(filter(filter_f, formats))
                        if matches:
                            yield matches[-1]
            elif selector.type == MERGE:
                def _merge(formats_info):
                    # Combine one video-only and one audio-only format into a
                    # single requested_formats entry
                    format_1, format_2 = [f['format_id'] for f in formats_info]
                    # The first format must contain the video and the
                    # second the audio
                    if formats_info[0].get('vcodec') == 'none':
                        self.report_error('The first format must '
                                          'contain the video, try using '
                                          '"-f %s+%s"' % (format_2, format_1))
                        return
                    # Formats must be opposite (video+audio)
                    if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
                        self.report_error(
                            'Both formats %s and %s are video-only, you must specify "-f video+audio"'
                            % (format_1, format_2))
                        return
                    output_ext = (
                        formats_info[0]['ext']
                        if self.params.get('merge_output_format') is None
                        else self.params['merge_output_format'])
                    return {
                        'requested_formats': formats_info,
                        'format': '%s+%s' % (formats_info[0].get('format'),
                                             formats_info[1].get('format')),
                        'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                formats_info[1].get('format_id')),
                        'width': formats_info[0].get('width'),
                        'height': formats_info[0].get('height'),
                        'resolution': formats_info[0].get('resolution'),
                        'fps': formats_info[0].get('fps'),
                        'vcodec': formats_info[0].get('vcodec'),
                        'vbr': formats_info[0].get('vbr'),
                        'stretched_ratio': formats_info[0].get('stretched_ratio'),
                        'acodec': formats_info[1].get('acodec'),
                        'abr': formats_info[1].get('abr'),
                        'ext': output_ext,
                    }
                video_selector, audio_selector = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    # Cartesian product of the video and audio candidates;
                    # deep copies keep the two selectors independent
                    for pair in itertools.product(
                            video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
                        yield _merge(pair)

            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                # Apply the node's [..] filters before running the selector
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector

        # Tokenize the spec with Python's tokenizer, then parse and compile
        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator(object):
            # Iterator over the token list with one-token pushback support
            # (restore_last_token), needed by the recursive parser.
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            next = __next__

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
1293
1294     def _calc_headers(self, info_dict):
1295         res = std_headers.copy()
1296
1297         add_headers = info_dict.get('http_headers')
1298         if add_headers:
1299             res.update(add_headers)
1300
1301         cookies = self._calc_cookies(info_dict)
1302         if cookies:
1303             res['Cookie'] = cookies
1304
1305         if 'X-Forwarded-For' not in res:
1306             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1307             if x_forwarded_for_ip:
1308                 res['X-Forwarded-For'] = x_forwarded_for_ip
1309
1310         return res
1311
1312     def _calc_cookies(self, info_dict):
1313         pr = sanitized_Request(info_dict['url'])
1314         self.cookiejar.add_cookie_header(pr)
1315         return pr.get_header('Cookie')
1316
    def process_video_result(self, info_dict, download=True):
        """Sanitize and complete a single extractor video result, select the
        requested format(s) and, when download is True, hand each selected
        format to process_info for downloading.

        Mutates info_dict in place (thumbnails, subtitles, formats, derived
        fields), returns it updated with the last selected format (kept for
        backwards compatibility).  Raises ExtractorError when mandatory
        fields are missing or no format matches the format spec.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        if not isinstance(info_dict['id'], compat_str):
            self.report_warning('"id" field is not a string - forcing string conversion')
            info_dict['id'] = compat_str(info_dict['id'])

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        # Normalize a single 'thumbnail' into the 'thumbnails' list
        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if thumbnails:
            # Sort worst-to-best (missing preference/size sorts first),
            # so thumbnails[-1] is the preferred one below
            thumbnails.sort(key=lambda t: (
                t.get('preference') if t.get('preference') is not None else -1,
                t.get('width') if t.get('width') is not None else -1,
                t.get('height') if t.get('height') is not None else -1,
                t.get('id') if t.get('id') is not None else '', t.get('url')))
            for i, t in enumerate(thumbnails):
                t['url'] = sanitize_url(t['url'])
                if t.get('width') and t.get('height'):
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                if t.get('id') is None:
                    # Fall back to the list position as the thumbnail id
                    t['id'] = '%d' % i

        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
            return

        thumbnail = info_dict.get('thumbnail')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            # Best thumbnail is last after the sort above
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Working around out-of-range timestamp values (e.g. negative ones on Windows,
            # see http://bugs.python.org/issue1646728)
            try:
                upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
                info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
            except (ValueError, OverflowError, OSError):
                pass

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        # Sanitize subtitle URLs and guess a missing container extension
        subtitles = info_dict.get('subtitles')
        if subtitles:
            for _, subtitle in subtitles.items():
                for subtitle_format in subtitle:
                    if subtitle_format.get('url'):
                        subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                    if subtitle_format.get('ext') is None:
                        subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        if self.params.get('listsubtitles', False):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
            return
        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles,
            info_dict.get('automatic_captions'))

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            raise ExtractorError('No video formats found!')

        # Maps format_id -> list of formats sharing it (for dedup below)
        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)

            format['url'] = sanitize_url(format['url'])

            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        for format_id, ambiguous_formats in formats_dict.items():
            if len(ambiguous_formats) > 1:
                for i, format in enumerate(ambiguous_formats):
                    format['format_id'] = '%s-%d' % (format_id, i)

        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if format.get('ext') is None:
                format['ext'] = determine_ext(format['url']).lower()
            # Automatically determine protocol if missing (useful for format
            # selection purposes)
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats'):
            self.list_formats(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            req_format_list = []
            # Default to bestvideo+bestaudio only when a merger is available
            # and we are not streaming to stdout or downloading a live stream
            if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
                    not info_dict.get('is_live')):
                merger = FFmpegMergerPP(self)
                if merger.available and merger.can_merge():
                    req_format_list.append('bestvideo+bestaudio')
            req_format_list.append('best')
            req_format = '/'.join(req_format_list)
        format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/rg3/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/rg3/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) or
            # all formats are audio-only
            all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            raise ExtractorError('requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
1521
1522     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1523         """Select the requested subtitles and their format"""
1524         available_subs = {}
1525         if normal_subtitles and self.params.get('writesubtitles'):
1526             available_subs.update(normal_subtitles)
1527         if automatic_captions and self.params.get('writeautomaticsub'):
1528             for lang, cap_info in automatic_captions.items():
1529                 if lang not in available_subs:
1530                     available_subs[lang] = cap_info
1531
1532         if (not self.params.get('writesubtitles') and not
1533                 self.params.get('writeautomaticsub') or not
1534                 available_subs):
1535             return None
1536
1537         if self.params.get('allsubtitles', False):
1538             requested_langs = available_subs.keys()
1539         else:
1540             if self.params.get('subtitleslangs', False):
1541                 requested_langs = self.params.get('subtitleslangs')
1542             elif 'en' in available_subs:
1543                 requested_langs = ['en']
1544             else:
1545                 requested_langs = [list(available_subs.keys())[0]]
1546
1547         formats_query = self.params.get('subtitlesformat', 'best')
1548         formats_preference = formats_query.split('/') if formats_query else []
1549         subs = {}
1550         for lang in requested_langs:
1551             formats = available_subs.get(lang)
1552             if formats is None:
1553                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1554                 continue
1555             for ext in formats_preference:
1556                 if ext == 'best':
1557                     f = formats[-1]
1558                     break
1559                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1560                 if matches:
1561                     f = matches[-1]
1562                     break
1563             else:
1564                 f = formats[-1]
1565                 self.report_warning(
1566                     'No subtitle format found matching "%s" for language %s, '
1567                     'using %s' % (formats_query, lang, f['ext']))
1568             subs[lang] = f
1569         return subs
1570
    def process_info(self, info_dict):
        """Process a single resolved IE result: enforce download limits and
        match filters, emit forced printings, write side files (description,
        annotations, subtitles, info JSON, thumbnails), run the actual
        download (merging multi-format requests) and schedule fixup
        postprocessors.
        """

        assert info_dict.get('_type', 'video') == 'video'

        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads >= int(max_downloads):
                raise MaxDownloadsReached()

        # Keep the untruncated title around before clamping for filenames/UI
        info_dict['fulltitle'] = info_dict['title']
        if len(info_dict['title']) > 200:
            info_dict['title'] = info_dict['title'][:197] + '...'

        if 'format' not in info_dict:
            info_dict['format'] = info_dict['ext']

        reason = self._match_entry(info_dict, incomplete=False)
        if reason is not None:
            self.to_screen('[download] ' + reason)
            return

        self._num_downloads += 1

        info_dict['_filename'] = filename = self.prepare_filename(info_dict)

        # Forced printings
        if self.params.get('forcetitle', False):
            self.to_stdout(info_dict['fulltitle'])
        if self.params.get('forceid', False):
            self.to_stdout(info_dict['id'])
        if self.params.get('forceurl', False):
            if info_dict.get('requested_formats') is not None:
                for f in info_dict['requested_formats']:
                    self.to_stdout(f['url'] + f.get('play_path', ''))
            else:
                # For RTMP URLs, also include the playpath
                self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
        if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
            self.to_stdout(info_dict['thumbnail'])
        if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
            self.to_stdout(info_dict['description'])
        if self.params.get('forcefilename', False) and filename is not None:
            self.to_stdout(filename)
        if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
            self.to_stdout(formatSeconds(info_dict['duration']))
        if self.params.get('forceformat', False):
            self.to_stdout(info_dict['format'])
        if self.params.get('forcejson', False):
            self.to_stdout(json.dumps(info_dict))

        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):
            return

        if filename is None:
            return

        # Create the destination directory tree before writing anything
        try:
            dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
            if dn and not os.path.exists(dn):
                os.makedirs(dn)
        except (OSError, IOError) as err:
            self.report_error('unable to create directory ' + error_to_compat_str(err))
            return

        if self.params.get('writedescription', False):
            descfn = replace_extension(filename, 'description', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Video description is already present')
            elif info_dict.get('description') is None:
                self.report_warning('There\'s no description to write.')
            else:
                try:
                    self.to_screen('[info] Writing video description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(info_dict['description'])
                except (OSError, IOError):
                    self.report_error('Cannot write description file ' + descfn)
                    return

        if self.params.get('writeannotations', False):
            annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
                self.to_screen('[info] Video annotations are already present')
            else:
                try:
                    self.to_screen('[info] Writing video annotations to: ' + annofn)
                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                        annofile.write(info_dict['annotations'])
                # Missing/None 'annotations' key lands here, not in IOError
                except (KeyError, TypeError):
                    self.report_warning('There are no annotations to write.')
                except (OSError, IOError):
                    self.report_error('Cannot write annotations file: ' + annofn)
                    return

        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub')])

        if subtitles_are_requested and info_dict.get('requested_subtitles'):
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            subtitles = info_dict['requested_subtitles']
            ie = self.get_info_extractor(info_dict['extractor_key'])
            for sub_lang, sub_info in subtitles.items():
                sub_format = sub_info['ext']
                if sub_info.get('data') is not None:
                    # Subtitle content was already fetched by the extractor
                    sub_data = sub_info['data']
                else:
                    try:
                        sub_data = ie._download_webpage(
                            sub_info['url'], info_dict['id'], note=False)
                    except ExtractorError as err:
                        self.report_warning('Unable to download subtitle for "%s": %s' %
                                            (sub_lang, error_to_compat_str(err.cause)))
                        continue
                try:
                    sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                    if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                        # NOTE(review): 'already_present' underscore looks like a
                        # typo in this message; left untouched (runtime string)
                        self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                    else:
                        self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                        # Use newline='' to prevent conversion of newline characters
                        # See https://github.com/rg3/youtube-dl/issues/10268
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
                            subfile.write(sub_data)
                except (OSError, IOError):
                    self.report_error('Cannot write subtitles file ' + sub_filename)
                    return

        if self.params.get('writeinfojson', False):
            infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Video description metadata is already present')
            else:
                self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
                try:
                    write_json_file(self.filter_requested_info(info_dict), infofn)
                except (OSError, IOError):
                    self.report_error('Cannot write metadata to JSON file ' + infofn)
                    return

        self._write_thumbnails(info_dict, filename)

        if not self.params.get('skip_download', False):
            try:
                # Helper: run the suitable FileDownloader for one file
                def dl(name, info):
                    fd = get_suitable_downloader(info, self.params)(self, self.params)
                    for ph in self._progress_hooks:
                        fd.add_progress_hook(ph)
                    if self.params.get('verbose'):
                        self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                    return fd.download(name, info)

                if info_dict.get('requested_formats') is not None:
                    # Separate video+audio formats: download each, then merge
                    downloaded = []
                    success = True
                    merger = FFmpegMergerPP(self)
                    if not merger.available:
                        postprocessors = []
                        self.report_warning('You have requested multiple '
                                            'formats but ffmpeg or avconv are not installed.'
                                            ' The formats won\'t be merged.')
                    else:
                        postprocessors = [merger]

                    def compatible_formats(formats):
                        # True when the two formats can share one container
                        video, audio = formats
                        # Check extension
                        # NOTE(review): names look swapped (video_ext receives
                        # audio's ext and vice versa); harmless here since the
                        # membership check below is symmetric — confirm upstream
                        video_ext, audio_ext = audio.get('ext'), video.get('ext')
                        if video_ext and audio_ext:
                            COMPATIBLE_EXTS = (
                                ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
                                # NOTE(review): ('webm') is a plain string, not a
                                # 1-tuple, so 'in' is a substring test here —
                                # presumably meant ('webm',); verify upstream
                                ('webm')
                            )
                            for exts in COMPATIBLE_EXTS:
                                if video_ext in exts and audio_ext in exts:
                                    return True
                        # TODO: Check acodec/vcodec
                        return False

                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext == info_dict['ext']
                        else filename)
                    requested_formats = info_dict['requested_formats']
                    if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'Requested formats are incompatible for merge and will be merged into mkv.')
                    # Ensure filename always has a correct extension for successful merge
                    filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
                    if os.path.exists(encodeFilename(filename)):
                        self.to_screen(
                            '[download] %s has already been downloaded and '
                            'merged' % filename)
                    else:
                        for f in requested_formats:
                            new_info = dict(info_dict)
                            new_info.update(f)
                            fname = self.prepare_filename(new_info)
                            # Temporary per-format file, e.g. name.f137.mp4
                            fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
                            downloaded.append(fname)
                            partial_success = dl(fname, new_info)
                            success = success and partial_success
                        info_dict['__postprocessors'] = postprocessors
                        info_dict['__files_to_merge'] = downloaded
                else:
                    # Just a single file
                    success = dl(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error('unable to download video data: %s' % error_to_compat_str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

            if success and filename != '-':
                # Fixup content
                fixup_policy = self.params.get('fixup')
                if fixup_policy is None:
                    fixup_policy = 'detect_or_warn'

                INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'

                # Fixup 1: non-uniform pixel aspect ratio
                stretched_ratio = info_dict.get('stretched_ratio')
                if stretched_ratio is not None and stretched_ratio != 1:
                    if fixup_policy == 'warn':
                        self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                            info_dict['id'], stretched_ratio))
                    elif fixup_policy == 'detect_or_warn':
                        stretched_pp = FFmpegFixupStretchedPP(self)
                        if stretched_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(stretched_pp)
                        else:
                            self.report_warning(
                                '%s: Non-uniform pixel ratio (%s). %s'
                                % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
                    else:
                        assert fixup_policy in ('ignore', 'never')

                # Fixup 2: DASH m4a container compatibility
                if (info_dict.get('requested_formats') is None and
                        info_dict.get('container') == 'm4a_dash'):
                    if fixup_policy == 'warn':
                        self.report_warning(
                            '%s: writing DASH m4a. '
                            'Only some players support this container.'
                            % info_dict['id'])
                    elif fixup_policy == 'detect_or_warn':
                        fixup_pp = FFmpegFixupM4aPP(self)
                        if fixup_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(fixup_pp)
                        else:
                            self.report_warning(
                                '%s: writing DASH m4a. '
                                'Only some players support this container. %s'
                                % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                    else:
                        assert fixup_policy in ('ignore', 'never')

                # Fixup 3: AAC bitstream from HLS downloads
                # (note the and/or precedence: m3u8_native always matches;
                # plain m3u8 only together with hls_prefer_native)
                if (info_dict.get('protocol') == 'm3u8_native' or
                        info_dict.get('protocol') == 'm3u8' and
                        self.params.get('hls_prefer_native')):
                    if fixup_policy == 'warn':
                        self.report_warning('%s: malformated aac bitstream.' % (
                            info_dict['id']))
                    elif fixup_policy == 'detect_or_warn':
                        fixup_pp = FFmpegFixupM3u8PP(self)
                        if fixup_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(fixup_pp)
                        else:
                            self.report_warning(
                                '%s: malformated aac bitstream. %s'
                                % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                    else:
                        assert fixup_policy in ('ignore', 'never')

                try:
                    self.post_process(filename, info_dict)
                except (PostProcessingError) as err:
                    self.report_error('postprocessing: %s' % str(err))
                    return
                self.record_download_archive(info_dict)
1860
1861     def download(self, url_list):
1862         """Download a given list of URLs."""
1863         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1864         if (len(url_list) > 1 and
1865                 '%' not in outtmpl and
1866                 self.params.get('max_downloads') != 1):
1867             raise SameFileError(outtmpl)
1868
1869         for url in url_list:
1870             try:
1871                 # It also downloads the videos
1872                 res = self.extract_info(
1873                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
1874             except UnavailableVideoError:
1875                 self.report_error('unable to download video')
1876             except MaxDownloadsReached:
1877                 self.to_screen('[info] Maximum number of downloaded files reached.')
1878                 raise
1879             else:
1880                 if self.params.get('dump_single_json', False):
1881                     self.to_stdout(json.dumps(res))
1882
1883         return self._download_retcode
1884
1885     def download_with_info_file(self, info_filename):
1886         with contextlib.closing(fileinput.FileInput(
1887                 [info_filename], mode='r',
1888                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1889             # FileInput doesn't have a read method, we can't call json.load
1890             info = self.filter_requested_info(json.loads('\n'.join(f)))
1891         try:
1892             self.process_ie_result(info, download=True)
1893         except DownloadError:
1894             webpage_url = info.get('webpage_url')
1895             if webpage_url is not None:
1896                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1897                 return self.download([webpage_url])
1898             else:
1899                 raise
1900         return self._download_retcode
1901
1902     @staticmethod
1903     def filter_requested_info(info_dict):
1904         return dict(
1905             (k, v) for k, v in info_dict.items()
1906             if k not in ['requested_formats', 'requested_subtitles'])
1907
1908     def post_process(self, filename, ie_info):
1909         """Run all the postprocessors on the given file."""
1910         info = dict(ie_info)
1911         info['filepath'] = filename
1912         pps_chain = []
1913         if ie_info.get('__postprocessors') is not None:
1914             pps_chain.extend(ie_info['__postprocessors'])
1915         pps_chain.extend(self._pps)
1916         for pp in pps_chain:
1917             files_to_delete = []
1918             try:
1919                 files_to_delete, info = pp.run(info)
1920             except PostProcessingError as e:
1921                 self.report_error(e.msg)
1922             if files_to_delete and not self.params.get('keepvideo', False):
1923                 for old_filename in files_to_delete:
1924                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1925                     try:
1926                         os.remove(encodeFilename(old_filename))
1927                     except (IOError, OSError):
1928                         self.report_warning('Unable to remove downloaded original file')
1929
1930     def _make_archive_id(self, info_dict):
1931         # Future-proof against any change in case
1932         # and backwards compatibility with prior versions
1933         extractor = info_dict.get('extractor_key')
1934         if extractor is None:
1935             if 'id' in info_dict:
1936                 extractor = info_dict.get('ie_key')  # key in a playlist
1937         if extractor is None:
1938             return None  # Incomplete video information
1939         return extractor.lower() + ' ' + info_dict['id']
1940
1941     def in_download_archive(self, info_dict):
1942         fn = self.params.get('download_archive')
1943         if fn is None:
1944             return False
1945
1946         vid_id = self._make_archive_id(info_dict)
1947         if vid_id is None:
1948             return False  # Incomplete video information
1949
1950         try:
1951             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1952                 for line in archive_file:
1953                     if line.strip() == vid_id:
1954                         return True
1955         except IOError as ioe:
1956             if ioe.errno != errno.ENOENT:
1957                 raise
1958         return False
1959
1960     def record_download_archive(self, info_dict):
1961         fn = self.params.get('download_archive')
1962         if fn is None:
1963             return
1964         vid_id = self._make_archive_id(info_dict)
1965         assert vid_id
1966         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1967             archive_file.write(vid_id + '\n')
1968
1969     @staticmethod
1970     def format_resolution(format, default='unknown'):
1971         if format.get('vcodec') == 'none':
1972             return 'audio only'
1973         if format.get('resolution') is not None:
1974             return format['resolution']
1975         if format.get('height') is not None:
1976             if format.get('width') is not None:
1977                 res = '%sx%s' % (format['width'], format['height'])
1978             else:
1979                 res = '%sp' % format['height']
1980         elif format.get('width') is not None:
1981             res = '%dx?' % format['width']
1982         else:
1983             res = default
1984         return res
1985
1986     def _format_note(self, fdict):
1987         res = ''
1988         if fdict.get('ext') in ['f4f', 'f4m']:
1989             res += '(unsupported) '
1990         if fdict.get('language'):
1991             if res:
1992                 res += ' '
1993             res += '[%s] ' % fdict['language']
1994         if fdict.get('format_note') is not None:
1995             res += fdict['format_note'] + ' '
1996         if fdict.get('tbr') is not None:
1997             res += '%4dk ' % fdict['tbr']
1998         if fdict.get('container') is not None:
1999             if res:
2000                 res += ', '
2001             res += '%s container' % fdict['container']
2002         if (fdict.get('vcodec') is not None and
2003                 fdict.get('vcodec') != 'none'):
2004             if res:
2005                 res += ', '
2006             res += fdict['vcodec']
2007             if fdict.get('vbr') is not None:
2008                 res += '@'
2009         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2010             res += 'video@'
2011         if fdict.get('vbr') is not None:
2012             res += '%4dk' % fdict['vbr']
2013         if fdict.get('fps') is not None:
2014             if res:
2015                 res += ', '
2016             res += '%sfps' % fdict['fps']
2017         if fdict.get('acodec') is not None:
2018             if res:
2019                 res += ', '
2020             if fdict['acodec'] == 'none':
2021                 res += 'video only'
2022             else:
2023                 res += '%-5s' % fdict['acodec']
2024         elif fdict.get('abr') is not None:
2025             if res:
2026                 res += ', '
2027             res += 'audio'
2028         if fdict.get('abr') is not None:
2029             res += '@%3dk' % fdict['abr']
2030         if fdict.get('asr') is not None:
2031             res += ' (%5dHz)' % fdict['asr']
2032         if fdict.get('filesize') is not None:
2033             if res:
2034                 res += ', '
2035             res += format_bytes(fdict['filesize'])
2036         elif fdict.get('filesize_approx') is not None:
2037             if res:
2038                 res += ', '
2039             res += '~' + format_bytes(fdict['filesize_approx'])
2040         return res
2041
2042     def list_formats(self, info_dict):
2043         formats = info_dict.get('formats', [info_dict])
2044         table = [
2045             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
2046             for f in formats
2047             if f.get('preference') is None or f['preference'] >= -1000]
2048         if len(formats) > 1:
2049             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
2050
2051         header_line = ['format code', 'extension', 'resolution', 'note']
2052         self.to_screen(
2053             '[info] Available formats for %s:\n%s' %
2054             (info_dict['id'], render_table(header_line, table)))
2055
2056     def list_thumbnails(self, info_dict):
2057         thumbnails = info_dict.get('thumbnails')
2058         if not thumbnails:
2059             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2060             return
2061
2062         self.to_screen(
2063             '[info] Thumbnails for %s:' % info_dict['id'])
2064         self.to_screen(render_table(
2065             ['ID', 'width', 'height', 'URL'],
2066             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
2067
2068     def list_subtitles(self, video_id, subtitles, name='subtitles'):
2069         if not subtitles:
2070             self.to_screen('%s has no %s' % (video_id, name))
2071             return
2072         self.to_screen(
2073             'Available %s for %s:' % (name, video_id))
2074         self.to_screen(render_table(
2075             ['Language', 'formats'],
2076             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2077                 for lang, formats in subtitles.items()]))
2078
2079     def urlopen(self, req):
2080         """ Start an HTTP download """
2081         if isinstance(req, compat_basestring):
2082             req = sanitized_Request(req)
2083         return self._opener.open(req, timeout=self._socket_timeout)
2084
    def print_debug_header(self):
        """Write diagnostic information (versions, encodings, proxies) to
        the debug output. Only active when the 'verbose' param is set."""
        if not self.params.get('verbose'):
            return

        # Sanity check: on a broken interpreter the literal '' is not of the
        # unicode/str type we expect for all further text handling
        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
        if _LAZY_LOADER:
            self._write_string('[debug] Lazy loading extractors enabled' + '\n')
        # Best effort: report the git revision when running from a checkout
        try:
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        except Exception:
            # sys.exc_clear only exists on Python 2; ignore its absence
            try:
                sys.exc_clear()
            except Exception:
                pass
        self._write_string('[debug] Python version %s - %s\n' % (
            platform.python_version(), platform_name()))

        # Versions of the external programs youtube-dl may shell out to
        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Collect the proxy mappings from every handler that carries one
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        # Optional phone-home: report public IP and check for newer releases
        if self.params.get('call_home', False):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
2151
    def _setup_opener(self):
        """Build the urllib opener used for all HTTP(S) requests.

        Configures socket timeout, cookie jar, proxy handling and the
        custom protocol handlers, then stores the result in self._opener.
        """
        timeout_val = self.params.get('socket_timeout')
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        # Without a cookie file, cookies only live for the process lifetime
        if opts_cookiefile is None:
            self.cookiejar = compat_cookiejar.CookieJar()
        else:
            opts_cookiefile = compat_expanduser(opts_cookiefile)
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
                opts_cookiefile)
            # Only load when readable; a missing file will be created on save
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load()

        cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
        if opts_proxy is not None:
            # An explicit empty string disables proxying entirely
            if opts_proxy == '':
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        data_handler = compat_urllib_request_DataHandler()

        # When passing our own FileHandler instance, build_opener won't add the
        # default FileHandler and allows us to disable the file protocol, which
        # can be used for malicious purposes (see
        # https://github.com/rg3/youtube-dl/issues/8227)
        file_handler = compat_urllib_request.FileHandler()

        def file_open(*args, **kwargs):
            raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
        file_handler.file_open = file_open

        opener = compat_urllib_request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)

        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
2204
2205     def encode(self, s):
2206         if isinstance(s, bytes):
2207             return s  # Already encoded
2208
2209         try:
2210             return s.encode(self.get_encoding())
2211         except UnicodeEncodeError as err:
2212             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2213             raise
2214
2215     def get_encoding(self):
2216         encoding = self.params.get('encoding')
2217         if encoding is None:
2218             encoding = preferredencoding()
2219         return encoding
2220
    def _write_thumbnails(self, info_dict, filename):
        """Download thumbnail images next to the video file.

        With 'writethumbnail' only the last (presumably best — TODO confirm
        ordering assumption) thumbnail is fetched; with
        'write_all_thumbnails' every one is. No-op otherwise.
        """
        if self.params.get('writethumbnail', False):
            thumbnails = info_dict.get('thumbnails')
            if thumbnails:
                thumbnails = [thumbnails[-1]]
        elif self.params.get('write_all_thumbnails', False):
            thumbnails = info_dict.get('thumbnails')
        else:
            return

        if not thumbnails:
            # No thumbnails present, so return immediately
            return

        for t in thumbnails:
            thumb_ext = determine_ext(t['url'], 'jpg')
            # Disambiguate filenames/messages only when saving several thumbnails
            suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
            thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
            t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext

            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                self.to_screen('[%s] %s: Thumbnail %sis already present' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
            else:
                self.to_screen('[%s] %s: Downloading thumbnail %s...' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
                try:
                    uf = self.urlopen(t['url'])
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    # Best effort: a failed thumbnail download must not abort the video download
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (t['url'], error_to_compat_str(err)))