[YoutubeDL] Add support for string formatting operations in output template
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # coding: utf-8
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import copy
9 import datetime
10 import errno
11 import fileinput
12 import io
13 import itertools
14 import json
15 import locale
16 import operator
17 import os
18 import platform
19 import re
20 import shutil
21 import subprocess
22 import socket
23 import sys
24 import time
25 import tokenize
26 import traceback
27 import random
28
29 from .compat import (
30     compat_basestring,
31     compat_cookiejar,
32     compat_expanduser,
33     compat_get_terminal_size,
34     compat_http_client,
35     compat_kwargs,
36     compat_numeric_types,
37     compat_os_name,
38     compat_str,
39     compat_tokenize_tokenize,
40     compat_urllib_error,
41     compat_urllib_request,
42     compat_urllib_request_DataHandler,
43 )
44 from .utils import (
45     age_restricted,
46     args_to_str,
47     ContentTooShortError,
48     date_from_str,
49     DateRange,
50     DEFAULT_OUTTMPL,
51     determine_ext,
52     determine_protocol,
53     DownloadError,
54     encode_compat_str,
55     encodeFilename,
56     error_to_compat_str,
57     ExtractorError,
58     format_bytes,
59     formatSeconds,
60     GeoRestrictedError,
61     ISO3166Utils,
62     locked_file,
63     make_HTTPS_handler,
64     MaxDownloadsReached,
65     PagedList,
66     parse_filesize,
67     PerRequestProxyHandler,
68     platform_name,
69     PostProcessingError,
70     preferredencoding,
71     prepend_extension,
72     register_socks_protocols,
73     render_table,
74     replace_extension,
75     SameFileError,
76     sanitize_filename,
77     sanitize_path,
78     sanitize_url,
79     sanitized_Request,
80     std_headers,
81     subtitles_filename,
82     UnavailableVideoError,
83     url_basename,
84     version_tuple,
85     write_json_file,
86     write_string,
87     YoutubeDLCookieProcessor,
88     YoutubeDLHandler,
89 )
90 from .cache import Cache
91 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
92 from .downloader import get_suitable_downloader
93 from .downloader.rtmp import rtmpdump_version
94 from .postprocessor import (
95     FFmpegFixupM3u8PP,
96     FFmpegFixupM4aPP,
97     FFmpegFixupStretchedPP,
98     FFmpegMergerPP,
99     FFmpegPostProcessor,
100     get_postprocessor,
101 )
102 from .version import __version__
103
104 if compat_os_name == 'nt':
105     import ctypes
106
107
108 class YoutubeDL(object):
109     """YoutubeDL class.
110
    YoutubeDL objects are the ones responsible for downloading the
112     actual video file and writing it to disk if the user has requested
113     it, among some other tasks. In most cases there should be one per
114     program. As, given a video URL, the downloader doesn't know how to
115     extract all the needed information, task that InfoExtractors do, it
116     has to pass the URL to one of them.
117
118     For this, YoutubeDL objects have a method that allows
119     InfoExtractors to be registered in a given order. When it is passed
120     a URL, the YoutubeDL object handles it to the first InfoExtractor it
121     finds that reports being able to handle it. The InfoExtractor extracts
122     all the information about the video or videos the URL refers to, and
123     YoutubeDL process the extracted information, possibly using a File
124     Downloader to download the video.
125
126     YoutubeDL objects accept a lot of parameters. In order not to saturate
127     the object constructor with arguments, it receives a dictionary of
128     options instead. These options are available through the params
129     attribute for the InfoExtractors to use. The YoutubeDL also
130     registers itself as the downloader in charge for the InfoExtractors
131     that are added to it, so this is a "mutual registration".
132
133     Available options:
134
135     username:          Username for authentication purposes.
136     password:          Password for authentication purposes.
137     videopassword:     Password for accessing a video.
138     ap_mso:            Adobe Pass multiple-system operator identifier.
139     ap_username:       Multiple-system operator account username.
140     ap_password:       Multiple-system operator account password.
141     usenetrc:          Use netrc for authentication instead.
142     verbose:           Print additional info to stdout.
143     quiet:             Do not print messages to stdout.
144     no_warnings:       Do not print out anything for warnings.
145     forceurl:          Force printing final URL.
146     forcetitle:        Force printing title.
147     forceid:           Force printing ID.
148     forcethumbnail:    Force printing thumbnail URL.
149     forcedescription:  Force printing description.
150     forcefilename:     Force printing final filename.
151     forceduration:     Force printing duration.
152     forcejson:         Force printing info_dict as JSON.
153     dump_single_json:  Force printing the info_dict of the whole playlist
154                        (or video) as a single JSON line.
155     simulate:          Do not download the video files.
156     format:            Video format code. See options.py for more information.
157     outtmpl:           Template for output names.
158     restrictfilenames: Do not allow "&" and spaces in file names
159     ignoreerrors:      Do not stop on download errors.
160     force_generic_extractor: Force downloader to use the generic extractor
161     nooverwrites:      Prevent overwriting files.
162     playliststart:     Playlist item to start at.
163     playlistend:       Playlist item to end at.
164     playlist_items:    Specific indices of playlist to download.
165     playlistreverse:   Download playlist items in reverse order.
166     playlistrandom:    Download playlist items in random order.
167     matchtitle:        Download only matching titles.
168     rejecttitle:       Reject downloads for matching titles.
169     logger:            Log messages to a logging.Logger instance.
170     logtostderr:       Log messages to stderr instead of stdout.
171     writedescription:  Write the video description to a .description file
172     writeinfojson:     Write the video description to a .info.json file
173     writeannotations:  Write the video annotations to a .annotations.xml file
174     writethumbnail:    Write the thumbnail image to a file
175     write_all_thumbnails:  Write all thumbnail formats to files
176     writesubtitles:    Write the video subtitles to a file
177     writeautomaticsub: Write the automatically generated subtitles to a file
178     allsubtitles:      Downloads all the subtitles of the video
179                        (requires writesubtitles or writeautomaticsub)
180     listsubtitles:     Lists all available subtitles for the video
181     subtitlesformat:   The format code for subtitles
182     subtitleslangs:    List of languages of the subtitles to download
183     keepvideo:         Keep the video file after post-processing
184     daterange:         A DateRange object, download only if the upload_date is in the range.
185     skip_download:     Skip the actual download of the video file
186     cachedir:          Location of the cache files in the filesystem.
187                        False to disable filesystem cache.
188     noplaylist:        Download single video instead of a playlist if in doubt.
189     age_limit:         An integer representing the user's age in years.
190                        Unsuitable videos for the given age are skipped.
191     min_views:         An integer representing the minimum view count the video
192                        must have in order to not be skipped.
193                        Videos without view count information are always
194                        downloaded. None for no limit.
195     max_views:         An integer representing the maximum view count.
196                        Videos that are more popular than that are not
197                        downloaded.
198                        Videos without view count information are always
199                        downloaded. None for no limit.
200     download_archive:  File name of a file where all downloads are recorded.
201                        Videos already present in the file are not downloaded
202                        again.
203     cookiefile:        File name where cookies should be read from and dumped to.
204     nocheckcertificate:Do not verify SSL certificates
205     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
206                        At the moment, this is only supported by YouTube.
207     proxy:             URL of the proxy server to use
208     geo_verification_proxy:  URL of the proxy to use for IP address verification
209                        on geo-restricted sites. (Experimental)
210     socket_timeout:    Time to wait for unresponsive hosts, in seconds
211     bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
213     debug_printtraffic:Print out sent and received HTTP traffic
214     include_ads:       Download ads as well
215     default_search:    Prepend this string if an input url is not valid.
216                        'auto' for elaborate guessing
217     encoding:          Use this encoding instead of the system-specified.
218     extract_flat:      Do not resolve URLs, return the immediate result.
219                        Pass in 'in_playlist' to only show this behavior for
220                        playlist items.
221     postprocessors:    A list of dictionaries, each with an entry
222                        * key:  The name of the postprocessor. See
223                                youtube_dl/postprocessor/__init__.py for a list.
224                        as well as any further keyword arguments for the
225                        postprocessor.
226     progress_hooks:    A list of functions that get called on download
227                        progress, with a dictionary with the entries
228                        * status: One of "downloading", "error", or "finished".
229                                  Check this first and ignore unknown values.
230
231                        If status is one of "downloading", or "finished", the
232                        following properties may also be present:
233                        * filename: The final filename (always present)
234                        * tmpfilename: The filename we're currently writing to
235                        * downloaded_bytes: Bytes on disk
236                        * total_bytes: Size of the whole file, None if unknown
237                        * total_bytes_estimate: Guess of the eventual file size,
238                                                None if unavailable.
239                        * elapsed: The number of seconds since download started.
240                        * eta: The estimated time in seconds, None if unknown
241                        * speed: The download speed in bytes/second, None if
242                                 unknown
243                        * fragment_index: The counter of the currently
244                                          downloaded video fragment.
245                        * fragment_count: The number of fragments (= individual
246                                          files that will be merged)
247
248                        Progress hooks are guaranteed to be called at least once
249                        (with status "finished") if the download is successful.
250     merge_output_format: Extension to use when merging formats.
251     fixup:             Automatically correct known faults of the file.
252                        One of:
253                        - "never": do nothing
254                        - "warn": only emit a warning
255                        - "detect_or_warn": check whether we can do anything
256                                            about it, warn otherwise (default)
257     source_address:    (Experimental) Client-side IP address to bind to.
258     call_home:         Boolean, true iff we are allowed to contact the
259                        youtube-dl servers for debugging.
260     sleep_interval:    Number of seconds to sleep before each download when
261                        used alone or a lower bound of a range for randomized
262                        sleep before each download (minimum possible number
263                        of seconds to sleep) when used along with
264                        max_sleep_interval.
265     max_sleep_interval:Upper bound of a range for randomized sleep before each
266                        download (maximum possible number of seconds to sleep).
267                        Must only be used along with sleep_interval.
268                        Actual sleep time will be a random float from range
269                        [sleep_interval; max_sleep_interval].
270     listformats:       Print an overview of available video formats and exit.
271     list_thumbnails:   Print a table of all thumbnails and exit.
272     match_filter:      A function that gets called with the info_dict of
273                        every video.
274                        If it returns a message, the video is ignored.
275                        If it returns None, the video is downloaded.
276                        match_filter_func in utils.py is one example for this.
277     no_color:          Do not emit color codes in output.
278     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
279                        HTTP header (experimental)
280     geo_bypass_country:
281                        Two-letter ISO 3166-2 country code that will be used for
282                        explicit geographic restriction bypassing via faking
283                        X-Forwarded-For HTTP header (experimental)
284
285     The following options determine which downloader is picked:
286     external_downloader: Executable of the external downloader to call.
287                        None or unset for standard (built-in) downloader.
288     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
289                        if True, otherwise use ffmpeg/avconv if False, otherwise
290                        use downloader suggested by extractor if None.
291
292     The following parameters are not used by YoutubeDL itself, they are used by
293     the downloader (see youtube_dl/downloader/common.py):
294     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
295     noresizebuffer, retries, continuedl, noprogress, consoletitle,
296     xattr_set_filesize, external_downloader_args, hls_use_mpegts.
297
298     The following options are used by the post processors:
299     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
300                        otherwise prefer avconv.
301     postprocessor_args: A list of additional command-line arguments for the
302                         postprocessor.
303     """
304
305     params = None
306     _ies = []
307     _pps = []
308     _download_retcode = None
309     _num_downloads = None
310     _screen_file = None
311
312     def __init__(self, params=None, auto_init=True):
313         """Create a FileDownloader object with the given options."""
314         if params is None:
315             params = {}
316         self._ies = []
317         self._ies_instances = {}
318         self._pps = []
319         self._progress_hooks = []
320         self._download_retcode = 0
321         self._num_downloads = 0
322         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
323         self._err_file = sys.stderr
324         self.params = {
325             # Default parameters
326             'nocheckcertificate': False,
327         }
328         self.params.update(params)
329         self.cache = Cache(self)
330
331         if self.params.get('cn_verification_proxy') is not None:
332             self.report_warning('--cn-verification-proxy is deprecated. Use --geo-verification-proxy instead.')
333             if self.params.get('geo_verification_proxy') is None:
334                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
335
336         if params.get('bidi_workaround', False):
337             try:
338                 import pty
339                 master, slave = pty.openpty()
340                 width = compat_get_terminal_size().columns
341                 if width is None:
342                     width_args = []
343                 else:
344                     width_args = ['-w', str(width)]
345                 sp_kwargs = dict(
346                     stdin=subprocess.PIPE,
347                     stdout=slave,
348                     stderr=self._err_file)
349                 try:
350                     self._output_process = subprocess.Popen(
351                         ['bidiv'] + width_args, **sp_kwargs
352                     )
353                 except OSError:
354                     self._output_process = subprocess.Popen(
355                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
356                 self._output_channel = os.fdopen(master, 'rb')
357             except OSError as ose:
358                 if ose.errno == errno.ENOENT:
359                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
360                 else:
361                     raise
362
363         if (sys.version_info >= (3,) and sys.platform != 'win32' and
364                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
365                 not params.get('restrictfilenames', False)):
366             # On Python 3, the Unicode filesystem API will throw errors (#1474)
367             self.report_warning(
368                 'Assuming --restrict-filenames since file system encoding '
369                 'cannot encode all characters. '
370                 'Set the LC_ALL environment variable to fix this.')
371             self.params['restrictfilenames'] = True
372
373         if isinstance(params.get('outtmpl'), bytes):
374             self.report_warning(
375                 'Parameter outtmpl is bytes, but should be a unicode string. '
376                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
377
378         self._setup_opener()
379
380         if auto_init:
381             self.print_debug_header()
382             self.add_default_info_extractors()
383
384         for pp_def_raw in self.params.get('postprocessors', []):
385             pp_class = get_postprocessor(pp_def_raw['key'])
386             pp_def = dict(pp_def_raw)
387             del pp_def['key']
388             pp = pp_class(self, **compat_kwargs(pp_def))
389             self.add_post_processor(pp)
390
391         for ph in self.params.get('progress_hooks', []):
392             self.add_progress_hook(ph)
393
394         register_socks_protocols()
395
396     def warn_if_short_id(self, argv):
397         # short YouTube ID starting with dash?
398         idxs = [
399             i for i, a in enumerate(argv)
400             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
401         if idxs:
402             correct_argv = (
403                 ['youtube-dl'] +
404                 [a for i, a in enumerate(argv) if i not in idxs] +
405                 ['--'] + [argv[i] for i in idxs]
406             )
407             self.report_warning(
408                 'Long argument string detected. '
409                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
410                 args_to_str(correct_argv))
411
412     def add_info_extractor(self, ie):
413         """Add an InfoExtractor object to the end of the list."""
414         self._ies.append(ie)
415         if not isinstance(ie, type):
416             self._ies_instances[ie.ie_key()] = ie
417             ie.set_downloader(self)
418
419     def get_info_extractor(self, ie_key):
420         """
421         Get an instance of an IE with name ie_key, it will try to get one from
422         the _ies list, if there's no instance it will create a new one and add
423         it to the extractor list.
424         """
425         ie = self._ies_instances.get(ie_key)
426         if ie is None:
427             ie = get_info_extractor(ie_key)()
428             self.add_info_extractor(ie)
429         return ie
430
431     def add_default_info_extractors(self):
432         """
433         Add the InfoExtractors returned by gen_extractors to the end of the list
434         """
435         for ie in gen_extractor_classes():
436             self.add_info_extractor(ie)
437
438     def add_post_processor(self, pp):
439         """Add a PostProcessor object to the end of the chain."""
440         self._pps.append(pp)
441         pp.set_downloader(self)
442
443     def add_progress_hook(self, ph):
444         """Add the progress hook (currently only for the file downloader)"""
445         self._progress_hooks.append(ph)
446
447     def _bidi_workaround(self, message):
448         if not hasattr(self, '_output_channel'):
449             return message
450
451         assert hasattr(self, '_output_process')
452         assert isinstance(message, compat_str)
453         line_count = message.count('\n') + 1
454         self._output_process.stdin.write((message + '\n').encode('utf-8'))
455         self._output_process.stdin.flush()
456         res = ''.join(self._output_channel.readline().decode('utf-8')
457                       for _ in range(line_count))
458         return res[:-len('\n')]
459
460     def to_screen(self, message, skip_eol=False):
461         """Print message to stdout if not in quiet mode."""
462         return self.to_stdout(message, skip_eol, check_quiet=True)
463
464     def _write_string(self, s, out=None):
465         write_string(s, out=out, encoding=self.params.get('encoding'))
466
467     def to_stdout(self, message, skip_eol=False, check_quiet=False):
468         """Print message to stdout if not in quiet mode."""
469         if self.params.get('logger'):
470             self.params['logger'].debug(message)
471         elif not check_quiet or not self.params.get('quiet', False):
472             message = self._bidi_workaround(message)
473             terminator = ['\n', ''][skip_eol]
474             output = message + terminator
475
476             self._write_string(output, self._screen_file)
477
478     def to_stderr(self, message):
479         """Print message to stderr."""
480         assert isinstance(message, compat_str)
481         if self.params.get('logger'):
482             self.params['logger'].error(message)
483         else:
484             message = self._bidi_workaround(message)
485             output = message + '\n'
486             self._write_string(output, self._err_file)
487
488     def to_console_title(self, message):
489         if not self.params.get('consoletitle', False):
490             return
491         if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
492             # c_wchar_p() might not be necessary if `message` is
493             # already of type unicode()
494             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
495         elif 'TERM' in os.environ:
496             self._write_string('\033]0;%s\007' % message, self._screen_file)
497
498     def save_console_title(self):
499         if not self.params.get('consoletitle', False):
500             return
501         if 'TERM' in os.environ:
502             # Save the title on stack
503             self._write_string('\033[22;0t', self._screen_file)
504
505     def restore_console_title(self):
506         if not self.params.get('consoletitle', False):
507             return
508         if 'TERM' in os.environ:
509             # Restore the title from stack
510             self._write_string('\033[23;0t', self._screen_file)
511
512     def __enter__(self):
513         self.save_console_title()
514         return self
515
516     def __exit__(self, *args):
517         self.restore_console_title()
518
519         if self.params.get('cookiefile') is not None:
520             self.cookiejar.save()
521
522     def trouble(self, message=None, tb=None):
523         """Determine action to take when a download problem appears.
524
525         Depending on if the downloader has been configured to ignore
526         download errors or not, this method may throw an exception or
527         not when errors are found, after printing the message.
528
529         tb, if given, is additional traceback information.
530         """
531         if message is not None:
532             self.to_stderr(message)
533         if self.params.get('verbose'):
534             if tb is None:
535                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
536                     tb = ''
537                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
538                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
539                     tb += encode_compat_str(traceback.format_exc())
540                 else:
541                     tb_data = traceback.format_list(traceback.extract_stack())
542                     tb = ''.join(tb_data)
543             self.to_stderr(tb)
544         if not self.params.get('ignoreerrors', False):
545             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
546                 exc_info = sys.exc_info()[1].exc_info
547             else:
548                 exc_info = sys.exc_info()
549             raise DownloadError(message, exc_info)
550         self._download_retcode = 1
551
552     def report_warning(self, message):
553         '''
554         Print the message to stderr, it will be prefixed with 'WARNING:'
555         If stderr is a tty file the 'WARNING:' will be colored
556         '''
557         if self.params.get('logger') is not None:
558             self.params['logger'].warning(message)
559         else:
560             if self.params.get('no_warnings'):
561                 return
562             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
563                 _msg_header = '\033[0;33mWARNING:\033[0m'
564             else:
565                 _msg_header = 'WARNING:'
566             warning_message = '%s %s' % (_msg_header, message)
567             self.to_stderr(warning_message)
568
569     def report_error(self, message, tb=None):
570         '''
571         Do the same as trouble, but prefixes the message with 'ERROR:', colored
572         in red if stderr is a tty file.
573         '''
574         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
575             _msg_header = '\033[0;31mERROR:\033[0m'
576         else:
577             _msg_header = 'ERROR:'
578         error_message = '%s %s' % (_msg_header, message)
579         self.trouble(error_message, tb)
580
581     def report_file_already_downloaded(self, file_name):
582         """Report file has already been fully downloaded."""
583         try:
584             self.to_screen('[download] %s has already been downloaded' % file_name)
585         except UnicodeEncodeError:
586             self.to_screen('[download] The file has already been downloaded')
587
588     def prepare_filename(self, info_dict):
589         """Generate the output filename."""
590         try:
591             template_dict = dict(info_dict)
592
593             template_dict['epoch'] = int(time.time())
594             autonumber_size = self.params.get('autonumber_size')
595             if autonumber_size is None:
596                 autonumber_size = 5
597             autonumber_templ = '%0' + str(autonumber_size) + 'd'
598             template_dict['autonumber'] = autonumber_templ % (self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
599             if template_dict.get('playlist_index') is not None:
600                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
601             if template_dict.get('resolution') is None:
602                 if template_dict.get('width') and template_dict.get('height'):
603                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
604                 elif template_dict.get('height'):
605                     template_dict['resolution'] = '%sp' % template_dict['height']
606                 elif template_dict.get('width'):
607                     template_dict['resolution'] = '%dx?' % template_dict['width']
608
609             sanitize = lambda k, v: sanitize_filename(
610                 compat_str(v),
611                 restricted=self.params.get('restrictfilenames'),
612                 is_id=(k == 'id'))
613             template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
614                                  for k, v in template_dict.items()
615                                  if v is not None and not isinstance(v, (list, tuple, dict)))
616             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
617
618             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
619
620             NUMERIC_FIELDS = set((
621                 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
622                 'upload_year', 'upload_month', 'upload_day',
623                 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
624                 'average_rating', 'comment_count', 'age_limit',
625                 'start_time', 'end_time',
626                 'chapter_number', 'season_number', 'episode_number',
627             ))
628
629             # Missing numeric fields used together with integer presentation types
630             # in format specification will break the argument substitution since
631             # string 'NA' is returned for missing fields. We will patch output
632             # template for missing fields to meet string presentation type.
633             for numeric_field in NUMERIC_FIELDS:
634                 if numeric_field not in template_dict:
635                     # As of [1] format syntax is:
636                     #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
637                     # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
638                     FORMAT_RE = r'''(?x)
639                         (?<!%)
640                         %
641                         \({0}\)  # mapping key
642                         (?:[#0\-+ ]+)?  # conversion flags (optional)
643                         (?:\d+)?  # minimum field width (optional)
644                         (?:\.\d+)?  # precision (optional)
645                         [hlL]?  # length modifier (optional)
646                         [diouxXeEfFgGcrs%]  # conversion type
647                     '''
648                     outtmpl = re.sub(
649                         FORMAT_RE.format(numeric_field),
650                         r'%({0})s'.format(numeric_field), outtmpl)
651
652             tmpl = compat_expanduser(outtmpl)
653             filename = tmpl % template_dict
654             # Temporary fix for #4787
655             # 'Treat' all problem characters by passing filename through preferredencoding
656             # to workaround encoding issues with subprocess on python2 @ Windows
657             if sys.version_info < (3, 0) and sys.platform == 'win32':
658                 filename = encodeFilename(filename, True).decode(preferredencoding())
659             return sanitize_path(filename)
660         except ValueError as err:
661             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
662             return None
663
664     def _match_entry(self, info_dict, incomplete):
665         """ Returns None iff the file should be downloaded """
666
667         video_title = info_dict.get('title', info_dict.get('id', 'video'))
668         if 'title' in info_dict:
669             # This can happen when we're just evaluating the playlist
670             title = info_dict['title']
671             matchtitle = self.params.get('matchtitle', False)
672             if matchtitle:
673                 if not re.search(matchtitle, title, re.IGNORECASE):
674                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
675             rejecttitle = self.params.get('rejecttitle', False)
676             if rejecttitle:
677                 if re.search(rejecttitle, title, re.IGNORECASE):
678                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
679         date = info_dict.get('upload_date')
680         if date is not None:
681             dateRange = self.params.get('daterange', DateRange())
682             if date not in dateRange:
683                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
684         view_count = info_dict.get('view_count')
685         if view_count is not None:
686             min_views = self.params.get('min_views')
687             if min_views is not None and view_count < min_views:
688                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
689             max_views = self.params.get('max_views')
690             if max_views is not None and view_count > max_views:
691                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
692         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
693             return 'Skipping "%s" because it is age restricted' % video_title
694         if self.in_download_archive(info_dict):
695             return '%s has already been recorded in archive' % video_title
696
697         if not incomplete:
698             match_filter = self.params.get('match_filter')
699             if match_filter is not None:
700                 ret = match_filter(info_dict)
701                 if ret is not None:
702                     return ret
703
704         return None
705
706     @staticmethod
707     def add_extra_info(info_dict, extra_info):
708         '''Set the keys from extra_info in info dict if they are missing'''
709         for key, value in extra_info.items():
710             info_dict.setdefault(key, value)
711
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
                     process=True, force_generic_extractor=False):
        '''
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result

        ie_key: restrict extraction to the extractor with this key,
            skipping the per-extractor suitable() probing.
        process: when False, return the raw extractor result without
            resolving playlists/URL references via process_ie_result().
        force_generic_extractor: use the 'Generic' extractor when no
            explicit ie_key was given.
        '''

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        if ie_key:
            # Only consider the explicitly requested extractor.
            ies = [self.get_info_extractor(ie_key)]
        else:
            ies = self._ies

        for ie in ies:
            if not ie.suitable(url):
                continue

            # Re-fetch through get_info_extractor so the instance is
            # registered with this YoutubeDL (see add_info_extractor).
            ie = self.get_info_extractor(ie.ie_key())
            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            try:
                ie_result = ie.extract(url)
                if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                    break
                if isinstance(ie_result, list):
                    # Backwards compatibility: old IE result format
                    ie_result = {
                        '_type': 'compat_list',
                        'entries': ie_result,
                    }
                self.add_default_extra_info(ie_result, ie, url)
                if process:
                    return self.process_ie_result(ie_result, download, extra_info)
                else:
                    return ie_result
            except GeoRestrictedError as e:
                # Report which countries the video is available in, if known.
                msg = e.msg
                if e.countries:
                    msg += '\nThis video is available in %s.' % ', '.join(
                        map(ISO3166Utils.short2full, e.countries))
                msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                self.report_error(msg)
                break
            except ExtractorError as e:  # An error we somewhat expected
                self.report_error(compat_str(e), e.format_traceback())
                break
            except MaxDownloadsReached:
                # Propagate: this is the signal to stop the whole run.
                raise
            except Exception as e:
                if self.params.get('ignoreerrors', False):
                    self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
                    break
                else:
                    raise
        else:
            # for/else: the loop completed without finding a suitable extractor.
            self.report_error('no suitable InfoExtractor for URL %s' % url)
773
774     def add_default_extra_info(self, ie_result, ie, url):
775         self.add_extra_info(ie_result, {
776             'extractor': ie.IE_NAME,
777             'webpage_url': url,
778             'webpage_url_basename': url_basename(url),
779             'extractor_key': ie.ie_key(),
780         })
781
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.

        Recurses for 'url'/'url_transparent' results and iterates (and
        recursively processes) the entries of 'playlist'/'multi_video'
        results. extra_info values are merged into each result without
        overwriting existing keys.
        """
        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            ie_result['url'] = sanitize_url(ie_result['url'])
            extract_flat = self.params.get('extract_flat', False)
            # With --flat-playlist (or extract_flat == 'in_playlist' while
            # already inside a playlist) do not resolve the reference further.
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                    extract_flat is True):
                if self.params.get('forcejson', False):
                    self.to_stdout(json.dumps(ie_result))
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # Non-None fields from the embedding page override the extracted
            # ones, except for _type/url/ie_key which describe the reference
            # itself and must not leak into the merged result.
            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url', 'ie_key'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            assert new_result.get('_type') != 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist' or result_type == 'multi_video':
            # We process each entry in the playlist
            playlist = ie_result.get('title') or ie_result.get('id')
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # --playlist-start is 1-based on the CLI; convert to 0-based.
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend')
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

            playlistitems_str = self.params.get('playlist_items')
            playlistitems = None
            if playlistitems_str is not None:
                def iter_playlistitems(format):
                    # Expand '1-3,7' style specs into individual 1-based indices.
                    for string_segment in format.split(','):
                        if '-' in string_segment:
                            start, end = string_segment.split('-')
                            for item in range(int(start), int(end) + 1):
                                yield int(item)
                        else:
                            yield int(string_segment)
                playlistitems = iter_playlistitems(playlistitems_str)

            ie_entries = ie_result['entries']
            if isinstance(ie_entries, list):
                n_all_entries = len(ie_entries)
                if playlistitems:
                    # Keep only requested indices that fall inside the list
                    # (negative indices wrap, Python style).
                    entries = [
                        ie_entries[i - 1] for i in playlistitems
                        if -n_all_entries <= i - 1 < n_all_entries]
                else:
                    entries = ie_entries[playliststart:playlistend]
                n_entries = len(entries)
                self.to_screen(
                    '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            elif isinstance(ie_entries, PagedList):
                # Lazily paged lists: fetch only the slices actually needed.
                if playlistitems:
                    entries = []
                    for item in playlistitems:
                        entries.extend(ie_entries.getslice(
                            item - 1, item
                        ))
                else:
                    entries = ie_entries.getslice(
                        playliststart, playlistend)
                n_entries = len(entries)
                self.to_screen(
                    '[%s] playlist %s: Downloading %d videos' %
                    (ie_result['extractor'], playlist, n_entries))
            else:  # iterable
                if playlistitems:
                    # Generic iterables must be materialized for random access.
                    entry_list = list(ie_entries)
                    entries = [entry_list[i - 1] for i in playlistitems]
                else:
                    entries = list(itertools.islice(
                        ie_entries, playliststart, playlistend))
                n_entries = len(entries)
                self.to_screen(
                    '[%s] playlist %s: Downloading %d videos' %
                    (ie_result['extractor'], playlist, n_entries))

            if self.params.get('playlistreverse', False):
                entries = entries[::-1]

            if self.params.get('playlistrandom', False):
                random.shuffle(entries)

            x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                # This __x_forwarded_for_ip thing is a bit ugly but requires
                # minimal changes
                if x_forwarded_for:
                    entry['__x_forwarded_for_ip'] = x_forwarded_for
                # Playlist context merged (non-destructively) into each entry.
                extra = {
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_id': ie_result.get('id'),
                    'playlist_title': ie_result.get('title'),
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

                reason = self._match_entry(entry, incomplete=True)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                    continue

                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            self.to_screen('[download] Finished downloading playlist: %s' % playlist)
            return ie_result
        elif result_type == 'compat_list':
            # Legacy extractor output: a bare list of entries.
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            def _fixup(r):
                # Propagate the parent's bookkeeping fields to each entry.
                self.add_extra_info(
                    r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    }
                )
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
955
956     def _build_format_filter(self, filter_spec):
957         " Returns a function to filter the formats according to the filter_spec "
958
959         OPERATORS = {
960             '<': operator.lt,
961             '<=': operator.le,
962             '>': operator.gt,
963             '>=': operator.ge,
964             '=': operator.eq,
965             '!=': operator.ne,
966         }
967         operator_rex = re.compile(r'''(?x)\s*
968             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
969             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
970             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
971             $
972             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
973         m = operator_rex.search(filter_spec)
974         if m:
975             try:
976                 comparison_value = int(m.group('value'))
977             except ValueError:
978                 comparison_value = parse_filesize(m.group('value'))
979                 if comparison_value is None:
980                     comparison_value = parse_filesize(m.group('value') + 'B')
981                 if comparison_value is None:
982                     raise ValueError(
983                         'Invalid value %r in format specification %r' % (
984                             m.group('value'), filter_spec))
985             op = OPERATORS[m.group('op')]
986
987         if not m:
988             STR_OPERATORS = {
989                 '=': operator.eq,
990                 '!=': operator.ne,
991                 '^=': lambda attr, value: attr.startswith(value),
992                 '$=': lambda attr, value: attr.endswith(value),
993                 '*=': lambda attr, value: value in attr,
994             }
995             str_operator_rex = re.compile(r'''(?x)
996                 \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
997                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
998                 \s*(?P<value>[a-zA-Z0-9._-]+)
999                 \s*$
1000                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1001             m = str_operator_rex.search(filter_spec)
1002             if m:
1003                 comparison_value = m.group('value')
1004                 op = STR_OPERATORS[m.group('op')]
1005
1006         if not m:
1007             raise ValueError('Invalid filter specification %r' % filter_spec)
1008
1009         def _filter(f):
1010             actual_value = f.get(m.group('key'))
1011             if actual_value is None:
1012                 return m.group('none_inclusive')
1013             return op(actual_value, comparison_value)
1014         return _filter
1015
    def build_format_selector(self, format_spec):
        """Compile format_spec (e.g. 'bestvideo+bestaudio/best') into a
        selector function that takes a ctx dict (with 'formats' and
        'incomplete_formats' keys) and yields the selected format dicts."""
        def syntax_error(note, start):
            # Build (not raise) a SyntaxError pointing at column start[1].
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        # Node types of the parsed selector tree.
        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        def _parse_filter(tokens):
            # Consume tokens up to the closing ']' and return the raw
            # filter string (e.g. 'height<=480') for _build_format_filter.
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    # Flush any pending merged NAME token before the bracket.
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    # Accumulate adjacent tokens into one NAME (format ids
                    # may contain characters the tokenizer splits on).
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Recursive-descent parser producing a list of FormatSelector
            # nodes; the inside_* flags decide which operators terminate
            # the current subexpression.
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        # 'a/b': fall back to b when a yields nothing.
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        # A bare '[filter]' implicitly filters 'best'.
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        # 'video+audio': merge two formats into one download.
                        video_selector = current_selector
                        audio_selector = _parse_format_selection(tokens, inside_merge=True)
                        if not video_selector or not audio_selector:
                            raise syntax_error('"+" must be between two format selectors', start)
                        current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _build_selector_function(selector):
            # Turn a FormatSelector node (or list of comma-alternatives)
            # into a function of ctx yielding the matching formats.
            if isinstance(selector, list):
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    for f in fs:
                        for format in f(ctx):
                            yield format
                return selector_function
            elif selector.type == GROUP:
                selector_function = _build_selector_function(selector.selector)
            elif selector.type == PICKFIRST:
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    # First alternative that yields anything wins.
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []
            elif selector.type == SINGLE:
                format_spec = selector.selector

                def selector_function(ctx):
                    # NOTE: formats are assumed sorted worst-to-best, so
                    # index -1 is 'best' and 0 is 'worst'.
                    formats = list(ctx['formats'])
                    if not formats:
                        return
                    if format_spec == 'all':
                        for f in formats:
                            yield f
                    elif format_spec in ['best', 'worst', None]:
                        format_idx = 0 if format_spec == 'worst' else -1
                        audiovideo_formats = [
                            f for f in formats
                            if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                        if audiovideo_formats:
                            yield audiovideo_formats[format_idx]
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) we will fallback to best/worst
                        # {video,audio}-only format
                        elif ctx['incomplete_formats']:
                            yield formats[format_idx]
                    elif format_spec == 'bestaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[-1]
                    elif format_spec == 'worstaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[0]
                    elif format_spec == 'bestvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[-1]
                    elif format_spec == 'worstvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[0]
                    else:
                        # Otherwise treat the spec as an extension or an
                        # explicit format_id; take the best (last) match.
                        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                        if format_spec in extensions:
                            filter_f = lambda f: f['ext'] == format_spec
                        else:
                            filter_f = lambda f: f['format_id'] == format_spec
                        matches = list(filter(filter_f, formats))
                        if matches:
                            yield matches[-1]
            elif selector.type == MERGE:
                def _merge(formats_info):
                    # Combine a (video, audio) pair into one synthetic
                    # format dict describing the merged download.
                    format_1, format_2 = [f['format_id'] for f in formats_info]
                    # The first format must contain the video and the
                    # second the audio
                    if formats_info[0].get('vcodec') == 'none':
                        self.report_error('The first format must '
                                          'contain the video, try using '
                                          '"-f %s+%s"' % (format_2, format_1))
                        return
                    # Formats must be opposite (video+audio)
                    if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
                        self.report_error(
                            'Both formats %s and %s are video-only, you must specify "-f video+audio"'
                            % (format_1, format_2))
                        return
                    output_ext = (
                        formats_info[0]['ext']
                        if self.params.get('merge_output_format') is None
                        else self.params['merge_output_format'])
                    return {
                        'requested_formats': formats_info,
                        'format': '%s+%s' % (formats_info[0].get('format'),
                                             formats_info[1].get('format')),
                        'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                formats_info[1].get('format_id')),
                        'width': formats_info[0].get('width'),
                        'height': formats_info[0].get('height'),
                        'resolution': formats_info[0].get('resolution'),
                        'fps': formats_info[0].get('fps'),
                        'vcodec': formats_info[0].get('vcodec'),
                        'vbr': formats_info[0].get('vbr'),
                        'stretched_ratio': formats_info[0].get('stretched_ratio'),
                        'acodec': formats_info[1].get('acodec'),
                        'abr': formats_info[1].get('abr'),
                        'ext': output_ext,
                    }
                video_selector, audio_selector = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    # deepcopy: each sub-selector gets its own ctx to filter.
                    for pair in itertools.product(
                            video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
                        yield _merge(pair)

            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                # Apply any [field<op>value] filters before selecting.
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector

        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator(object):
            # Token stream with single-token pushback (restore_last_token),
            # required by the recursive-descent parser above.
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            next = __next__

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
1280
1281     def _calc_headers(self, info_dict):
1282         res = std_headers.copy()
1283
1284         add_headers = info_dict.get('http_headers')
1285         if add_headers:
1286             res.update(add_headers)
1287
1288         cookies = self._calc_cookies(info_dict)
1289         if cookies:
1290             res['Cookie'] = cookies
1291
1292         if 'X-Forwarded-For' not in res:
1293             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1294             if x_forwarded_for_ip:
1295                 res['X-Forwarded-For'] = x_forwarded_for_ip
1296
1297         return res
1298
1299     def _calc_cookies(self, info_dict):
1300         pr = sanitized_Request(info_dict['url'])
1301         self.cookiejar.add_cookie_header(pr)
1302         return pr.get_header('Cookie')
1303
    def process_video_result(self, info_dict, download=True):
        """Sanitize and complete a single extracted video result, run format
        selection and, when download is True, hand each selected format to
        process_info().

        Mutates info_dict in place (thumbnails, upload_date, display_id,
        per-format defaults, requested_subtitles) and finally updates it with
        the last selected format for backwards compatibility with
        single-format callers.

        Raises ExtractorError when mandatory fields ('id', 'title', per-format
        'url') are missing, when no formats exist, or when no format matches
        the requested selector.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        if not isinstance(info_dict['id'], compat_str):
            self.report_warning('"id" field is not a string - forcing string conversion')
            info_dict['id'] = compat_str(info_dict['id'])

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        # Normalize thumbnails: promote a lone 'thumbnail' into the
        # 'thumbnails' list, then sanitize and fill in resolution/id.
        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if thumbnails:
            # Sorted worst-to-best, so thumbnails[-1] is the preferred one
            # when picking a fallback 'thumbnail' below.
            thumbnails.sort(key=lambda t: (
                t.get('preference') if t.get('preference') is not None else -1,
                t.get('width') if t.get('width') is not None else -1,
                t.get('height') if t.get('height') is not None else -1,
                t.get('id') if t.get('id') is not None else '', t.get('url')))
            for i, t in enumerate(thumbnails):
                t['url'] = sanitize_url(t['url'])
                if t.get('width') and t.get('height'):
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                if t.get('id') is None:
                    t['id'] = '%d' % i

        # --list-thumbnails short-circuits all further processing.
        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
            return

        thumbnail = info_dict.get('thumbnail')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            # Best thumbnail is last after the sort above.
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Working around out-of-range timestamp values (e.g. negative ones on Windows,
            # see http://bugs.python.org/issue1646728)
            try:
                upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
                info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
            except (ValueError, OverflowError, OSError):
                pass

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        # Sanitize subtitle URLs and guess missing subtitle extensions.
        subtitles = info_dict.get('subtitles')
        if subtitles:
            for _, subtitle in subtitles.items():
                for subtitle_format in subtitle:
                    if subtitle_format.get('url'):
                        subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                    if subtitle_format.get('ext') is None:
                        subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        # --list-subs also short-circuits all further processing.
        if self.params.get('listsubtitles', False):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
            return
        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles,
            info_dict.get('automatic_captions'))

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            raise ExtractorError('No video formats found!')

        # Maps format_id -> list of formats sharing it, for deduplication.
        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)

            format['url'] = sanitize_url(format['url'])

            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        for format_id, ambiguous_formats in formats_dict.items():
            if len(ambiguous_formats) > 1:
                for i, format in enumerate(ambiguous_formats):
                    format['format_id'] = '%s-%d' % (format_id, i)

        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if format.get('ext') is None:
                format['ext'] = determine_ext(format['url']).lower()
            # Automatically determine protocol if missing (useful for format
            # selection purposes)
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats'):
            self.list_formats(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            # Default format: merged bestvideo+bestaudio when a working
            # merger is available and output is seekable (not stdout, not
            # live), falling back to the single best format.
            req_format_list = []
            if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
                    not info_dict.get('is_live')):
                merger = FFmpegMergerPP(self)
                if merger.available and merger.can_merge():
                    req_format_list.append('bestvideo+bestaudio')
            req_format_list.append('best')
            req_format = '/'.join(req_format_list)
        format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/rg3/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/rg3/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) or
            # all formats are audio-only
            all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            raise ExtractorError('requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
1508
1509     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1510         """Select the requested subtitles and their format"""
1511         available_subs = {}
1512         if normal_subtitles and self.params.get('writesubtitles'):
1513             available_subs.update(normal_subtitles)
1514         if automatic_captions and self.params.get('writeautomaticsub'):
1515             for lang, cap_info in automatic_captions.items():
1516                 if lang not in available_subs:
1517                     available_subs[lang] = cap_info
1518
1519         if (not self.params.get('writesubtitles') and not
1520                 self.params.get('writeautomaticsub') or not
1521                 available_subs):
1522             return None
1523
1524         if self.params.get('allsubtitles', False):
1525             requested_langs = available_subs.keys()
1526         else:
1527             if self.params.get('subtitleslangs', False):
1528                 requested_langs = self.params.get('subtitleslangs')
1529             elif 'en' in available_subs:
1530                 requested_langs = ['en']
1531             else:
1532                 requested_langs = [list(available_subs.keys())[0]]
1533
1534         formats_query = self.params.get('subtitlesformat', 'best')
1535         formats_preference = formats_query.split('/') if formats_query else []
1536         subs = {}
1537         for lang in requested_langs:
1538             formats = available_subs.get(lang)
1539             if formats is None:
1540                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1541                 continue
1542             for ext in formats_preference:
1543                 if ext == 'best':
1544                     f = formats[-1]
1545                     break
1546                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1547                 if matches:
1548                     f = matches[-1]
1549                     break
1550             else:
1551                 f = formats[-1]
1552                 self.report_warning(
1553                     'No subtitle format found matching "%s" for language %s, '
1554                     'using %s' % (formats_query, lang, f['ext']))
1555             subs[lang] = f
1556         return subs
1557
1558     def process_info(self, info_dict):
1559         """Process a single resolved IE result."""
1560
1561         assert info_dict.get('_type', 'video') == 'video'
1562
1563         max_downloads = self.params.get('max_downloads')
1564         if max_downloads is not None:
1565             if self._num_downloads >= int(max_downloads):
1566                 raise MaxDownloadsReached()
1567
1568         info_dict['fulltitle'] = info_dict['title']
1569         if len(info_dict['title']) > 200:
1570             info_dict['title'] = info_dict['title'][:197] + '...'
1571
1572         if 'format' not in info_dict:
1573             info_dict['format'] = info_dict['ext']
1574
1575         reason = self._match_entry(info_dict, incomplete=False)
1576         if reason is not None:
1577             self.to_screen('[download] ' + reason)
1578             return
1579
1580         self._num_downloads += 1
1581
1582         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1583
1584         # Forced printings
1585         if self.params.get('forcetitle', False):
1586             self.to_stdout(info_dict['fulltitle'])
1587         if self.params.get('forceid', False):
1588             self.to_stdout(info_dict['id'])
1589         if self.params.get('forceurl', False):
1590             if info_dict.get('requested_formats') is not None:
1591                 for f in info_dict['requested_formats']:
1592                     self.to_stdout(f['url'] + f.get('play_path', ''))
1593             else:
1594                 # For RTMP URLs, also include the playpath
1595                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1596         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1597             self.to_stdout(info_dict['thumbnail'])
1598         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1599             self.to_stdout(info_dict['description'])
1600         if self.params.get('forcefilename', False) and filename is not None:
1601             self.to_stdout(filename)
1602         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1603             self.to_stdout(formatSeconds(info_dict['duration']))
1604         if self.params.get('forceformat', False):
1605             self.to_stdout(info_dict['format'])
1606         if self.params.get('forcejson', False):
1607             self.to_stdout(json.dumps(info_dict))
1608
1609         # Do nothing else if in simulate mode
1610         if self.params.get('simulate', False):
1611             return
1612
1613         if filename is None:
1614             return
1615
1616         try:
1617             dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1618             if dn and not os.path.exists(dn):
1619                 os.makedirs(dn)
1620         except (OSError, IOError) as err:
1621             self.report_error('unable to create directory ' + error_to_compat_str(err))
1622             return
1623
1624         if self.params.get('writedescription', False):
1625             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1626             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1627                 self.to_screen('[info] Video description is already present')
1628             elif info_dict.get('description') is None:
1629                 self.report_warning('There\'s no description to write.')
1630             else:
1631                 try:
1632                     self.to_screen('[info] Writing video description to: ' + descfn)
1633                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1634                         descfile.write(info_dict['description'])
1635                 except (OSError, IOError):
1636                     self.report_error('Cannot write description file ' + descfn)
1637                     return
1638
1639         if self.params.get('writeannotations', False):
1640             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1641             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1642                 self.to_screen('[info] Video annotations are already present')
1643             else:
1644                 try:
1645                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1646                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1647                         annofile.write(info_dict['annotations'])
1648                 except (KeyError, TypeError):
1649                     self.report_warning('There are no annotations to write.')
1650                 except (OSError, IOError):
1651                     self.report_error('Cannot write annotations file: ' + annofn)
1652                     return
1653
1654         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1655                                        self.params.get('writeautomaticsub')])
1656
1657         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1658             # subtitles download errors are already managed as troubles in relevant IE
1659             # that way it will silently go on when used with unsupporting IE
1660             subtitles = info_dict['requested_subtitles']
1661             ie = self.get_info_extractor(info_dict['extractor_key'])
1662             for sub_lang, sub_info in subtitles.items():
1663                 sub_format = sub_info['ext']
1664                 if sub_info.get('data') is not None:
1665                     sub_data = sub_info['data']
1666                 else:
1667                     try:
1668                         sub_data = ie._download_webpage(
1669                             sub_info['url'], info_dict['id'], note=False)
1670                     except ExtractorError as err:
1671                         self.report_warning('Unable to download subtitle for "%s": %s' %
1672                                             (sub_lang, error_to_compat_str(err.cause)))
1673                         continue
1674                 try:
1675                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1676                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1677                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1678                     else:
1679                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1680                         # Use newline='' to prevent conversion of newline characters
1681                         # See https://github.com/rg3/youtube-dl/issues/10268
1682                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
1683                             subfile.write(sub_data)
1684                 except (OSError, IOError):
1685                     self.report_error('Cannot write subtitles file ' + sub_filename)
1686                     return
1687
1688         if self.params.get('writeinfojson', False):
1689             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1690             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1691                 self.to_screen('[info] Video description metadata is already present')
1692             else:
1693                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1694                 try:
1695                     write_json_file(self.filter_requested_info(info_dict), infofn)
1696                 except (OSError, IOError):
1697                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1698                     return
1699
1700         self._write_thumbnails(info_dict, filename)
1701
1702         if not self.params.get('skip_download', False):
1703             try:
1704                 def dl(name, info):
1705                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1706                     for ph in self._progress_hooks:
1707                         fd.add_progress_hook(ph)
1708                     if self.params.get('verbose'):
1709                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1710                     return fd.download(name, info)
1711
1712                 if info_dict.get('requested_formats') is not None:
1713                     downloaded = []
1714                     success = True
1715                     merger = FFmpegMergerPP(self)
1716                     if not merger.available:
1717                         postprocessors = []
1718                         self.report_warning('You have requested multiple '
1719                                             'formats but ffmpeg or avconv are not installed.'
1720                                             ' The formats won\'t be merged.')
1721                     else:
1722                         postprocessors = [merger]
1723
1724                     def compatible_formats(formats):
1725                         video, audio = formats
1726                         # Check extension
1727                         video_ext, audio_ext = audio.get('ext'), video.get('ext')
1728                         if video_ext and audio_ext:
1729                             COMPATIBLE_EXTS = (
1730                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
1731                                 ('webm')
1732                             )
1733                             for exts in COMPATIBLE_EXTS:
1734                                 if video_ext in exts and audio_ext in exts:
1735                                     return True
1736                         # TODO: Check acodec/vcodec
1737                         return False
1738
1739                     filename_real_ext = os.path.splitext(filename)[1][1:]
1740                     filename_wo_ext = (
1741                         os.path.splitext(filename)[0]
1742                         if filename_real_ext == info_dict['ext']
1743                         else filename)
1744                     requested_formats = info_dict['requested_formats']
1745                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1746                         info_dict['ext'] = 'mkv'
1747                         self.report_warning(
1748                             'Requested formats are incompatible for merge and will be merged into mkv.')
1749                     # Ensure filename always has a correct extension for successful merge
1750                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1751                     if os.path.exists(encodeFilename(filename)):
1752                         self.to_screen(
1753                             '[download] %s has already been downloaded and '
1754                             'merged' % filename)
1755                     else:
1756                         for f in requested_formats:
1757                             new_info = dict(info_dict)
1758                             new_info.update(f)
1759                             fname = self.prepare_filename(new_info)
1760                             fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
1761                             downloaded.append(fname)
1762                             partial_success = dl(fname, new_info)
1763                             success = success and partial_success
1764                         info_dict['__postprocessors'] = postprocessors
1765                         info_dict['__files_to_merge'] = downloaded
1766                 else:
1767                     # Just a single file
1768                     success = dl(filename, info_dict)
1769             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1770                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
1771                 return
1772             except (OSError, IOError) as err:
1773                 raise UnavailableVideoError(err)
1774             except (ContentTooShortError, ) as err:
1775                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1776                 return
1777
1778             if success and filename != '-':
1779                 # Fixup content
1780                 fixup_policy = self.params.get('fixup')
1781                 if fixup_policy is None:
1782                     fixup_policy = 'detect_or_warn'
1783
1784                 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
1785
1786                 stretched_ratio = info_dict.get('stretched_ratio')
1787                 if stretched_ratio is not None and stretched_ratio != 1:
1788                     if fixup_policy == 'warn':
1789                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1790                             info_dict['id'], stretched_ratio))
1791                     elif fixup_policy == 'detect_or_warn':
1792                         stretched_pp = FFmpegFixupStretchedPP(self)
1793                         if stretched_pp.available:
1794                             info_dict.setdefault('__postprocessors', [])
1795                             info_dict['__postprocessors'].append(stretched_pp)
1796                         else:
1797                             self.report_warning(
1798                                 '%s: Non-uniform pixel ratio (%s). %s'
1799                                 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
1800                     else:
1801                         assert fixup_policy in ('ignore', 'never')
1802
1803                 if (info_dict.get('requested_formats') is None and
1804                         info_dict.get('container') == 'm4a_dash'):
1805                     if fixup_policy == 'warn':
1806                         self.report_warning(
1807                             '%s: writing DASH m4a. '
1808                             'Only some players support this container.'
1809                             % info_dict['id'])
1810                     elif fixup_policy == 'detect_or_warn':
1811                         fixup_pp = FFmpegFixupM4aPP(self)
1812                         if fixup_pp.available:
1813                             info_dict.setdefault('__postprocessors', [])
1814                             info_dict['__postprocessors'].append(fixup_pp)
1815                         else:
1816                             self.report_warning(
1817                                 '%s: writing DASH m4a. '
1818                                 'Only some players support this container. %s'
1819                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
1820                     else:
1821                         assert fixup_policy in ('ignore', 'never')
1822
1823                 if (info_dict.get('protocol') == 'm3u8_native' or
1824                         info_dict.get('protocol') == 'm3u8' and
1825                         self.params.get('hls_prefer_native')):
1826                     if fixup_policy == 'warn':
1827                         self.report_warning('%s: malformated aac bitstream.' % (
1828                             info_dict['id']))
1829                     elif fixup_policy == 'detect_or_warn':
1830                         fixup_pp = FFmpegFixupM3u8PP(self)
1831                         if fixup_pp.available:
1832                             info_dict.setdefault('__postprocessors', [])
1833                             info_dict['__postprocessors'].append(fixup_pp)
1834                         else:
1835                             self.report_warning(
1836                                 '%s: malformated aac bitstream. %s'
1837                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
1838                     else:
1839                         assert fixup_policy in ('ignore', 'never')
1840
1841                 try:
1842                     self.post_process(filename, info_dict)
1843                 except (PostProcessingError) as err:
1844                     self.report_error('postprocessing: %s' % str(err))
1845                     return
1846                 self.record_download_archive(info_dict)
1847
1848     def download(self, url_list):
1849         """Download a given list of URLs."""
1850         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1851         if (len(url_list) > 1 and
1852                 '%' not in outtmpl and
1853                 self.params.get('max_downloads') != 1):
1854             raise SameFileError(outtmpl)
1855
1856         for url in url_list:
1857             try:
1858                 # It also downloads the videos
1859                 res = self.extract_info(
1860                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
1861             except UnavailableVideoError:
1862                 self.report_error('unable to download video')
1863             except MaxDownloadsReached:
1864                 self.to_screen('[info] Maximum number of downloaded files reached.')
1865                 raise
1866             else:
1867                 if self.params.get('dump_single_json', False):
1868                     self.to_stdout(json.dumps(res))
1869
1870         return self._download_retcode
1871
1872     def download_with_info_file(self, info_filename):
1873         with contextlib.closing(fileinput.FileInput(
1874                 [info_filename], mode='r',
1875                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1876             # FileInput doesn't have a read method, we can't call json.load
1877             info = self.filter_requested_info(json.loads('\n'.join(f)))
1878         try:
1879             self.process_ie_result(info, download=True)
1880         except DownloadError:
1881             webpage_url = info.get('webpage_url')
1882             if webpage_url is not None:
1883                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1884                 return self.download([webpage_url])
1885             else:
1886                 raise
1887         return self._download_retcode
1888
1889     @staticmethod
1890     def filter_requested_info(info_dict):
1891         return dict(
1892             (k, v) for k, v in info_dict.items()
1893             if k not in ['requested_formats', 'requested_subtitles'])
1894
1895     def post_process(self, filename, ie_info):
1896         """Run all the postprocessors on the given file."""
1897         info = dict(ie_info)
1898         info['filepath'] = filename
1899         pps_chain = []
1900         if ie_info.get('__postprocessors') is not None:
1901             pps_chain.extend(ie_info['__postprocessors'])
1902         pps_chain.extend(self._pps)
1903         for pp in pps_chain:
1904             files_to_delete = []
1905             try:
1906                 files_to_delete, info = pp.run(info)
1907             except PostProcessingError as e:
1908                 self.report_error(e.msg)
1909             if files_to_delete and not self.params.get('keepvideo', False):
1910                 for old_filename in files_to_delete:
1911                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1912                     try:
1913                         os.remove(encodeFilename(old_filename))
1914                     except (IOError, OSError):
1915                         self.report_warning('Unable to remove downloaded original file')
1916
1917     def _make_archive_id(self, info_dict):
1918         # Future-proof against any change in case
1919         # and backwards compatibility with prior versions
1920         extractor = info_dict.get('extractor_key')
1921         if extractor is None:
1922             if 'id' in info_dict:
1923                 extractor = info_dict.get('ie_key')  # key in a playlist
1924         if extractor is None:
1925             return None  # Incomplete video information
1926         return extractor.lower() + ' ' + info_dict['id']
1927
1928     def in_download_archive(self, info_dict):
1929         fn = self.params.get('download_archive')
1930         if fn is None:
1931             return False
1932
1933         vid_id = self._make_archive_id(info_dict)
1934         if vid_id is None:
1935             return False  # Incomplete video information
1936
1937         try:
1938             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1939                 for line in archive_file:
1940                     if line.strip() == vid_id:
1941                         return True
1942         except IOError as ioe:
1943             if ioe.errno != errno.ENOENT:
1944                 raise
1945         return False
1946
1947     def record_download_archive(self, info_dict):
1948         fn = self.params.get('download_archive')
1949         if fn is None:
1950             return
1951         vid_id = self._make_archive_id(info_dict)
1952         assert vid_id
1953         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1954             archive_file.write(vid_id + '\n')
1955
1956     @staticmethod
1957     def format_resolution(format, default='unknown'):
1958         if format.get('vcodec') == 'none':
1959             return 'audio only'
1960         if format.get('resolution') is not None:
1961             return format['resolution']
1962         if format.get('height') is not None:
1963             if format.get('width') is not None:
1964                 res = '%sx%s' % (format['width'], format['height'])
1965             else:
1966                 res = '%sp' % format['height']
1967         elif format.get('width') is not None:
1968             res = '%dx?' % format['width']
1969         else:
1970             res = default
1971         return res
1972
    def _format_note(self, fdict):
        """Return a short human-readable note describing the format dict fdict.

        Pieces (codec names, bitrates, fps, filesize, ...) are appended one
        after another; most pieces prepend ', ' once the note is non-empty,
        so the order of the checks below determines the output layout.
        """
        res = ''
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('language'):
            if res:
                res += ' '
            res += '[%s] ' % fdict['language']
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            if res:
                res += ', '
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            if res:
                res += ', '
            res += fdict['vcodec']
            # '@' glues the codec name to the video bitrate appended below
            if fdict.get('vbr') is not None:
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            # No usable video codec, but separate video/audio bitrates exist
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            if res:
                res += ', '
            res += '%sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            if fdict['acodec'] == 'none':
                res += 'video only'
            else:
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            if res:
                res += ', '
            # '~' marks the filesize as an estimate
            res += '~' + format_bytes(fdict['filesize_approx'])
        return res
2028
2029     def list_formats(self, info_dict):
2030         formats = info_dict.get('formats', [info_dict])
2031         table = [
2032             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
2033             for f in formats
2034             if f.get('preference') is None or f['preference'] >= -1000]
2035         if len(formats) > 1:
2036             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
2037
2038         header_line = ['format code', 'extension', 'resolution', 'note']
2039         self.to_screen(
2040             '[info] Available formats for %s:\n%s' %
2041             (info_dict['id'], render_table(header_line, table)))
2042
2043     def list_thumbnails(self, info_dict):
2044         thumbnails = info_dict.get('thumbnails')
2045         if not thumbnails:
2046             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2047             return
2048
2049         self.to_screen(
2050             '[info] Thumbnails for %s:' % info_dict['id'])
2051         self.to_screen(render_table(
2052             ['ID', 'width', 'height', 'URL'],
2053             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
2054
2055     def list_subtitles(self, video_id, subtitles, name='subtitles'):
2056         if not subtitles:
2057             self.to_screen('%s has no %s' % (video_id, name))
2058             return
2059         self.to_screen(
2060             'Available %s for %s:' % (name, video_id))
2061         self.to_screen(render_table(
2062             ['Language', 'formats'],
2063             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2064                 for lang, formats in subtitles.items()]))
2065
2066     def urlopen(self, req):
2067         """ Start an HTTP download """
2068         if isinstance(req, compat_basestring):
2069             req = sanitized_Request(req)
2070         return self._opener.open(req, timeout=self._socket_timeout)
2071
2072     def print_debug_header(self):
2073         if not self.params.get('verbose'):
2074             return
2075
2076         if type('') is not compat_str:
2077             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
2078             self.report_warning(
2079                 'Your Python is broken! Update to a newer and supported version')
2080
2081         stdout_encoding = getattr(
2082             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
2083         encoding_str = (
2084             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
2085                 locale.getpreferredencoding(),
2086                 sys.getfilesystemencoding(),
2087                 stdout_encoding,
2088                 self.get_encoding()))
2089         write_string(encoding_str, encoding=None)
2090
2091         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
2092         if _LAZY_LOADER:
2093             self._write_string('[debug] Lazy loading extractors enabled' + '\n')
2094         try:
2095             sp = subprocess.Popen(
2096                 ['git', 'rev-parse', '--short', 'HEAD'],
2097                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
2098                 cwd=os.path.dirname(os.path.abspath(__file__)))
2099             out, err = sp.communicate()
2100             out = out.decode().strip()
2101             if re.match('[0-9a-f]+', out):
2102                 self._write_string('[debug] Git HEAD: ' + out + '\n')
2103         except Exception:
2104             try:
2105                 sys.exc_clear()
2106             except Exception:
2107                 pass
2108         self._write_string('[debug] Python version %s - %s\n' % (
2109             platform.python_version(), platform_name()))
2110
2111         exe_versions = FFmpegPostProcessor.get_versions(self)
2112         exe_versions['rtmpdump'] = rtmpdump_version()
2113         exe_str = ', '.join(
2114             '%s %s' % (exe, v)
2115             for exe, v in sorted(exe_versions.items())
2116             if v
2117         )
2118         if not exe_str:
2119             exe_str = 'none'
2120         self._write_string('[debug] exe versions: %s\n' % exe_str)
2121
2122         proxy_map = {}
2123         for handler in self._opener.handlers:
2124             if hasattr(handler, 'proxies'):
2125                 proxy_map.update(handler.proxies)
2126         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
2127
2128         if self.params.get('call_home', False):
2129             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
2130             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
2131             latest_version = self.urlopen(
2132                 'https://yt-dl.org/latest/version').read().decode('utf-8')
2133             if version_tuple(latest_version) > version_tuple(__version__):
2134                 self.report_warning(
2135                     'You are using an outdated version (newest version: %s)! '
2136                     'See https://yt-dl.org/update if you need help updating.' %
2137                     latest_version)
2138
2139     def _setup_opener(self):
2140         timeout_val = self.params.get('socket_timeout')
2141         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
2142
2143         opts_cookiefile = self.params.get('cookiefile')
2144         opts_proxy = self.params.get('proxy')
2145
2146         if opts_cookiefile is None:
2147             self.cookiejar = compat_cookiejar.CookieJar()
2148         else:
2149             opts_cookiefile = compat_expanduser(opts_cookiefile)
2150             self.cookiejar = compat_cookiejar.MozillaCookieJar(
2151                 opts_cookiefile)
2152             if os.access(opts_cookiefile, os.R_OK):
2153                 self.cookiejar.load()
2154
2155         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
2156         if opts_proxy is not None:
2157             if opts_proxy == '':
2158                 proxies = {}
2159             else:
2160                 proxies = {'http': opts_proxy, 'https': opts_proxy}
2161         else:
2162             proxies = compat_urllib_request.getproxies()
2163             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
2164             if 'http' in proxies and 'https' not in proxies:
2165                 proxies['https'] = proxies['http']
2166         proxy_handler = PerRequestProxyHandler(proxies)
2167
2168         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
2169         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
2170         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
2171         data_handler = compat_urllib_request_DataHandler()
2172
2173         # When passing our own FileHandler instance, build_opener won't add the
2174         # default FileHandler and allows us to disable the file protocol, which
2175         # can be used for malicious purposes (see
2176         # https://github.com/rg3/youtube-dl/issues/8227)
2177         file_handler = compat_urllib_request.FileHandler()
2178
2179         def file_open(*args, **kwargs):
2180             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
2181         file_handler.file_open = file_open
2182
2183         opener = compat_urllib_request.build_opener(
2184             proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)
2185
2186         # Delete the default user-agent header, which would otherwise apply in
2187         # cases where our custom HTTP handler doesn't come into play
2188         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
2189         opener.addheaders = []
2190         self._opener = opener
2191
2192     def encode(self, s):
2193         if isinstance(s, bytes):
2194             return s  # Already encoded
2195
2196         try:
2197             return s.encode(self.get_encoding())
2198         except UnicodeEncodeError as err:
2199             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2200             raise
2201
2202     def get_encoding(self):
2203         encoding = self.params.get('encoding')
2204         if encoding is None:
2205             encoding = preferredencoding()
2206         return encoding
2207
2208     def _write_thumbnails(self, info_dict, filename):
2209         if self.params.get('writethumbnail', False):
2210             thumbnails = info_dict.get('thumbnails')
2211             if thumbnails:
2212                 thumbnails = [thumbnails[-1]]
2213         elif self.params.get('write_all_thumbnails', False):
2214             thumbnails = info_dict.get('thumbnails')
2215         else:
2216             return
2217
2218         if not thumbnails:
2219             # No thumbnails present, so return immediately
2220             return
2221
2222         for t in thumbnails:
2223             thumb_ext = determine_ext(t['url'], 'jpg')
2224             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2225             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2226             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
2227
2228             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2229                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2230                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2231             else:
2232                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2233                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2234                 try:
2235                     uf = self.urlopen(t['url'])
2236                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2237                         shutil.copyfileobj(uf, thumbf)
2238                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2239                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2240                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2241                     self.report_warning('Unable to download thumbnail "%s": %s' %
2242                                         (t['url'], error_to_compat_str(err)))