[YoutubeDL] Add support for playlist_uploader and playlist_uploader_id in output...
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # coding: utf-8
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import copy
9 import datetime
10 import errno
11 import fileinput
12 import io
13 import itertools
14 import json
15 import locale
16 import operator
17 import os
18 import platform
19 import re
20 import shutil
21 import subprocess
22 import socket
23 import sys
24 import time
25 import tokenize
26 import traceback
27 import random
28
29 from string import ascii_letters
30
31 from .compat import (
32     compat_basestring,
33     compat_cookiejar,
34     compat_get_terminal_size,
35     compat_http_client,
36     compat_kwargs,
37     compat_numeric_types,
38     compat_os_name,
39     compat_str,
40     compat_tokenize_tokenize,
41     compat_urllib_error,
42     compat_urllib_request,
43     compat_urllib_request_DataHandler,
44 )
45 from .utils import (
46     age_restricted,
47     args_to_str,
48     ContentTooShortError,
49     date_from_str,
50     DateRange,
51     DEFAULT_OUTTMPL,
52     determine_ext,
53     determine_protocol,
54     DownloadError,
55     encode_compat_str,
56     encodeFilename,
57     error_to_compat_str,
58     expand_path,
59     ExtractorError,
60     format_bytes,
61     formatSeconds,
62     GeoRestrictedError,
63     int_or_none,
64     ISO3166Utils,
65     locked_file,
66     make_HTTPS_handler,
67     MaxDownloadsReached,
68     orderedSet,
69     PagedList,
70     parse_filesize,
71     PerRequestProxyHandler,
72     platform_name,
73     PostProcessingError,
74     preferredencoding,
75     prepend_extension,
76     register_socks_protocols,
77     render_table,
78     replace_extension,
79     SameFileError,
80     sanitize_filename,
81     sanitize_path,
82     sanitize_url,
83     sanitized_Request,
84     std_headers,
85     subtitles_filename,
86     UnavailableVideoError,
87     url_basename,
88     version_tuple,
89     write_json_file,
90     write_string,
91     YoutubeDLCookieProcessor,
92     YoutubeDLHandler,
93 )
94 from .cache import Cache
95 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
96 from .extractor.openload import PhantomJSwrapper
97 from .downloader import get_suitable_downloader
98 from .downloader.rtmp import rtmpdump_version
99 from .postprocessor import (
100     FFmpegFixupM3u8PP,
101     FFmpegFixupM4aPP,
102     FFmpegFixupStretchedPP,
103     FFmpegMergerPP,
104     FFmpegPostProcessor,
105     get_postprocessor,
106 )
107 from .version import __version__
108
109 if compat_os_name == 'nt':
110     import ctypes
111
112
113 class YoutubeDL(object):
114     """YoutubeDL class.
115
116     YoutubeDL objects are the ones responsible for downloading the
117     actual video file and writing it to disk if the user has requested
118     it, among some other tasks. In most cases there should be one per
119     program. As, given a video URL, the downloader doesn't know how to
120     extract all the needed information, task that InfoExtractors do, it
121     has to pass the URL to one of them.
122
123     For this, YoutubeDL objects have a method that allows
124     InfoExtractors to be registered in a given order. When it is passed
125     a URL, the YoutubeDL object hands it to the first InfoExtractor it
126     finds that reports being able to handle it. The InfoExtractor extracts
127     all the information about the video or videos the URL refers to, and
128     YoutubeDL process the extracted information, possibly using a File
129     Downloader to download the video.
130
131     YoutubeDL objects accept a lot of parameters. In order not to saturate
132     the object constructor with arguments, it receives a dictionary of
133     options instead. These options are available through the params
134     attribute for the InfoExtractors to use. The YoutubeDL also
135     registers itself as the downloader in charge for the InfoExtractors
136     that are added to it, so this is a "mutual registration".
137
138     Available options:
139
140     username:          Username for authentication purposes.
141     password:          Password for authentication purposes.
142     videopassword:     Password for accessing a video.
143     ap_mso:            Adobe Pass multiple-system operator identifier.
144     ap_username:       Multiple-system operator account username.
145     ap_password:       Multiple-system operator account password.
146     usenetrc:          Use netrc for authentication instead.
147     verbose:           Print additional info to stdout.
148     quiet:             Do not print messages to stdout.
149     no_warnings:       Do not print out anything for warnings.
150     forceurl:          Force printing final URL.
151     forcetitle:        Force printing title.
152     forceid:           Force printing ID.
153     forcethumbnail:    Force printing thumbnail URL.
154     forcedescription:  Force printing description.
155     forcefilename:     Force printing final filename.
156     forceduration:     Force printing duration.
157     forcejson:         Force printing info_dict as JSON.
158     dump_single_json:  Force printing the info_dict of the whole playlist
159                        (or video) as a single JSON line.
160     simulate:          Do not download the video files.
161     format:            Video format code. See options.py for more information.
162     outtmpl:           Template for output names.
163     restrictfilenames: Do not allow "&" and spaces in file names
164     ignoreerrors:      Do not stop on download errors.
165     force_generic_extractor: Force downloader to use the generic extractor
166     nooverwrites:      Prevent overwriting files.
167     playliststart:     Playlist item to start at.
168     playlistend:       Playlist item to end at.
169     playlist_items:    Specific indices of playlist to download.
170     playlistreverse:   Download playlist items in reverse order.
171     playlistrandom:    Download playlist items in random order.
172     matchtitle:        Download only matching titles.
173     rejecttitle:       Reject downloads for matching titles.
174     logger:            Log messages to a logging.Logger instance.
175     logtostderr:       Log messages to stderr instead of stdout.
176     writedescription:  Write the video description to a .description file
177     writeinfojson:     Write the video description to a .info.json file
178     writeannotations:  Write the video annotations to a .annotations.xml file
179     writethumbnail:    Write the thumbnail image to a file
180     write_all_thumbnails:  Write all thumbnail formats to files
181     writesubtitles:    Write the video subtitles to a file
182     writeautomaticsub: Write the automatically generated subtitles to a file
183     allsubtitles:      Downloads all the subtitles of the video
184                        (requires writesubtitles or writeautomaticsub)
185     listsubtitles:     Lists all available subtitles for the video
186     subtitlesformat:   The format code for subtitles
187     subtitleslangs:    List of languages of the subtitles to download
188     keepvideo:         Keep the video file after post-processing
189     daterange:         A DateRange object, download only if the upload_date is in the range.
190     skip_download:     Skip the actual download of the video file
191     cachedir:          Location of the cache files in the filesystem.
192                        False to disable filesystem cache.
193     noplaylist:        Download single video instead of a playlist if in doubt.
194     age_limit:         An integer representing the user's age in years.
195                        Unsuitable videos for the given age are skipped.
196     min_views:         An integer representing the minimum view count the video
197                        must have in order to not be skipped.
198                        Videos without view count information are always
199                        downloaded. None for no limit.
200     max_views:         An integer representing the maximum view count.
201                        Videos that are more popular than that are not
202                        downloaded.
203                        Videos without view count information are always
204                        downloaded. None for no limit.
205     download_archive:  File name of a file where all downloads are recorded.
206                        Videos already present in the file are not downloaded
207                        again.
208     cookiefile:        File name where cookies should be read from and dumped to.
209     nocheckcertificate:Do not verify SSL certificates
210     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
211                        At the moment, this is only supported by YouTube.
212     proxy:             URL of the proxy server to use
213     geo_verification_proxy:  URL of the proxy to use for IP address verification
214                        on geo-restricted sites. (Experimental)
215     socket_timeout:    Time to wait for unresponsive hosts, in seconds
216     bidi_workaround:   Work around buggy terminals without bidirectional text
217                        support, using fribidi
218     debug_printtraffic:Print out sent and received HTTP traffic
219     include_ads:       Download ads as well
220     default_search:    Prepend this string if an input url is not valid.
221                        'auto' for elaborate guessing
222     encoding:          Use this encoding instead of the system-specified.
223     extract_flat:      Do not resolve URLs, return the immediate result.
224                        Pass in 'in_playlist' to only show this behavior for
225                        playlist items.
226     postprocessors:    A list of dictionaries, each with an entry
227                        * key:  The name of the postprocessor. See
228                                youtube_dl/postprocessor/__init__.py for a list.
229                        as well as any further keyword arguments for the
230                        postprocessor.
231     progress_hooks:    A list of functions that get called on download
232                        progress, with a dictionary with the entries
233                        * status: One of "downloading", "error", or "finished".
234                                  Check this first and ignore unknown values.
235
236                        If status is one of "downloading", or "finished", the
237                        following properties may also be present:
238                        * filename: The final filename (always present)
239                        * tmpfilename: The filename we're currently writing to
240                        * downloaded_bytes: Bytes on disk
241                        * total_bytes: Size of the whole file, None if unknown
242                        * total_bytes_estimate: Guess of the eventual file size,
243                                                None if unavailable.
244                        * elapsed: The number of seconds since download started.
245                        * eta: The estimated time in seconds, None if unknown
246                        * speed: The download speed in bytes/second, None if
247                                 unknown
248                        * fragment_index: The counter of the currently
249                                          downloaded video fragment.
250                        * fragment_count: The number of fragments (= individual
251                                          files that will be merged)
252
253                        Progress hooks are guaranteed to be called at least once
254                        (with status "finished") if the download is successful.
255     merge_output_format: Extension to use when merging formats.
256     fixup:             Automatically correct known faults of the file.
257                        One of:
258                        - "never": do nothing
259                        - "warn": only emit a warning
260                        - "detect_or_warn": check whether we can do anything
261                                            about it, warn otherwise (default)
262     source_address:    (Experimental) Client-side IP address to bind to.
263     call_home:         Boolean, true iff we are allowed to contact the
264                        youtube-dl servers for debugging.
265     sleep_interval:    Number of seconds to sleep before each download when
266                        used alone or a lower bound of a range for randomized
267                        sleep before each download (minimum possible number
268                        of seconds to sleep) when used along with
269                        max_sleep_interval.
270     max_sleep_interval:Upper bound of a range for randomized sleep before each
271                        download (maximum possible number of seconds to sleep).
272                        Must only be used along with sleep_interval.
273                        Actual sleep time will be a random float from range
274                        [sleep_interval; max_sleep_interval].
275     listformats:       Print an overview of available video formats and exit.
276     list_thumbnails:   Print a table of all thumbnails and exit.
277     match_filter:      A function that gets called with the info_dict of
278                        every video.
279                        If it returns a message, the video is ignored.
280                        If it returns None, the video is downloaded.
281                        match_filter_func in utils.py is one example for this.
282     no_color:          Do not emit color codes in output.
283     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
284                        HTTP header (experimental)
285     geo_bypass_country:
286                        Two-letter ISO 3166-2 country code that will be used for
287                        explicit geographic restriction bypassing via faking
288                        X-Forwarded-For HTTP header (experimental)
289
290     The following options determine which downloader is picked:
291     external_downloader: Executable of the external downloader to call.
292                        None or unset for standard (built-in) downloader.
293     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
294                        if True, otherwise use ffmpeg/avconv if False, otherwise
295                        use downloader suggested by extractor if None.
296
297     The following parameters are not used by YoutubeDL itself, they are used by
298     the downloader (see youtube_dl/downloader/common.py):
299     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
300     noresizebuffer, retries, continuedl, noprogress, consoletitle,
301     xattr_set_filesize, external_downloader_args, hls_use_mpegts.
302
303     The following options are used by the post processors:
304     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
305                        otherwise prefer avconv.
306     postprocessor_args: A list of additional command-line arguments for the
307                         postprocessor.
308
309     The following options are used by the Youtube extractor:
310     youtube_include_dash_manifest: If True (default), DASH manifests and related
311                         data will be downloaded and processed by extractor.
312                         You can reduce network I/O by disabling it if you don't
313                         care about DASH.
314     """
315
    # Fields whose values are numeric by contract; prepare_filename() uses
    # this set to patch the output template when such a field is missing, so
    # that e.g. %(view_count)d does not break on the placeholder string 'NA'.
    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'upload_year', 'upload_month', 'upload_day',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
        'playlist_index',
    ))

    # Class-level placeholders; the real values are assigned per instance in
    # __init__ (these remain only if __init__ was never run).
    params = None              # the option dictionary (see class docstring)
    _ies = []                  # registered InfoExtractors (classes or instances)
    _pps = []                  # registered PostProcessors
    _download_retcode = None   # set to 0 in __init__, to 1 by trouble() when errors are ignored
    _num_downloads = None      # count of files downloaded by this instance
    _screen_file = None        # stream used for normal (non-error) output
333
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.

        params:    dictionary of options (see the class docstring).
        auto_init: when True, print the debug header and register all
                   default info extractors immediately.
        """
        if params is None:
            params = {}
        self._ies = []
        self._ies_instances = {}
        self._pps = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Normal output goes to stderr instead of stdout when logtostderr is set.
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = {
            # Default parameters
            'nocheckcertificate': False,
        }
        self.params.update(params)
        self.cache = Cache(self)

        def check_deprecated(param, option, suggestion):
            # Warn about a deprecated option; returns True when it was set.
            if self.params.get(param) is not None:
                self.report_warning(
                    '%s is deprecated. Use %s instead.' % (option, suggestion))
                return True
            return False

        # cn_verification_proxy keeps working as an alias for
        # geo_verification_proxy unless the latter is set explicitly.
        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')

        if params.get('bidi_workaround', False):
            try:
                import pty
                # Route our output through an external bidi converter via a
                # pty so right-to-left text renders on terminals without
                # bidirectional support (see _bidi_workaround).
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    # Fall back to fribidi when bidiv is not installed.
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
                not params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if isinstance(params.get('outtmpl'), bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')

        self._setup_opener()

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        # Instantiate and register the configured postprocessors; 'key'
        # names the PP class, the remaining entries become constructor kwargs.
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            del pp_def['key']
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()
427
428     def warn_if_short_id(self, argv):
429         # short YouTube ID starting with dash?
430         idxs = [
431             i for i, a in enumerate(argv)
432             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
433         if idxs:
434             correct_argv = (
435                 ['youtube-dl'] +
436                 [a for i, a in enumerate(argv) if i not in idxs] +
437                 ['--'] + [argv[i] for i in idxs]
438             )
439             self.report_warning(
440                 'Long argument string detected. '
441                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
442                 args_to_str(correct_argv))
443
444     def add_info_extractor(self, ie):
445         """Add an InfoExtractor object to the end of the list."""
446         self._ies.append(ie)
447         if not isinstance(ie, type):
448             self._ies_instances[ie.ie_key()] = ie
449             ie.set_downloader(self)
450
451     def get_info_extractor(self, ie_key):
452         """
453         Get an instance of an IE with name ie_key, it will try to get one from
454         the _ies list, if there's no instance it will create a new one and add
455         it to the extractor list.
456         """
457         ie = self._ies_instances.get(ie_key)
458         if ie is None:
459             ie = get_info_extractor(ie_key)()
460             self.add_info_extractor(ie)
461         return ie
462
463     def add_default_info_extractors(self):
464         """
465         Add the InfoExtractors returned by gen_extractors to the end of the list
466         """
467         for ie in gen_extractor_classes():
468             self.add_info_extractor(ie)
469
470     def add_post_processor(self, pp):
471         """Add a PostProcessor object to the end of the chain."""
472         self._pps.append(pp)
473         pp.set_downloader(self)
474
475     def add_progress_hook(self, ph):
476         """Add the progress hook (currently only for the file downloader)"""
477         self._progress_hooks.append(ph)
478
479     def _bidi_workaround(self, message):
480         if not hasattr(self, '_output_channel'):
481             return message
482
483         assert hasattr(self, '_output_process')
484         assert isinstance(message, compat_str)
485         line_count = message.count('\n') + 1
486         self._output_process.stdin.write((message + '\n').encode('utf-8'))
487         self._output_process.stdin.flush()
488         res = ''.join(self._output_channel.readline().decode('utf-8')
489                       for _ in range(line_count))
490         return res[:-len('\n')]
491
492     def to_screen(self, message, skip_eol=False):
493         """Print message to stdout if not in quiet mode."""
494         return self.to_stdout(message, skip_eol, check_quiet=True)
495
496     def _write_string(self, s, out=None):
497         write_string(s, out=out, encoding=self.params.get('encoding'))
498
499     def to_stdout(self, message, skip_eol=False, check_quiet=False):
500         """Print message to stdout if not in quiet mode."""
501         if self.params.get('logger'):
502             self.params['logger'].debug(message)
503         elif not check_quiet or not self.params.get('quiet', False):
504             message = self._bidi_workaround(message)
505             terminator = ['\n', ''][skip_eol]
506             output = message + terminator
507
508             self._write_string(output, self._screen_file)
509
510     def to_stderr(self, message):
511         """Print message to stderr."""
512         assert isinstance(message, compat_str)
513         if self.params.get('logger'):
514             self.params['logger'].error(message)
515         else:
516             message = self._bidi_workaround(message)
517             output = message + '\n'
518             self._write_string(output, self._err_file)
519
520     def to_console_title(self, message):
521         if not self.params.get('consoletitle', False):
522             return
523         if compat_os_name == 'nt':
524             if ctypes.windll.kernel32.GetConsoleWindow():
525                 # c_wchar_p() might not be necessary if `message` is
526                 # already of type unicode()
527                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
528         elif 'TERM' in os.environ:
529             self._write_string('\033]0;%s\007' % message, self._screen_file)
530
531     def save_console_title(self):
532         if not self.params.get('consoletitle', False):
533             return
534         if compat_os_name != 'nt' and 'TERM' in os.environ:
535             # Save the title on stack
536             self._write_string('\033[22;0t', self._screen_file)
537
538     def restore_console_title(self):
539         if not self.params.get('consoletitle', False):
540             return
541         if compat_os_name != 'nt' and 'TERM' in os.environ:
542             # Restore the title from stack
543             self._write_string('\033[23;0t', self._screen_file)
544
545     def __enter__(self):
546         self.save_console_title()
547         return self
548
549     def __exit__(self, *args):
550         self.restore_console_title()
551
552         if self.params.get('cookiefile') is not None:
553             self.cookiejar.save()
554
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # If the active exception wraps an original one via an
                    # 'exc_info' attribute, include that traceback first.
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: show where we were called from.
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Re-raise as DownloadError, preferring the wrapped exception's
            # exc_info (same 'exc_info' attribute convention as above).
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # Errors are being ignored: record the failure in the return code.
        self._download_retcode = 1
584
585     def report_warning(self, message):
586         '''
587         Print the message to stderr, it will be prefixed with 'WARNING:'
588         If stderr is a tty file the 'WARNING:' will be colored
589         '''
590         if self.params.get('logger') is not None:
591             self.params['logger'].warning(message)
592         else:
593             if self.params.get('no_warnings'):
594                 return
595             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
596                 _msg_header = '\033[0;33mWARNING:\033[0m'
597             else:
598                 _msg_header = 'WARNING:'
599             warning_message = '%s %s' % (_msg_header, message)
600             self.to_stderr(warning_message)
601
602     def report_error(self, message, tb=None):
603         '''
604         Do the same as trouble, but prefixes the message with 'ERROR:', colored
605         in red if stderr is a tty file.
606         '''
607         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
608             _msg_header = '\033[0;31mERROR:\033[0m'
609         else:
610             _msg_header = 'ERROR:'
611         error_message = '%s %s' % (_msg_header, message)
612         self.trouble(error_message, tb)
613
614     def report_file_already_downloaded(self, file_name):
615         """Report file has already been fully downloaded."""
616         try:
617             self.to_screen('[download] %s has already been downloaded' % file_name)
618         except UnicodeEncodeError:
619             self.to_screen('[download] The file has already been downloaded')
620
621     def prepare_filename(self, info_dict):
622         """Generate the output filename."""
623         try:
624             template_dict = dict(info_dict)
625
626             template_dict['epoch'] = int(time.time())
627             autonumber_size = self.params.get('autonumber_size')
628             if autonumber_size is None:
629                 autonumber_size = 5
630             template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
631             if template_dict.get('resolution') is None:
632                 if template_dict.get('width') and template_dict.get('height'):
633                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
634                 elif template_dict.get('height'):
635                     template_dict['resolution'] = '%sp' % template_dict['height']
636                 elif template_dict.get('width'):
637                     template_dict['resolution'] = '%dx?' % template_dict['width']
638
639             sanitize = lambda k, v: sanitize_filename(
640                 compat_str(v),
641                 restricted=self.params.get('restrictfilenames'),
642                 is_id=(k == 'id' or k.endswith('_id')))
643             template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
644                                  for k, v in template_dict.items()
645                                  if v is not None and not isinstance(v, (list, tuple, dict)))
646             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
647
648             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
649
650             # For fields playlist_index and autonumber convert all occurrences
651             # of %(field)s to %(field)0Nd for backward compatibility
652             field_size_compat_map = {
653                 'playlist_index': len(str(template_dict['n_entries'])),
654                 'autonumber': autonumber_size,
655             }
656             FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
657             mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
658             if mobj:
659                 outtmpl = re.sub(
660                     FIELD_SIZE_COMPAT_RE,
661                     r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
662                     outtmpl)
663
664             # Missing numeric fields used together with integer presentation types
665             # in format specification will break the argument substitution since
666             # string 'NA' is returned for missing fields. We will patch output
667             # template for missing fields to meet string presentation type.
668             for numeric_field in self._NUMERIC_FIELDS:
669                 if numeric_field not in template_dict:
670                     # As of [1] format syntax is:
671                     #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
672                     # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
673                     FORMAT_RE = r'''(?x)
674                         (?<!%)
675                         %
676                         \({0}\)  # mapping key
677                         (?:[#0\-+ ]+)?  # conversion flags (optional)
678                         (?:\d+)?  # minimum field width (optional)
679                         (?:\.\d+)?  # precision (optional)
680                         [hlL]?  # length modifier (optional)
681                         [diouxXeEfFgGcrs%]  # conversion type
682                     '''
683                     outtmpl = re.sub(
684                         FORMAT_RE.format(numeric_field),
685                         r'%({0})s'.format(numeric_field), outtmpl)
686
687             # expand_path translates '%%' into '%' and '$$' into '$'
688             # correspondingly that is not what we want since we need to keep
689             # '%%' intact for template dict substitution step. Working around
690             # with boundary-alike separator hack.
691             sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
692             outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
693
694             # outtmpl should be expand_path'ed before template dict substitution
695             # because meta fields may contain env variables we don't want to
696             # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
697             # title "Hello $PATH", we don't want `$PATH` to be expanded.
698             filename = expand_path(outtmpl).replace(sep, '') % template_dict
699
700             # Temporary fix for #4787
701             # 'Treat' all problem characters by passing filename through preferredencoding
702             # to workaround encoding issues with subprocess on python2 @ Windows
703             if sys.version_info < (3, 0) and sys.platform == 'win32':
704                 filename = encodeFilename(filename, True).decode(preferredencoding())
705             return sanitize_path(filename)
706         except ValueError as err:
707             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
708             return None
709
710     def _match_entry(self, info_dict, incomplete):
711         """ Returns None iff the file should be downloaded """
712
713         video_title = info_dict.get('title', info_dict.get('id', 'video'))
714         if 'title' in info_dict:
715             # This can happen when we're just evaluating the playlist
716             title = info_dict['title']
717             matchtitle = self.params.get('matchtitle', False)
718             if matchtitle:
719                 if not re.search(matchtitle, title, re.IGNORECASE):
720                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
721             rejecttitle = self.params.get('rejecttitle', False)
722             if rejecttitle:
723                 if re.search(rejecttitle, title, re.IGNORECASE):
724                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
725         date = info_dict.get('upload_date')
726         if date is not None:
727             dateRange = self.params.get('daterange', DateRange())
728             if date not in dateRange:
729                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
730         view_count = info_dict.get('view_count')
731         if view_count is not None:
732             min_views = self.params.get('min_views')
733             if min_views is not None and view_count < min_views:
734                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
735             max_views = self.params.get('max_views')
736             if max_views is not None and view_count > max_views:
737                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
738         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
739             return 'Skipping "%s" because it is age restricted' % video_title
740         if self.in_download_archive(info_dict):
741             return '%s has already been recorded in archive' % video_title
742
743         if not incomplete:
744             match_filter = self.params.get('match_filter')
745             if match_filter is not None:
746                 ret = match_filter(info_dict)
747                 if ret is not None:
748                     return ret
749
750         return None
751
752     @staticmethod
753     def add_extra_info(info_dict, extra_info):
754         '''Set the keys from extra_info in info dict if they are missing'''
755         for key, value in extra_info.items():
756             info_dict.setdefault(key, value)
757
758     def extract_info(self, url, download=True, ie_key=None, extra_info={},
759                      process=True, force_generic_extractor=False):
760         '''
761         Returns a list with a dictionary for each video we find.
762         If 'download', also downloads the videos.
763         extra_info is a dict containing the extra values to add to each result
764         '''
765
766         if not ie_key and force_generic_extractor:
767             ie_key = 'Generic'
768
769         if ie_key:
770             ies = [self.get_info_extractor(ie_key)]
771         else:
772             ies = self._ies
773
774         for ie in ies:
775             if not ie.suitable(url):
776                 continue
777
778             ie = self.get_info_extractor(ie.ie_key())
779             if not ie.working():
780                 self.report_warning('The program functionality for this site has been marked as broken, '
781                                     'and will probably not work.')
782
783             try:
784                 ie_result = ie.extract(url)
785                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
786                     break
787                 if isinstance(ie_result, list):
788                     # Backwards compatibility: old IE result format
789                     ie_result = {
790                         '_type': 'compat_list',
791                         'entries': ie_result,
792                     }
793                 self.add_default_extra_info(ie_result, ie, url)
794                 if process:
795                     return self.process_ie_result(ie_result, download, extra_info)
796                 else:
797                     return ie_result
798             except GeoRestrictedError as e:
799                 msg = e.msg
800                 if e.countries:
801                     msg += '\nThis video is available in %s.' % ', '.join(
802                         map(ISO3166Utils.short2full, e.countries))
803                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
804                 self.report_error(msg)
805                 break
806             except ExtractorError as e:  # An error we somewhat expected
807                 self.report_error(compat_str(e), e.format_traceback())
808                 break
809             except MaxDownloadsReached:
810                 raise
811             except Exception as e:
812                 if self.params.get('ignoreerrors', False):
813                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
814                     break
815                 else:
816                     raise
817         else:
818             self.report_error('no suitable InfoExtractor for URL %s' % url)
819
820     def add_default_extra_info(self, ie_result, ie, url):
821         self.add_extra_info(ie_result, {
822             'extractor': ie.IE_NAME,
823             'webpage_url': url,
824             'webpage_url_basename': url_basename(url),
825             'extractor_key': ie.ie_key(),
826         })
827
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.

        ie_result's '_type' entry ('video' when absent) selects the handling:
        'video' results are processed directly; 'url'/'url_transparent'
        results trigger another round of extraction; 'playlist',
        'multi_video' and 'compat_list' results have each entry resolved
        recursively. Keys from extra_info are merged into every resolved
        result where not already present.
        """
        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            ie_result['url'] = sanitize_url(ie_result['url'])
            extract_flat = self.params.get('extract_flat', False)
            # With flat extraction requested ('in_playlist' only while
            # processing playlist entries), return the unresolved result as is
            # instead of following the URL
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                    extract_flat is True):
                if self.params.get('forcejson', False):
                    self.to_stdout(json.dumps(ie_result))
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # extract_info may return None when ignoreerrors is enabled and
            # extraction failed with an error, don't crash and return early
            # in this case
            if not info:
                return info

            # Non-None metadata from the embedding page overrides the inner
            # result, except for type/identity keys which must come from the
            # inner extraction
            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            # Extracted info may not be a video result (i.e.
            # info.get('_type', 'video') != video) but rather an url or
            # url_transparent. In such cases outer metadata (from ie_result)
            # should be propagated to inner one (info). For this to happen
            # _type of info should be overridden with url_transparent. This
            # fixes issue from https://github.com/rg3/youtube-dl/pull/11163.
            if new_result.get('_type') == 'url':
                new_result['_type'] = 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type in ('playlist', 'multi_video'):
            # We process each entry in the playlist
            playlist = ie_result.get('title') or ie_result.get('id')
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # --playlist-start is 1-based on the command line; convert to a
            # 0-based slice index
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend')
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

            playlistitems_str = self.params.get('playlist_items')
            playlistitems = None
            if playlistitems_str is not None:
                def iter_playlistitems(format):
                    # Expand "1-3,7" style specs into individual 1-based indices
                    for string_segment in format.split(','):
                        if '-' in string_segment:
                            start, end = string_segment.split('-')
                            for item in range(int(start), int(end) + 1):
                                yield int(item)
                        else:
                            yield int(string_segment)
                playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

            ie_entries = ie_result['entries']

            def make_playlistitems_entries(list_ie_entries):
                # Keep only the requested 1-based indices; out-of-range
                # indices are silently dropped
                num_entries = len(list_ie_entries)
                return [
                    list_ie_entries[i - 1] for i in playlistitems
                    if -num_entries <= i - 1 < num_entries]

            def report_download(num_entries):
                self.to_screen(
                    '[%s] playlist %s: Downloading %d videos' %
                    (ie_result['extractor'], playlist, num_entries))

            # Entries may come as a plain list, a lazily-fetched PagedList or
            # a generic iterable; each needs its own slicing strategy
            if isinstance(ie_entries, list):
                n_all_entries = len(ie_entries)
                if playlistitems:
                    entries = make_playlistitems_entries(ie_entries)
                else:
                    entries = ie_entries[playliststart:playlistend]
                n_entries = len(entries)
                self.to_screen(
                    '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            elif isinstance(ie_entries, PagedList):
                if playlistitems:
                    # Fetch each requested item as its own one-element slice
                    entries = []
                    for item in playlistitems:
                        entries.extend(ie_entries.getslice(
                            item - 1, item
                        ))
                else:
                    entries = ie_entries.getslice(
                        playliststart, playlistend)
                n_entries = len(entries)
                report_download(n_entries)
            else:  # iterable
                if playlistitems:
                    # Materialize only up to the largest requested index
                    entries = make_playlistitems_entries(list(itertools.islice(
                        ie_entries, 0, max(playlistitems))))
                else:
                    entries = list(itertools.islice(
                        ie_entries, playliststart, playlistend))
                n_entries = len(entries)
                report_download(n_entries)

            if self.params.get('playlistreverse', False):
                entries = entries[::-1]

            if self.params.get('playlistrandom', False):
                random.shuffle(entries)

            x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                # This __x_forwarded_for_ip thing is a bit ugly but requires
                # minimal changes
                if x_forwarded_for:
                    entry['__x_forwarded_for_ip'] = x_forwarded_for
                # Playlist-level metadata propagated into every entry (used
                # e.g. by output template fields such as playlist_index)
                extra = {
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_id': ie_result.get('id'),
                    'playlist_title': ie_result.get('title'),
                    'playlist_uploader': ie_result.get('uploader'),
                    'playlist_uploader_id': ie_result.get('uploader_id'),
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

                # incomplete=True: only apply filters that work on the fields
                # available before full extraction (match_filter is skipped)
                reason = self._match_entry(entry, incomplete=True)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                    continue

                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            self.to_screen('[download] Finished downloading playlist: %s' % playlist)
            return ie_result
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            def _fixup(r):
                # Backfill per-entry metadata that old-style extractors omit
                self.add_extra_info(
                    r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    }
                )
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
1022
1023     def _build_format_filter(self, filter_spec):
1024         " Returns a function to filter the formats according to the filter_spec "
1025
1026         OPERATORS = {
1027             '<': operator.lt,
1028             '<=': operator.le,
1029             '>': operator.gt,
1030             '>=': operator.ge,
1031             '=': operator.eq,
1032             '!=': operator.ne,
1033         }
1034         operator_rex = re.compile(r'''(?x)\s*
1035             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
1036             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1037             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
1038             $
1039             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1040         m = operator_rex.search(filter_spec)
1041         if m:
1042             try:
1043                 comparison_value = int(m.group('value'))
1044             except ValueError:
1045                 comparison_value = parse_filesize(m.group('value'))
1046                 if comparison_value is None:
1047                     comparison_value = parse_filesize(m.group('value') + 'B')
1048                 if comparison_value is None:
1049                     raise ValueError(
1050                         'Invalid value %r in format specification %r' % (
1051                             m.group('value'), filter_spec))
1052             op = OPERATORS[m.group('op')]
1053
1054         if not m:
1055             STR_OPERATORS = {
1056                 '=': operator.eq,
1057                 '!=': operator.ne,
1058                 '^=': lambda attr, value: attr.startswith(value),
1059                 '$=': lambda attr, value: attr.endswith(value),
1060                 '*=': lambda attr, value: value in attr,
1061             }
1062             str_operator_rex = re.compile(r'''(?x)
1063                 \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
1064                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
1065                 \s*(?P<value>[a-zA-Z0-9._-]+)
1066                 \s*$
1067                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1068             m = str_operator_rex.search(filter_spec)
1069             if m:
1070                 comparison_value = m.group('value')
1071                 op = STR_OPERATORS[m.group('op')]
1072
1073         if not m:
1074             raise ValueError('Invalid filter specification %r' % filter_spec)
1075
1076         def _filter(f):
1077             actual_value = f.get(m.group('key'))
1078             if actual_value is None:
1079                 return m.group('none_inclusive')
1080             return op(actual_value, comparison_value)
1081         return _filter
1082
1083     def _default_format_spec(self, info_dict, download=True):
1084
1085         def can_merge():
1086             merger = FFmpegMergerPP(self)
1087             return merger.available and merger.can_merge()
1088
1089         def prefer_best():
1090             if self.params.get('simulate', False):
1091                 return False
1092             if not download:
1093                 return False
1094             if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
1095                 return True
1096             if info_dict.get('is_live'):
1097                 return True
1098             if not can_merge():
1099                 return True
1100             return False
1101
1102         req_format_list = ['bestvideo+bestaudio', 'best']
1103         if prefer_best():
1104             req_format_list.reverse()
1105         return '/'.join(req_format_list)
1106
1107     def build_format_selector(self, format_spec):
1108         def syntax_error(note, start):
1109             message = (
1110                 'Invalid format specification: '
1111                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1112             return SyntaxError(message)
1113
1114         PICKFIRST = 'PICKFIRST'
1115         MERGE = 'MERGE'
1116         SINGLE = 'SINGLE'
1117         GROUP = 'GROUP'
1118         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1119
1120         def _parse_filter(tokens):
1121             filter_parts = []
1122             for type, string, start, _, _ in tokens:
1123                 if type == tokenize.OP and string == ']':
1124                     return ''.join(filter_parts)
1125                 else:
1126                     filter_parts.append(string)
1127
1128         def _remove_unused_ops(tokens):
1129             # Remove operators that we don't use and join them with the surrounding strings
1130             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1131             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1132             last_string, last_start, last_end, last_line = None, None, None, None
1133             for type, string, start, end, line in tokens:
1134                 if type == tokenize.OP and string == '[':
1135                     if last_string:
1136                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1137                         last_string = None
1138                     yield type, string, start, end, line
1139                     # everything inside brackets will be handled by _parse_filter
1140                     for type, string, start, end, line in tokens:
1141                         yield type, string, start, end, line
1142                         if type == tokenize.OP and string == ']':
1143                             break
1144                 elif type == tokenize.OP and string in ALLOWED_OPS:
1145                     if last_string:
1146                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1147                         last_string = None
1148                     yield type, string, start, end, line
1149                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1150                     if not last_string:
1151                         last_string = string
1152                         last_start = start
1153                         last_end = end
1154                     else:
1155                         last_string += string
1156             if last_string:
1157                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1158
1159         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1160             selectors = []
1161             current_selector = None
1162             for type, string, start, _, _ in tokens:
1163                 # ENCODING is only defined in python 3.x
1164                 if type == getattr(tokenize, 'ENCODING', None):
1165                     continue
1166                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1167                     current_selector = FormatSelector(SINGLE, string, [])
1168                 elif type == tokenize.OP:
1169                     if string == ')':
1170                         if not inside_group:
1171                             # ')' will be handled by the parentheses group
1172                             tokens.restore_last_token()
1173                         break
1174                     elif inside_merge and string in ['/', ',']:
1175                         tokens.restore_last_token()
1176                         break
1177                     elif inside_choice and string == ',':
1178                         tokens.restore_last_token()
1179                         break
1180                     elif string == ',':
1181                         if not current_selector:
1182                             raise syntax_error('"," must follow a format selector', start)
1183                         selectors.append(current_selector)
1184                         current_selector = None
1185                     elif string == '/':
1186                         if not current_selector:
1187                             raise syntax_error('"/" must follow a format selector', start)
1188                         first_choice = current_selector
1189                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1190                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1191                     elif string == '[':
1192                         if not current_selector:
1193                             current_selector = FormatSelector(SINGLE, 'best', [])
1194                         format_filter = _parse_filter(tokens)
1195                         current_selector.filters.append(format_filter)
1196                     elif string == '(':
1197                         if current_selector:
1198                             raise syntax_error('Unexpected "("', start)
1199                         group = _parse_format_selection(tokens, inside_group=True)
1200                         current_selector = FormatSelector(GROUP, group, [])
1201                     elif string == '+':
1202                         video_selector = current_selector
1203                         audio_selector = _parse_format_selection(tokens, inside_merge=True)
1204                         if not video_selector or not audio_selector:
1205                             raise syntax_error('"+" must be between two format selectors', start)
1206                         current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
1207                     else:
1208                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1209                 elif type == tokenize.ENDMARKER:
1210                     break
1211             if current_selector:
1212                 selectors.append(current_selector)
1213             return selectors
1214
1215         def _build_selector_function(selector):
1216             if isinstance(selector, list):
1217                 fs = [_build_selector_function(s) for s in selector]
1218
1219                 def selector_function(ctx):
1220                     for f in fs:
1221                         for format in f(ctx):
1222                             yield format
1223                 return selector_function
1224             elif selector.type == GROUP:
1225                 selector_function = _build_selector_function(selector.selector)
1226             elif selector.type == PICKFIRST:
1227                 fs = [_build_selector_function(s) for s in selector.selector]
1228
1229                 def selector_function(ctx):
1230                     for f in fs:
1231                         picked_formats = list(f(ctx))
1232                         if picked_formats:
1233                             return picked_formats
1234                     return []
1235             elif selector.type == SINGLE:
1236                 format_spec = selector.selector
1237
1238                 def selector_function(ctx):
1239                     formats = list(ctx['formats'])
1240                     if not formats:
1241                         return
1242                     if format_spec == 'all':
1243                         for f in formats:
1244                             yield f
1245                     elif format_spec in ['best', 'worst', None]:
1246                         format_idx = 0 if format_spec == 'worst' else -1
1247                         audiovideo_formats = [
1248                             f for f in formats
1249                             if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
1250                         if audiovideo_formats:
1251                             yield audiovideo_formats[format_idx]
1252                         # for extractors with incomplete formats (audio only (soundcloud)
1253                         # or video only (imgur)) we will fallback to best/worst
1254                         # {video,audio}-only format
1255                         elif ctx['incomplete_formats']:
1256                             yield formats[format_idx]
1257                     elif format_spec == 'bestaudio':
1258                         audio_formats = [
1259                             f for f in formats
1260                             if f.get('vcodec') == 'none']
1261                         if audio_formats:
1262                             yield audio_formats[-1]
1263                     elif format_spec == 'worstaudio':
1264                         audio_formats = [
1265                             f for f in formats
1266                             if f.get('vcodec') == 'none']
1267                         if audio_formats:
1268                             yield audio_formats[0]
1269                     elif format_spec == 'bestvideo':
1270                         video_formats = [
1271                             f for f in formats
1272                             if f.get('acodec') == 'none']
1273                         if video_formats:
1274                             yield video_formats[-1]
1275                     elif format_spec == 'worstvideo':
1276                         video_formats = [
1277                             f for f in formats
1278                             if f.get('acodec') == 'none']
1279                         if video_formats:
1280                             yield video_formats[0]
1281                     else:
1282                         extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
1283                         if format_spec in extensions:
1284                             filter_f = lambda f: f['ext'] == format_spec
1285                         else:
1286                             filter_f = lambda f: f['format_id'] == format_spec
1287                         matches = list(filter(filter_f, formats))
1288                         if matches:
1289                             yield matches[-1]
1290             elif selector.type == MERGE:
1291                 def _merge(formats_info):
1292                     format_1, format_2 = [f['format_id'] for f in formats_info]
1293                     # The first format must contain the video and the
1294                     # second the audio
1295                     if formats_info[0].get('vcodec') == 'none':
1296                         self.report_error('The first format must '
1297                                           'contain the video, try using '
1298                                           '"-f %s+%s"' % (format_2, format_1))
1299                         return
1300                     # Formats must be opposite (video+audio)
1301                     if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
1302                         self.report_error(
1303                             'Both formats %s and %s are video-only, you must specify "-f video+audio"'
1304                             % (format_1, format_2))
1305                         return
1306                     output_ext = (
1307                         formats_info[0]['ext']
1308                         if self.params.get('merge_output_format') is None
1309                         else self.params['merge_output_format'])
1310                     return {
1311                         'requested_formats': formats_info,
1312                         'format': '%s+%s' % (formats_info[0].get('format'),
1313                                              formats_info[1].get('format')),
1314                         'format_id': '%s+%s' % (formats_info[0].get('format_id'),
1315                                                 formats_info[1].get('format_id')),
1316                         'width': formats_info[0].get('width'),
1317                         'height': formats_info[0].get('height'),
1318                         'resolution': formats_info[0].get('resolution'),
1319                         'fps': formats_info[0].get('fps'),
1320                         'vcodec': formats_info[0].get('vcodec'),
1321                         'vbr': formats_info[0].get('vbr'),
1322                         'stretched_ratio': formats_info[0].get('stretched_ratio'),
1323                         'acodec': formats_info[1].get('acodec'),
1324                         'abr': formats_info[1].get('abr'),
1325                         'ext': output_ext,
1326                     }
1327                 video_selector, audio_selector = map(_build_selector_function, selector.selector)
1328
1329                 def selector_function(ctx):
1330                     for pair in itertools.product(
1331                             video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
1332                         yield _merge(pair)
1333
1334             filters = [self._build_format_filter(f) for f in selector.filters]
1335
1336             def final_selector(ctx):
1337                 ctx_copy = copy.deepcopy(ctx)
1338                 for _filter in filters:
1339                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1340                 return selector_function(ctx_copy)
1341             return final_selector
1342
1343         stream = io.BytesIO(format_spec.encode('utf-8'))
1344         try:
1345             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1346         except tokenize.TokenError:
1347             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1348
1349         class TokenIterator(object):
1350             def __init__(self, tokens):
1351                 self.tokens = tokens
1352                 self.counter = 0
1353
1354             def __iter__(self):
1355                 return self
1356
1357             def __next__(self):
1358                 if self.counter >= len(self.tokens):
1359                     raise StopIteration()
1360                 value = self.tokens[self.counter]
1361                 self.counter += 1
1362                 return value
1363
1364             next = __next__
1365
1366             def restore_last_token(self):
1367                 self.counter -= 1
1368
1369         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1370         return _build_selector_function(parsed_selector)
1371
1372     def _calc_headers(self, info_dict):
1373         res = std_headers.copy()
1374
1375         add_headers = info_dict.get('http_headers')
1376         if add_headers:
1377             res.update(add_headers)
1378
1379         cookies = self._calc_cookies(info_dict)
1380         if cookies:
1381             res['Cookie'] = cookies
1382
1383         if 'X-Forwarded-For' not in res:
1384             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1385             if x_forwarded_for_ip:
1386                 res['X-Forwarded-For'] = x_forwarded_for_ip
1387
1388         return res
1389
1390     def _calc_cookies(self, info_dict):
1391         pr = sanitized_Request(info_dict['url'])
1392         self.cookiejar.add_cookie_header(pr)
1393         return pr.get_header('Cookie')
1394
    def process_video_result(self, info_dict, download=True):
        """Sanitize a single extractor video result, pick the formats to
        download and (when download=True) hand each one to process_info.

        Mutates info_dict in place (thumbnails, subtitles, formats, derived
        fields) and returns it updated with the last selected format.
        Raises ExtractorError on missing mandatory fields, when no format
        is well-formed, or when the requested format is not available.
        """
        assert info_dict.get('_type', 'video') == 'video'

        # 'id' and 'title' are the only hard requirements on an extractor result.
        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        def report_force_conversion(field, field_not, conversion):
            # Warn that an extractor returned a mistyped field that we coerce.
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            # Coerce a non-string field value to compat_str, with a warning.
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            # Coerce every known numeric field to int (via int_or_none),
            # with a warning for each mistyped value.
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        # Normalize a lone 'thumbnail' into the 'thumbnails' list form.
        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if thumbnails:
            # Sort worst-to-best: by preference, then width/height, then id/url
            # (missing values sort first via the -1/'' placeholders).
            thumbnails.sort(key=lambda t: (
                t.get('preference') if t.get('preference') is not None else -1,
                t.get('width') if t.get('width') is not None else -1,
                t.get('height') if t.get('height') is not None else -1,
                t.get('id') if t.get('id') is not None else '', t.get('url')))
            for i, t in enumerate(thumbnails):
                t['url'] = sanitize_url(t['url'])
                if t.get('width') and t.get('height'):
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                if t.get('id') is None:
                    # Fall back to the list index as a stable thumbnail id.
                    t['id'] = '%d' % i

        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
            return

        thumbnail = info_dict.get('thumbnail')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            # Thumbnails are sorted worst-to-best, so [-1] is the best one.
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        # Derive upload_date from the timestamp when the extractor gave none.
        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Working around out-of-range timestamp values (e.g. negative ones on Windows,
            # see http://bugs.python.org/issue1646728)
            try:
                upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
                info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
            except (ValueError, OverflowError, OSError):
                pass

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        # Sanitize subtitle URLs and fill in missing extensions from the URL.
        subtitles = info_dict.get('subtitles')
        if subtitles:
            for _, subtitle in subtitles.items():
                for subtitle_format in subtitle:
                    if subtitle_format.get('url'):
                        subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                    if subtitle_format.get('ext') is None:
                        subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        if self.params.get('listsubtitles', False):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
            return
        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles,
            info_dict.get('automatic_captions'))

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            raise ExtractorError('No video formats found!')

        def is_wellformed(f):
            # A format without a URL is unusable; drop it with a warning.
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats))

        # Maps format_id -> list of formats sharing it, to detect duplicates.
        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                # Fall back to the list index as the format id.
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        for format_id, ambiguous_formats in formats_dict.items():
            if len(ambiguous_formats) > 1:
                # Disambiguate duplicates by appending '-<index>'.
                for i, format in enumerate(ambiguous_formats):
                    format['format_id'] = '%s-%d' % (format_id, i)

        for i, format in enumerate(formats):
            if format.get('format') is None:
                # Synthesize a human-readable format description.
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if format.get('ext') is None:
                format['ext'] = determine_ext(format['url']).lower()
            # Automatically determine protocol if missing (useful for format
            # selection purposes)
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats'):
            self.list_formats(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            req_format = self._default_format_spec(info_dict, download=download)
            if self.params.get('verbose'):
                self.to_stdout('[debug] Default format spec: %s' % req_format)

        format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/rg3/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/rg3/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) or
            # all formats are audio-only
            all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            raise ExtractorError('requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                # Each selected format gets its own merged copy of the info.
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
1626
1627     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1628         """Select the requested subtitles and their format"""
1629         available_subs = {}
1630         if normal_subtitles and self.params.get('writesubtitles'):
1631             available_subs.update(normal_subtitles)
1632         if automatic_captions and self.params.get('writeautomaticsub'):
1633             for lang, cap_info in automatic_captions.items():
1634                 if lang not in available_subs:
1635                     available_subs[lang] = cap_info
1636
1637         if (not self.params.get('writesubtitles') and not
1638                 self.params.get('writeautomaticsub') or not
1639                 available_subs):
1640             return None
1641
1642         if self.params.get('allsubtitles', False):
1643             requested_langs = available_subs.keys()
1644         else:
1645             if self.params.get('subtitleslangs', False):
1646                 requested_langs = self.params.get('subtitleslangs')
1647             elif 'en' in available_subs:
1648                 requested_langs = ['en']
1649             else:
1650                 requested_langs = [list(available_subs.keys())[0]]
1651
1652         formats_query = self.params.get('subtitlesformat', 'best')
1653         formats_preference = formats_query.split('/') if formats_query else []
1654         subs = {}
1655         for lang in requested_langs:
1656             formats = available_subs.get(lang)
1657             if formats is None:
1658                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1659                 continue
1660             for ext in formats_preference:
1661                 if ext == 'best':
1662                     f = formats[-1]
1663                     break
1664                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1665                 if matches:
1666                     f = matches[-1]
1667                     break
1668             else:
1669                 f = formats[-1]
1670                 self.report_warning(
1671                     'No subtitle format found matching "%s" for language %s, '
1672                     'using %s' % (formats_query, lang, f['ext']))
1673             subs[lang] = f
1674         return subs
1675
1676     def process_info(self, info_dict):
1677         """Process a single resolved IE result."""
1678
1679         assert info_dict.get('_type', 'video') == 'video'
1680
1681         max_downloads = self.params.get('max_downloads')
1682         if max_downloads is not None:
1683             if self._num_downloads >= int(max_downloads):
1684                 raise MaxDownloadsReached()
1685
1686         info_dict['fulltitle'] = info_dict['title']
1687         if len(info_dict['title']) > 200:
1688             info_dict['title'] = info_dict['title'][:197] + '...'
1689
1690         if 'format' not in info_dict:
1691             info_dict['format'] = info_dict['ext']
1692
1693         reason = self._match_entry(info_dict, incomplete=False)
1694         if reason is not None:
1695             self.to_screen('[download] ' + reason)
1696             return
1697
1698         self._num_downloads += 1
1699
1700         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1701
1702         # Forced printings
1703         if self.params.get('forcetitle', False):
1704             self.to_stdout(info_dict['fulltitle'])
1705         if self.params.get('forceid', False):
1706             self.to_stdout(info_dict['id'])
1707         if self.params.get('forceurl', False):
1708             if info_dict.get('requested_formats') is not None:
1709                 for f in info_dict['requested_formats']:
1710                     self.to_stdout(f['url'] + f.get('play_path', ''))
1711             else:
1712                 # For RTMP URLs, also include the playpath
1713                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1714         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1715             self.to_stdout(info_dict['thumbnail'])
1716         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1717             self.to_stdout(info_dict['description'])
1718         if self.params.get('forcefilename', False) and filename is not None:
1719             self.to_stdout(filename)
1720         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1721             self.to_stdout(formatSeconds(info_dict['duration']))
1722         if self.params.get('forceformat', False):
1723             self.to_stdout(info_dict['format'])
1724         if self.params.get('forcejson', False):
1725             self.to_stdout(json.dumps(info_dict))
1726
1727         # Do nothing else if in simulate mode
1728         if self.params.get('simulate', False):
1729             return
1730
1731         if filename is None:
1732             return
1733
1734         def ensure_dir_exists(path):
1735             try:
1736                 dn = os.path.dirname(path)
1737                 if dn and not os.path.exists(dn):
1738                     os.makedirs(dn)
1739                 return True
1740             except (OSError, IOError) as err:
1741                 self.report_error('unable to create directory ' + error_to_compat_str(err))
1742                 return False
1743
1744         if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
1745             return
1746
1747         if self.params.get('writedescription', False):
1748             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1749             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1750                 self.to_screen('[info] Video description is already present')
1751             elif info_dict.get('description') is None:
1752                 self.report_warning('There\'s no description to write.')
1753             else:
1754                 try:
1755                     self.to_screen('[info] Writing video description to: ' + descfn)
1756                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1757                         descfile.write(info_dict['description'])
1758                 except (OSError, IOError):
1759                     self.report_error('Cannot write description file ' + descfn)
1760                     return
1761
1762         if self.params.get('writeannotations', False):
1763             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1764             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1765                 self.to_screen('[info] Video annotations are already present')
1766             else:
1767                 try:
1768                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1769                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1770                         annofile.write(info_dict['annotations'])
1771                 except (KeyError, TypeError):
1772                     self.report_warning('There are no annotations to write.')
1773                 except (OSError, IOError):
1774                     self.report_error('Cannot write annotations file: ' + annofn)
1775                     return
1776
1777         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1778                                        self.params.get('writeautomaticsub')])
1779
1780         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1781             # subtitles download errors are already managed as troubles in relevant IE
1782             # that way it will silently go on when used with unsupporting IE
1783             subtitles = info_dict['requested_subtitles']
1784             ie = self.get_info_extractor(info_dict['extractor_key'])
1785             for sub_lang, sub_info in subtitles.items():
1786                 sub_format = sub_info['ext']
1787                 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1788                 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1789                     self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
1790                 else:
1791                     self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1792                     if sub_info.get('data') is not None:
1793                         try:
1794                             # Use newline='' to prevent conversion of newline characters
1795                             # See https://github.com/rg3/youtube-dl/issues/10268
1796                             with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
1797                                 subfile.write(sub_info['data'])
1798                         except (OSError, IOError):
1799                             self.report_error('Cannot write subtitles file ' + sub_filename)
1800                             return
1801                     else:
1802                         try:
1803                             sub_data = ie._request_webpage(
1804                                 sub_info['url'], info_dict['id'], note=False).read()
1805                             with io.open(encodeFilename(sub_filename), 'wb') as subfile:
1806                                 subfile.write(sub_data)
1807                         except (ExtractorError, IOError, OSError, ValueError) as err:
1808                             self.report_warning('Unable to download subtitle for "%s": %s' %
1809                                                 (sub_lang, error_to_compat_str(err)))
1810                             continue
1811
1812         if self.params.get('writeinfojson', False):
1813             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1814             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1815                 self.to_screen('[info] Video description metadata is already present')
1816             else:
1817                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1818                 try:
1819                     write_json_file(self.filter_requested_info(info_dict), infofn)
1820                 except (OSError, IOError):
1821                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1822                     return
1823
1824         self._write_thumbnails(info_dict, filename)
1825
1826         if not self.params.get('skip_download', False):
1827             try:
1828                 def dl(name, info):
1829                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1830                     for ph in self._progress_hooks:
1831                         fd.add_progress_hook(ph)
1832                     if self.params.get('verbose'):
1833                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1834                     return fd.download(name, info)
1835
1836                 if info_dict.get('requested_formats') is not None:
1837                     downloaded = []
1838                     success = True
1839                     merger = FFmpegMergerPP(self)
1840                     if not merger.available:
1841                         postprocessors = []
1842                         self.report_warning('You have requested multiple '
1843                                             'formats but ffmpeg or avconv are not installed.'
1844                                             ' The formats won\'t be merged.')
1845                     else:
1846                         postprocessors = [merger]
1847
1848                     def compatible_formats(formats):
1849                         video, audio = formats
1850                         # Check extension
1851                         video_ext, audio_ext = audio.get('ext'), video.get('ext')
1852                         if video_ext and audio_ext:
1853                             COMPATIBLE_EXTS = (
1854                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
1855                                 ('webm')
1856                             )
1857                             for exts in COMPATIBLE_EXTS:
1858                                 if video_ext in exts and audio_ext in exts:
1859                                     return True
1860                         # TODO: Check acodec/vcodec
1861                         return False
1862
1863                     filename_real_ext = os.path.splitext(filename)[1][1:]
1864                     filename_wo_ext = (
1865                         os.path.splitext(filename)[0]
1866                         if filename_real_ext == info_dict['ext']
1867                         else filename)
1868                     requested_formats = info_dict['requested_formats']
1869                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1870                         info_dict['ext'] = 'mkv'
1871                         self.report_warning(
1872                             'Requested formats are incompatible for merge and will be merged into mkv.')
1873                     # Ensure filename always has a correct extension for successful merge
1874                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1875                     if os.path.exists(encodeFilename(filename)):
1876                         self.to_screen(
1877                             '[download] %s has already been downloaded and '
1878                             'merged' % filename)
1879                     else:
1880                         for f in requested_formats:
1881                             new_info = dict(info_dict)
1882                             new_info.update(f)
1883                             fname = prepend_extension(
1884                                 self.prepare_filename(new_info),
1885                                 'f%s' % f['format_id'], new_info['ext'])
1886                             if not ensure_dir_exists(fname):
1887                                 return
1888                             downloaded.append(fname)
1889                             partial_success = dl(fname, new_info)
1890                             success = success and partial_success
1891                         info_dict['__postprocessors'] = postprocessors
1892                         info_dict['__files_to_merge'] = downloaded
1893                 else:
1894                     # Just a single file
1895                     success = dl(filename, info_dict)
1896             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1897                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
1898                 return
1899             except (OSError, IOError) as err:
1900                 raise UnavailableVideoError(err)
1901             except (ContentTooShortError, ) as err:
1902                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1903                 return
1904
1905             if success and filename != '-':
1906                 # Fixup content
1907                 fixup_policy = self.params.get('fixup')
1908                 if fixup_policy is None:
1909                     fixup_policy = 'detect_or_warn'
1910
1911                 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
1912
1913                 stretched_ratio = info_dict.get('stretched_ratio')
1914                 if stretched_ratio is not None and stretched_ratio != 1:
1915                     if fixup_policy == 'warn':
1916                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1917                             info_dict['id'], stretched_ratio))
1918                     elif fixup_policy == 'detect_or_warn':
1919                         stretched_pp = FFmpegFixupStretchedPP(self)
1920                         if stretched_pp.available:
1921                             info_dict.setdefault('__postprocessors', [])
1922                             info_dict['__postprocessors'].append(stretched_pp)
1923                         else:
1924                             self.report_warning(
1925                                 '%s: Non-uniform pixel ratio (%s). %s'
1926                                 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
1927                     else:
1928                         assert fixup_policy in ('ignore', 'never')
1929
1930                 if (info_dict.get('requested_formats') is None and
1931                         info_dict.get('container') == 'm4a_dash'):
1932                     if fixup_policy == 'warn':
1933                         self.report_warning(
1934                             '%s: writing DASH m4a. '
1935                             'Only some players support this container.'
1936                             % info_dict['id'])
1937                     elif fixup_policy == 'detect_or_warn':
1938                         fixup_pp = FFmpegFixupM4aPP(self)
1939                         if fixup_pp.available:
1940                             info_dict.setdefault('__postprocessors', [])
1941                             info_dict['__postprocessors'].append(fixup_pp)
1942                         else:
1943                             self.report_warning(
1944                                 '%s: writing DASH m4a. '
1945                                 'Only some players support this container. %s'
1946                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
1947                     else:
1948                         assert fixup_policy in ('ignore', 'never')
1949
1950                 if (info_dict.get('protocol') == 'm3u8_native' or
1951                         info_dict.get('protocol') == 'm3u8' and
1952                         self.params.get('hls_prefer_native')):
1953                     if fixup_policy == 'warn':
1954                         self.report_warning('%s: malformed AAC bitstream detected.' % (
1955                             info_dict['id']))
1956                     elif fixup_policy == 'detect_or_warn':
1957                         fixup_pp = FFmpegFixupM3u8PP(self)
1958                         if fixup_pp.available:
1959                             info_dict.setdefault('__postprocessors', [])
1960                             info_dict['__postprocessors'].append(fixup_pp)
1961                         else:
1962                             self.report_warning(
1963                                 '%s: malformed AAC bitstream detected. %s'
1964                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
1965                     else:
1966                         assert fixup_policy in ('ignore', 'never')
1967
1968                 try:
1969                     self.post_process(filename, info_dict)
1970                 except (PostProcessingError) as err:
1971                     self.report_error('postprocessing: %s' % str(err))
1972                     return
1973                 self.record_download_archive(info_dict)
1974
1975     def download(self, url_list):
1976         """Download a given list of URLs."""
1977         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1978         if (len(url_list) > 1 and
1979                 outtmpl != '-' and
1980                 '%' not in outtmpl and
1981                 self.params.get('max_downloads') != 1):
1982             raise SameFileError(outtmpl)
1983
1984         for url in url_list:
1985             try:
1986                 # It also downloads the videos
1987                 res = self.extract_info(
1988                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
1989             except UnavailableVideoError:
1990                 self.report_error('unable to download video')
1991             except MaxDownloadsReached:
1992                 self.to_screen('[info] Maximum number of downloaded files reached.')
1993                 raise
1994             else:
1995                 if self.params.get('dump_single_json', False):
1996                     self.to_stdout(json.dumps(res))
1997
1998         return self._download_retcode
1999
2000     def download_with_info_file(self, info_filename):
2001         with contextlib.closing(fileinput.FileInput(
2002                 [info_filename], mode='r',
2003                 openhook=fileinput.hook_encoded('utf-8'))) as f:
2004             # FileInput doesn't have a read method, we can't call json.load
2005             info = self.filter_requested_info(json.loads('\n'.join(f)))
2006         try:
2007             self.process_ie_result(info, download=True)
2008         except DownloadError:
2009             webpage_url = info.get('webpage_url')
2010             if webpage_url is not None:
2011                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2012                 return self.download([webpage_url])
2013             else:
2014                 raise
2015         return self._download_retcode
2016
2017     @staticmethod
2018     def filter_requested_info(info_dict):
2019         return dict(
2020             (k, v) for k, v in info_dict.items()
2021             if k not in ['requested_formats', 'requested_subtitles'])
2022
2023     def post_process(self, filename, ie_info):
2024         """Run all the postprocessors on the given file."""
2025         info = dict(ie_info)
2026         info['filepath'] = filename
2027         pps_chain = []
2028         if ie_info.get('__postprocessors') is not None:
2029             pps_chain.extend(ie_info['__postprocessors'])
2030         pps_chain.extend(self._pps)
2031         for pp in pps_chain:
2032             files_to_delete = []
2033             try:
2034                 files_to_delete, info = pp.run(info)
2035             except PostProcessingError as e:
2036                 self.report_error(e.msg)
2037             if files_to_delete and not self.params.get('keepvideo', False):
2038                 for old_filename in files_to_delete:
2039                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2040                     try:
2041                         os.remove(encodeFilename(old_filename))
2042                     except (IOError, OSError):
2043                         self.report_warning('Unable to remove downloaded original file')
2044
2045     def _make_archive_id(self, info_dict):
2046         # Future-proof against any change in case
2047         # and backwards compatibility with prior versions
2048         extractor = info_dict.get('extractor_key')
2049         if extractor is None:
2050             if 'id' in info_dict:
2051                 extractor = info_dict.get('ie_key')  # key in a playlist
2052         if extractor is None:
2053             return None  # Incomplete video information
2054         return extractor.lower() + ' ' + info_dict['id']
2055
2056     def in_download_archive(self, info_dict):
2057         fn = self.params.get('download_archive')
2058         if fn is None:
2059             return False
2060
2061         vid_id = self._make_archive_id(info_dict)
2062         if vid_id is None:
2063             return False  # Incomplete video information
2064
2065         try:
2066             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
2067                 for line in archive_file:
2068                     if line.strip() == vid_id:
2069                         return True
2070         except IOError as ioe:
2071             if ioe.errno != errno.ENOENT:
2072                 raise
2073         return False
2074
2075     def record_download_archive(self, info_dict):
2076         fn = self.params.get('download_archive')
2077         if fn is None:
2078             return
2079         vid_id = self._make_archive_id(info_dict)
2080         assert vid_id
2081         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2082             archive_file.write(vid_id + '\n')
2083
2084     @staticmethod
2085     def format_resolution(format, default='unknown'):
2086         if format.get('vcodec') == 'none':
2087             return 'audio only'
2088         if format.get('resolution') is not None:
2089             return format['resolution']
2090         if format.get('height') is not None:
2091             if format.get('width') is not None:
2092                 res = '%sx%s' % (format['width'], format['height'])
2093             else:
2094                 res = '%sp' % format['height']
2095         elif format.get('width') is not None:
2096             res = '%dx?' % format['width']
2097         else:
2098             res = default
2099         return res
2100
2101     def _format_note(self, fdict):
2102         res = ''
2103         if fdict.get('ext') in ['f4f', 'f4m']:
2104             res += '(unsupported) '
2105         if fdict.get('language'):
2106             if res:
2107                 res += ' '
2108             res += '[%s] ' % fdict['language']
2109         if fdict.get('format_note') is not None:
2110             res += fdict['format_note'] + ' '
2111         if fdict.get('tbr') is not None:
2112             res += '%4dk ' % fdict['tbr']
2113         if fdict.get('container') is not None:
2114             if res:
2115                 res += ', '
2116             res += '%s container' % fdict['container']
2117         if (fdict.get('vcodec') is not None and
2118                 fdict.get('vcodec') != 'none'):
2119             if res:
2120                 res += ', '
2121             res += fdict['vcodec']
2122             if fdict.get('vbr') is not None:
2123                 res += '@'
2124         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2125             res += 'video@'
2126         if fdict.get('vbr') is not None:
2127             res += '%4dk' % fdict['vbr']
2128         if fdict.get('fps') is not None:
2129             if res:
2130                 res += ', '
2131             res += '%sfps' % fdict['fps']
2132         if fdict.get('acodec') is not None:
2133             if res:
2134                 res += ', '
2135             if fdict['acodec'] == 'none':
2136                 res += 'video only'
2137             else:
2138                 res += '%-5s' % fdict['acodec']
2139         elif fdict.get('abr') is not None:
2140             if res:
2141                 res += ', '
2142             res += 'audio'
2143         if fdict.get('abr') is not None:
2144             res += '@%3dk' % fdict['abr']
2145         if fdict.get('asr') is not None:
2146             res += ' (%5dHz)' % fdict['asr']
2147         if fdict.get('filesize') is not None:
2148             if res:
2149                 res += ', '
2150             res += format_bytes(fdict['filesize'])
2151         elif fdict.get('filesize_approx') is not None:
2152             if res:
2153                 res += ', '
2154             res += '~' + format_bytes(fdict['filesize_approx'])
2155         return res
2156
2157     def list_formats(self, info_dict):
2158         formats = info_dict.get('formats', [info_dict])
2159         table = [
2160             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
2161             for f in formats
2162             if f.get('preference') is None or f['preference'] >= -1000]
2163         if len(formats) > 1:
2164             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
2165
2166         header_line = ['format code', 'extension', 'resolution', 'note']
2167         self.to_screen(
2168             '[info] Available formats for %s:\n%s' %
2169             (info_dict['id'], render_table(header_line, table)))
2170
2171     def list_thumbnails(self, info_dict):
2172         thumbnails = info_dict.get('thumbnails')
2173         if not thumbnails:
2174             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2175             return
2176
2177         self.to_screen(
2178             '[info] Thumbnails for %s:' % info_dict['id'])
2179         self.to_screen(render_table(
2180             ['ID', 'width', 'height', 'URL'],
2181             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
2182
2183     def list_subtitles(self, video_id, subtitles, name='subtitles'):
2184         if not subtitles:
2185             self.to_screen('%s has no %s' % (video_id, name))
2186             return
2187         self.to_screen(
2188             'Available %s for %s:' % (name, video_id))
2189         self.to_screen(render_table(
2190             ['Language', 'formats'],
2191             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2192                 for lang, formats in subtitles.items()]))
2193
2194     def urlopen(self, req):
2195         """ Start an HTTP download """
2196         if isinstance(req, compat_basestring):
2197             req = sanitized_Request(req)
2198         return self._opener.open(req, timeout=self._socket_timeout)
2199
    def print_debug_header(self):
        """Write debugging information (encodings, versions, external tools,
        proxies) to the debug output when the 'verbose' option is enabled."""
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        # Some output streams (e.g. pipes) may have no encoding attribute
        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
        if _LAZY_LOADER:
            self._write_string('[debug] Lazy loading extractors enabled' + '\n')
        # Best effort: report the git revision when running from a checkout
        try:
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        except Exception:
            try:
                # Python 2 only: clear the swallowed exception so it does not
                # leak into later tracebacks (no-op / NameError on Python 3)
                sys.exc_clear()
            except Exception:
                pass
        self._write_string('[debug] Python version %s - %s\n' % (
            platform.python_version(), platform_name()))

        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Collect the proxy settings from every handler that carries any
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            # Opt-in only: contact yt-dl.org to report the public IP and
            # check whether a newer release is available
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
2267
    def _setup_opener(self):
        """Build the urllib opener (cookies, proxies, HTTPS, data: URLs)
        used for all HTTP traffic, and store it in self._opener."""
        timeout_val = self.params.get('socket_timeout')
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # In-memory cookies only
            self.cookiejar = compat_cookiejar.CookieJar()
        else:
            opts_cookiefile = expand_path(opts_cookiefile)
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
                opts_cookiefile)
            # Only load from disk when the file is actually readable; it
            # may not exist yet on first run
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load()

        cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
        if opts_proxy is not None:
            # An explicitly empty --proxy disables all proxying
            if opts_proxy == '':
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            # Fall back to the environment's proxy settings
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        data_handler = compat_urllib_request_DataHandler()

        # When passing our own FileHandler instance, build_opener won't add the
        # default FileHandler and allows us to disable the file protocol, which
        # can be used for malicious purposes (see
        # https://github.com/rg3/youtube-dl/issues/8227)
        file_handler = compat_urllib_request.FileHandler()

        def file_open(*args, **kwargs):
            raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
        file_handler.file_open = file_open

        opener = compat_urllib_request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)

        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
2320
2321     def encode(self, s):
2322         if isinstance(s, bytes):
2323             return s  # Already encoded
2324
2325         try:
2326             return s.encode(self.get_encoding())
2327         except UnicodeEncodeError as err:
2328             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2329             raise
2330
2331     def get_encoding(self):
2332         encoding = self.params.get('encoding')
2333         if encoding is None:
2334             encoding = preferredencoding()
2335         return encoding
2336
    def _write_thumbnails(self, info_dict, filename):
        """Download the video's thumbnail image(s) next to *filename*.

        With 'writethumbnail' only the last thumbnail in the list is fetched
        (presumably the best one — TODO confirm ordering contract with the
        extractors); with 'write_all_thumbnails' every known one is.
        """
        if self.params.get('writethumbnail', False):
            thumbnails = info_dict.get('thumbnails')
            if thumbnails:
                # Keep only the last entry of the list
                thumbnails = [thumbnails[-1]]
        elif self.params.get('write_all_thumbnails', False):
            thumbnails = info_dict.get('thumbnails')
        else:
            return

        if not thumbnails:
            # No thumbnails present, so return immediately
            return

        for t in thumbnails:
            thumb_ext = determine_ext(t['url'], 'jpg')
            # When writing several thumbnails, disambiguate file names and
            # screen messages with the thumbnail id
            suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
            thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
            t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext

            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                self.to_screen('[%s] %s: Thumbnail %sis already present' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
            else:
                self.to_screen('[%s] %s: Downloading thumbnail %s...' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
                try:
                    uf = self.urlopen(t['url'])
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    # Thumbnails are best-effort: warn and continue with the rest
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (t['url'], error_to_compat_str(err)))