[YoutubeDL] Fix typo in media extension compatibility checker
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # coding: utf-8
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import copy
9 import datetime
10 import errno
11 import fileinput
12 import io
13 import itertools
14 import json
15 import locale
16 import operator
17 import os
18 import platform
19 import re
20 import shutil
21 import subprocess
22 import socket
23 import sys
24 import time
25 import tokenize
26 import traceback
27 import random
28
29 from string import ascii_letters
30
31 from .compat import (
32     compat_basestring,
33     compat_cookiejar,
34     compat_get_terminal_size,
35     compat_http_client,
36     compat_kwargs,
37     compat_numeric_types,
38     compat_os_name,
39     compat_str,
40     compat_tokenize_tokenize,
41     compat_urllib_error,
42     compat_urllib_request,
43     compat_urllib_request_DataHandler,
44 )
45 from .utils import (
46     age_restricted,
47     args_to_str,
48     ContentTooShortError,
49     date_from_str,
50     DateRange,
51     DEFAULT_OUTTMPL,
52     determine_ext,
53     determine_protocol,
54     DownloadError,
55     encode_compat_str,
56     encodeFilename,
57     error_to_compat_str,
58     expand_path,
59     ExtractorError,
60     format_bytes,
61     formatSeconds,
62     GeoRestrictedError,
63     int_or_none,
64     ISO3166Utils,
65     locked_file,
66     make_HTTPS_handler,
67     MaxDownloadsReached,
68     orderedSet,
69     PagedList,
70     parse_filesize,
71     PerRequestProxyHandler,
72     platform_name,
73     PostProcessingError,
74     preferredencoding,
75     prepend_extension,
76     register_socks_protocols,
77     render_table,
78     replace_extension,
79     SameFileError,
80     sanitize_filename,
81     sanitize_path,
82     sanitize_url,
83     sanitized_Request,
84     std_headers,
85     subtitles_filename,
86     UnavailableVideoError,
87     url_basename,
88     version_tuple,
89     write_json_file,
90     write_string,
91     YoutubeDLCookieProcessor,
92     YoutubeDLHandler,
93 )
94 from .cache import Cache
95 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
96 from .extractor.openload import PhantomJSwrapper
97 from .downloader import get_suitable_downloader
98 from .downloader.rtmp import rtmpdump_version
99 from .postprocessor import (
100     FFmpegFixupM3u8PP,
101     FFmpegFixupM4aPP,
102     FFmpegFixupStretchedPP,
103     FFmpegMergerPP,
104     FFmpegPostProcessor,
105     get_postprocessor,
106 )
107 from .version import __version__
108
109 if compat_os_name == 'nt':
110     import ctypes
111
112
113 class YoutubeDL(object):
114     """YoutubeDL class.
115
116     YoutubeDL objects are the ones responsible of downloading the
117     actual video file and writing it to disk if the user has requested
118     it, among some other tasks. In most cases there should be one per
119     program. As, given a video URL, the downloader doesn't know how to
120     extract all the needed information, task that InfoExtractors do, it
121     has to pass the URL to one of them.
122
123     For this, YoutubeDL objects have a method that allows
124     InfoExtractors to be registered in a given order. When it is passed
125     a URL, the YoutubeDL object handles it to the first InfoExtractor it
126     finds that reports being able to handle it. The InfoExtractor extracts
127     all the information about the video or videos the URL refers to, and
128     YoutubeDL process the extracted information, possibly using a File
129     Downloader to download the video.
130
131     YoutubeDL objects accept a lot of parameters. In order not to saturate
132     the object constructor with arguments, it receives a dictionary of
133     options instead. These options are available through the params
134     attribute for the InfoExtractors to use. The YoutubeDL also
135     registers itself as the downloader in charge for the InfoExtractors
136     that are added to it, so this is a "mutual registration".
137
138     Available options:
139
140     username:          Username for authentication purposes.
141     password:          Password for authentication purposes.
142     videopassword:     Password for accessing a video.
143     ap_mso:            Adobe Pass multiple-system operator identifier.
144     ap_username:       Multiple-system operator account username.
145     ap_password:       Multiple-system operator account password.
146     usenetrc:          Use netrc for authentication instead.
147     verbose:           Print additional info to stdout.
148     quiet:             Do not print messages to stdout.
149     no_warnings:       Do not print out anything for warnings.
150     forceurl:          Force printing final URL.
151     forcetitle:        Force printing title.
152     forceid:           Force printing ID.
153     forcethumbnail:    Force printing thumbnail URL.
154     forcedescription:  Force printing description.
155     forcefilename:     Force printing final filename.
156     forceduration:     Force printing duration.
157     forcejson:         Force printing info_dict as JSON.
158     dump_single_json:  Force printing the info_dict of the whole playlist
159                        (or video) as a single JSON line.
160     simulate:          Do not download the video files.
161     format:            Video format code. See options.py for more information.
162     outtmpl:           Template for output names.
163     restrictfilenames: Do not allow "&" and spaces in file names
164     ignoreerrors:      Do not stop on download errors.
165     force_generic_extractor: Force downloader to use the generic extractor
166     nooverwrites:      Prevent overwriting files.
167     playliststart:     Playlist item to start at.
168     playlistend:       Playlist item to end at.
169     playlist_items:    Specific indices of playlist to download.
170     playlistreverse:   Download playlist items in reverse order.
171     playlistrandom:    Download playlist items in random order.
172     matchtitle:        Download only matching titles.
173     rejecttitle:       Reject downloads for matching titles.
174     logger:            Log messages to a logging.Logger instance.
175     logtostderr:       Log messages to stderr instead of stdout.
176     writedescription:  Write the video description to a .description file
177     writeinfojson:     Write the video description to a .info.json file
178     writeannotations:  Write the video annotations to a .annotations.xml file
179     writethumbnail:    Write the thumbnail image to a file
180     write_all_thumbnails:  Write all thumbnail formats to files
181     writesubtitles:    Write the video subtitles to a file
182     writeautomaticsub: Write the automatically generated subtitles to a file
183     allsubtitles:      Downloads all the subtitles of the video
184                        (requires writesubtitles or writeautomaticsub)
185     listsubtitles:     Lists all available subtitles for the video
186     subtitlesformat:   The format code for subtitles
187     subtitleslangs:    List of languages of the subtitles to download
188     keepvideo:         Keep the video file after post-processing
189     daterange:         A DateRange object, download only if the upload_date is in the range.
190     skip_download:     Skip the actual download of the video file
191     cachedir:          Location of the cache files in the filesystem.
192                        False to disable filesystem cache.
193     noplaylist:        Download single video instead of a playlist if in doubt.
194     age_limit:         An integer representing the user's age in years.
195                        Unsuitable videos for the given age are skipped.
196     min_views:         An integer representing the minimum view count the video
197                        must have in order to not be skipped.
198                        Videos without view count information are always
199                        downloaded. None for no limit.
200     max_views:         An integer representing the maximum view count.
201                        Videos that are more popular than that are not
202                        downloaded.
203                        Videos without view count information are always
204                        downloaded. None for no limit.
205     download_archive:  File name of a file where all downloads are recorded.
206                        Videos already present in the file are not downloaded
207                        again.
208     cookiefile:        File name where cookies should be read from and dumped to.
209     nocheckcertificate:Do not verify SSL certificates
210     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
211                        At the moment, this is only supported by YouTube.
212     proxy:             URL of the proxy server to use
213     geo_verification_proxy:  URL of the proxy to use for IP address verification
214                        on geo-restricted sites. (Experimental)
215     socket_timeout:    Time to wait for unresponsive hosts, in seconds
216     bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
218     debug_printtraffic:Print out sent and received HTTP traffic
219     include_ads:       Download ads as well
220     default_search:    Prepend this string if an input url is not valid.
221                        'auto' for elaborate guessing
222     encoding:          Use this encoding instead of the system-specified.
223     extract_flat:      Do not resolve URLs, return the immediate result.
224                        Pass in 'in_playlist' to only show this behavior for
225                        playlist items.
226     postprocessors:    A list of dictionaries, each with an entry
227                        * key:  The name of the postprocessor. See
228                                youtube_dl/postprocessor/__init__.py for a list.
229                        as well as any further keyword arguments for the
230                        postprocessor.
231     progress_hooks:    A list of functions that get called on download
232                        progress, with a dictionary with the entries
233                        * status: One of "downloading", "error", or "finished".
234                                  Check this first and ignore unknown values.
235
236                        If status is one of "downloading", or "finished", the
237                        following properties may also be present:
238                        * filename: The final filename (always present)
239                        * tmpfilename: The filename we're currently writing to
240                        * downloaded_bytes: Bytes on disk
241                        * total_bytes: Size of the whole file, None if unknown
242                        * total_bytes_estimate: Guess of the eventual file size,
243                                                None if unavailable.
244                        * elapsed: The number of seconds since download started.
245                        * eta: The estimated time in seconds, None if unknown
246                        * speed: The download speed in bytes/second, None if
247                                 unknown
248                        * fragment_index: The counter of the currently
249                                          downloaded video fragment.
250                        * fragment_count: The number of fragments (= individual
251                                          files that will be merged)
252
253                        Progress hooks are guaranteed to be called at least once
254                        (with status "finished") if the download is successful.
255     merge_output_format: Extension to use when merging formats.
256     fixup:             Automatically correct known faults of the file.
257                        One of:
258                        - "never": do nothing
259                        - "warn": only emit a warning
260                        - "detect_or_warn": check whether we can do anything
261                                            about it, warn otherwise (default)
262     source_address:    (Experimental) Client-side IP address to bind to.
263     call_home:         Boolean, true iff we are allowed to contact the
264                        youtube-dl servers for debugging.
265     sleep_interval:    Number of seconds to sleep before each download when
266                        used alone or a lower bound of a range for randomized
267                        sleep before each download (minimum possible number
268                        of seconds to sleep) when used along with
269                        max_sleep_interval.
270     max_sleep_interval:Upper bound of a range for randomized sleep before each
271                        download (maximum possible number of seconds to sleep).
272                        Must only be used along with sleep_interval.
273                        Actual sleep time will be a random float from range
274                        [sleep_interval; max_sleep_interval].
275     listformats:       Print an overview of available video formats and exit.
276     list_thumbnails:   Print a table of all thumbnails and exit.
277     match_filter:      A function that gets called with the info_dict of
278                        every video.
279                        If it returns a message, the video is ignored.
280                        If it returns None, the video is downloaded.
281                        match_filter_func in utils.py is one example for this.
282     no_color:          Do not emit color codes in output.
283     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
284                        HTTP header (experimental)
285     geo_bypass_country:
                       Two-letter ISO 3166-1 alpha-2 country code that will
                       be used for explicit geographic restriction bypassing
                       via faking X-Forwarded-For HTTP header (experimental)
289
290     The following options determine which downloader is picked:
291     external_downloader: Executable of the external downloader to call.
292                        None or unset for standard (built-in) downloader.
293     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
294                        if True, otherwise use ffmpeg/avconv if False, otherwise
295                        use downloader suggested by extractor if None.
296
297     The following parameters are not used by YoutubeDL itself, they are used by
298     the downloader (see youtube_dl/downloader/common.py):
299     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
300     noresizebuffer, retries, continuedl, noprogress, consoletitle,
301     xattr_set_filesize, external_downloader_args, hls_use_mpegts,
302     http_chunk_size.
303
304     The following options are used by the post processors:
305     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
306                        otherwise prefer avconv.
307     postprocessor_args: A list of additional command-line arguments for the
308                         postprocessor.
309
310     The following options are used by the Youtube extractor:
311     youtube_include_dash_manifest: If True (default), DASH manifests and related
312                         data will be downloaded and processed by extractor.
313                         You can reduce network I/O by disabling it if you don't
314                         care about DASH.
315     """
316
317     _NUMERIC_FIELDS = set((
318         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
319         'timestamp', 'upload_year', 'upload_month', 'upload_day',
320         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
321         'average_rating', 'comment_count', 'age_limit',
322         'start_time', 'end_time',
323         'chapter_number', 'season_number', 'episode_number',
324         'track_number', 'disc_number', 'release_year',
325         'playlist_index',
326     ))
327
328     params = None
329     _ies = []
330     _pps = []
331     _download_retcode = None
332     _num_downloads = None
333     _screen_file = None
334
335     def __init__(self, params=None, auto_init=True):
336         """Create a FileDownloader object with the given options."""
337         if params is None:
338             params = {}
339         self._ies = []
340         self._ies_instances = {}
341         self._pps = []
342         self._progress_hooks = []
343         self._download_retcode = 0
344         self._num_downloads = 0
345         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
346         self._err_file = sys.stderr
347         self.params = {
348             # Default parameters
349             'nocheckcertificate': False,
350         }
351         self.params.update(params)
352         self.cache = Cache(self)
353
354         def check_deprecated(param, option, suggestion):
355             if self.params.get(param) is not None:
356                 self.report_warning(
357                     '%s is deprecated. Use %s instead.' % (option, suggestion))
358                 return True
359             return False
360
361         if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
362             if self.params.get('geo_verification_proxy') is None:
363                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
364
365         check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
366         check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
367         check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
368
369         if params.get('bidi_workaround', False):
370             try:
371                 import pty
372                 master, slave = pty.openpty()
373                 width = compat_get_terminal_size().columns
374                 if width is None:
375                     width_args = []
376                 else:
377                     width_args = ['-w', str(width)]
378                 sp_kwargs = dict(
379                     stdin=subprocess.PIPE,
380                     stdout=slave,
381                     stderr=self._err_file)
382                 try:
383                     self._output_process = subprocess.Popen(
384                         ['bidiv'] + width_args, **sp_kwargs
385                     )
386                 except OSError:
387                     self._output_process = subprocess.Popen(
388                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
389                 self._output_channel = os.fdopen(master, 'rb')
390             except OSError as ose:
391                 if ose.errno == errno.ENOENT:
392                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
393                 else:
394                     raise
395
396         if (sys.platform != 'win32' and
397                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
398                 not params.get('restrictfilenames', False)):
399             # Unicode filesystem API will throw errors (#1474, #13027)
400             self.report_warning(
401                 'Assuming --restrict-filenames since file system encoding '
402                 'cannot encode all characters. '
403                 'Set the LC_ALL environment variable to fix this.')
404             self.params['restrictfilenames'] = True
405
406         if isinstance(params.get('outtmpl'), bytes):
407             self.report_warning(
408                 'Parameter outtmpl is bytes, but should be a unicode string. '
409                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
410
411         self._setup_opener()
412
413         if auto_init:
414             self.print_debug_header()
415             self.add_default_info_extractors()
416
417         for pp_def_raw in self.params.get('postprocessors', []):
418             pp_class = get_postprocessor(pp_def_raw['key'])
419             pp_def = dict(pp_def_raw)
420             del pp_def['key']
421             pp = pp_class(self, **compat_kwargs(pp_def))
422             self.add_post_processor(pp)
423
424         for ph in self.params.get('progress_hooks', []):
425             self.add_progress_hook(ph)
426
427         register_socks_protocols()
428
429     def warn_if_short_id(self, argv):
430         # short YouTube ID starting with dash?
431         idxs = [
432             i for i, a in enumerate(argv)
433             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
434         if idxs:
435             correct_argv = (
436                 ['youtube-dl'] +
437                 [a for i, a in enumerate(argv) if i not in idxs] +
438                 ['--'] + [argv[i] for i in idxs]
439             )
440             self.report_warning(
441                 'Long argument string detected. '
442                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
443                 args_to_str(correct_argv))
444
445     def add_info_extractor(self, ie):
446         """Add an InfoExtractor object to the end of the list."""
447         self._ies.append(ie)
448         if not isinstance(ie, type):
449             self._ies_instances[ie.ie_key()] = ie
450             ie.set_downloader(self)
451
452     def get_info_extractor(self, ie_key):
453         """
454         Get an instance of an IE with name ie_key, it will try to get one from
455         the _ies list, if there's no instance it will create a new one and add
456         it to the extractor list.
457         """
458         ie = self._ies_instances.get(ie_key)
459         if ie is None:
460             ie = get_info_extractor(ie_key)()
461             self.add_info_extractor(ie)
462         return ie
463
464     def add_default_info_extractors(self):
465         """
466         Add the InfoExtractors returned by gen_extractors to the end of the list
467         """
468         for ie in gen_extractor_classes():
469             self.add_info_extractor(ie)
470
471     def add_post_processor(self, pp):
472         """Add a PostProcessor object to the end of the chain."""
473         self._pps.append(pp)
474         pp.set_downloader(self)
475
476     def add_progress_hook(self, ph):
477         """Add the progress hook (currently only for the file downloader)"""
478         self._progress_hooks.append(ph)
479
480     def _bidi_workaround(self, message):
481         if not hasattr(self, '_output_channel'):
482             return message
483
484         assert hasattr(self, '_output_process')
485         assert isinstance(message, compat_str)
486         line_count = message.count('\n') + 1
487         self._output_process.stdin.write((message + '\n').encode('utf-8'))
488         self._output_process.stdin.flush()
489         res = ''.join(self._output_channel.readline().decode('utf-8')
490                       for _ in range(line_count))
491         return res[:-len('\n')]
492
    def to_screen(self, message, skip_eol=False):
        """Print message to the screen file, suppressed in quiet mode."""
        return self.to_stdout(message, skip_eol, check_quiet=True)
496
    def _write_string(self, s, out=None):
        # Low-level write that honors the user-selected output encoding.
        write_string(s, out=out, encoding=self.params.get('encoding'))
499
500     def to_stdout(self, message, skip_eol=False, check_quiet=False):
501         """Print message to stdout if not in quiet mode."""
502         if self.params.get('logger'):
503             self.params['logger'].debug(message)
504         elif not check_quiet or not self.params.get('quiet', False):
505             message = self._bidi_workaround(message)
506             terminator = ['\n', ''][skip_eol]
507             output = message + terminator
508
509             self._write_string(output, self._screen_file)
510
511     def to_stderr(self, message):
512         """Print message to stderr."""
513         assert isinstance(message, compat_str)
514         if self.params.get('logger'):
515             self.params['logger'].error(message)
516         else:
517             message = self._bidi_workaround(message)
518             output = message + '\n'
519             self._write_string(output, self._err_file)
520
    def to_console_title(self, message):
        # Set the terminal/console window title to `message`, when enabled.
        if not self.params.get('consoletitle', False):
            return
        if compat_os_name == 'nt':
            # Only touch the title when a real console window is attached
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            # xterm OSC 0 escape: set icon name and window title
            self._write_string('\033]0;%s\007' % message, self._screen_file)
531
532     def save_console_title(self):
533         if not self.params.get('consoletitle', False):
534             return
535         if self.params.get('simulate', False):
536             return
537         if compat_os_name != 'nt' and 'TERM' in os.environ:
538             # Save the title on stack
539             self._write_string('\033[22;0t', self._screen_file)
540
541     def restore_console_title(self):
542         if not self.params.get('consoletitle', False):
543             return
544         if self.params.get('simulate', False):
545             return
546         if compat_os_name != 'nt' and 'TERM' in os.environ:
547             # Restore the title from stack
548             self._write_string('\033[23;0t', self._screen_file)
549
550     def __enter__(self):
551         self.save_console_title()
552         return self
553
554     def __exit__(self, *args):
555         self.restore_console_title()
556
557         if self.params.get('cookiefile') is not None:
558             self.cookiejar.save()
559
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # Wrapped exceptions (e.g. DownloadError) carry the
                    # original exc_info of the underlying error; include it
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: dump the current call stack
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Re-raise as DownloadError, preferring the wrapped exception's
            # original exc_info when one is available
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # Errors are ignored: record the failure in the process return code
        self._download_retcode = 1
589
590     def report_warning(self, message):
591         '''
592         Print the message to stderr, it will be prefixed with 'WARNING:'
593         If stderr is a tty file the 'WARNING:' will be colored
594         '''
595         if self.params.get('logger') is not None:
596             self.params['logger'].warning(message)
597         else:
598             if self.params.get('no_warnings'):
599                 return
600             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
601                 _msg_header = '\033[0;33mWARNING:\033[0m'
602             else:
603                 _msg_header = 'WARNING:'
604             warning_message = '%s %s' % (_msg_header, message)
605             self.to_stderr(warning_message)
606
607     def report_error(self, message, tb=None):
608         '''
609         Do the same as trouble, but prefixes the message with 'ERROR:', colored
610         in red if stderr is a tty file.
611         '''
612         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
613             _msg_header = '\033[0;31mERROR:\033[0m'
614         else:
615             _msg_header = 'ERROR:'
616         error_message = '%s %s' % (_msg_header, message)
617         self.trouble(error_message, tb)
618
619     def report_file_already_downloaded(self, file_name):
620         """Report file has already been fully downloaded."""
621         try:
622             self.to_screen('[download] %s has already been downloaded' % file_name)
623         except UnicodeEncodeError:
624             self.to_screen('[download] The file has already been downloaded')
625
626     def prepare_filename(self, info_dict):
627         """Generate the output filename."""
628         try:
629             template_dict = dict(info_dict)
630
631             template_dict['epoch'] = int(time.time())
632             autonumber_size = self.params.get('autonumber_size')
633             if autonumber_size is None:
634                 autonumber_size = 5
635             template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
636             if template_dict.get('resolution') is None:
637                 if template_dict.get('width') and template_dict.get('height'):
638                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
639                 elif template_dict.get('height'):
640                     template_dict['resolution'] = '%sp' % template_dict['height']
641                 elif template_dict.get('width'):
642                     template_dict['resolution'] = '%dx?' % template_dict['width']
643
644             sanitize = lambda k, v: sanitize_filename(
645                 compat_str(v),
646                 restricted=self.params.get('restrictfilenames'),
647                 is_id=(k == 'id' or k.endswith('_id')))
648             template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
649                                  for k, v in template_dict.items()
650                                  if v is not None and not isinstance(v, (list, tuple, dict)))
651             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
652
653             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
654
655             # For fields playlist_index and autonumber convert all occurrences
656             # of %(field)s to %(field)0Nd for backward compatibility
657             field_size_compat_map = {
658                 'playlist_index': len(str(template_dict['n_entries'])),
659                 'autonumber': autonumber_size,
660             }
661             FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
662             mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
663             if mobj:
664                 outtmpl = re.sub(
665                     FIELD_SIZE_COMPAT_RE,
666                     r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
667                     outtmpl)
668
669             # Missing numeric fields used together with integer presentation types
670             # in format specification will break the argument substitution since
671             # string 'NA' is returned for missing fields. We will patch output
672             # template for missing fields to meet string presentation type.
673             for numeric_field in self._NUMERIC_FIELDS:
674                 if numeric_field not in template_dict:
675                     # As of [1] format syntax is:
676                     #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
677                     # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
678                     FORMAT_RE = r'''(?x)
679                         (?<!%)
680                         %
681                         \({0}\)  # mapping key
682                         (?:[#0\-+ ]+)?  # conversion flags (optional)
683                         (?:\d+)?  # minimum field width (optional)
684                         (?:\.\d+)?  # precision (optional)
685                         [hlL]?  # length modifier (optional)
686                         [diouxXeEfFgGcrs%]  # conversion type
687                     '''
688                     outtmpl = re.sub(
689                         FORMAT_RE.format(numeric_field),
690                         r'%({0})s'.format(numeric_field), outtmpl)
691
692             # expand_path translates '%%' into '%' and '$$' into '$'
693             # correspondingly that is not what we want since we need to keep
694             # '%%' intact for template dict substitution step. Working around
695             # with boundary-alike separator hack.
696             sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
697             outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
698
699             # outtmpl should be expand_path'ed before template dict substitution
700             # because meta fields may contain env variables we don't want to
701             # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
702             # title "Hello $PATH", we don't want `$PATH` to be expanded.
703             filename = expand_path(outtmpl).replace(sep, '') % template_dict
704
705             # Temporary fix for #4787
706             # 'Treat' all problem characters by passing filename through preferredencoding
707             # to workaround encoding issues with subprocess on python2 @ Windows
708             if sys.version_info < (3, 0) and sys.platform == 'win32':
709                 filename = encodeFilename(filename, True).decode(preferredencoding())
710             return sanitize_path(filename)
711         except ValueError as err:
712             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
713             return None
714
715     def _match_entry(self, info_dict, incomplete):
716         """ Returns None iff the file should be downloaded """
717
718         video_title = info_dict.get('title', info_dict.get('id', 'video'))
719         if 'title' in info_dict:
720             # This can happen when we're just evaluating the playlist
721             title = info_dict['title']
722             matchtitle = self.params.get('matchtitle', False)
723             if matchtitle:
724                 if not re.search(matchtitle, title, re.IGNORECASE):
725                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
726             rejecttitle = self.params.get('rejecttitle', False)
727             if rejecttitle:
728                 if re.search(rejecttitle, title, re.IGNORECASE):
729                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
730         date = info_dict.get('upload_date')
731         if date is not None:
732             dateRange = self.params.get('daterange', DateRange())
733             if date not in dateRange:
734                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
735         view_count = info_dict.get('view_count')
736         if view_count is not None:
737             min_views = self.params.get('min_views')
738             if min_views is not None and view_count < min_views:
739                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
740             max_views = self.params.get('max_views')
741             if max_views is not None and view_count > max_views:
742                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
743         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
744             return 'Skipping "%s" because it is age restricted' % video_title
745         if self.in_download_archive(info_dict):
746             return '%s has already been recorded in archive' % video_title
747
748         if not incomplete:
749             match_filter = self.params.get('match_filter')
750             if match_filter is not None:
751                 ret = match_filter(info_dict)
752                 if ret is not None:
753                     return ret
754
755         return None
756
757     @staticmethod
758     def add_extra_info(info_dict, extra_info):
759         '''Set the keys from extra_info in info dict if they are missing'''
760         for key, value in extra_info.items():
761             info_dict.setdefault(key, value)
762
763     def extract_info(self, url, download=True, ie_key=None, extra_info={},
764                      process=True, force_generic_extractor=False):
765         '''
766         Returns a list with a dictionary for each video we find.
767         If 'download', also downloads the videos.
768         extra_info is a dict containing the extra values to add to each result
769         '''
770
771         if not ie_key and force_generic_extractor:
772             ie_key = 'Generic'
773
774         if ie_key:
775             ies = [self.get_info_extractor(ie_key)]
776         else:
777             ies = self._ies
778
779         for ie in ies:
780             if not ie.suitable(url):
781                 continue
782
783             ie = self.get_info_extractor(ie.ie_key())
784             if not ie.working():
785                 self.report_warning('The program functionality for this site has been marked as broken, '
786                                     'and will probably not work.')
787
788             try:
789                 ie_result = ie.extract(url)
790                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
791                     break
792                 if isinstance(ie_result, list):
793                     # Backwards compatibility: old IE result format
794                     ie_result = {
795                         '_type': 'compat_list',
796                         'entries': ie_result,
797                     }
798                 self.add_default_extra_info(ie_result, ie, url)
799                 if process:
800                     return self.process_ie_result(ie_result, download, extra_info)
801                 else:
802                     return ie_result
803             except GeoRestrictedError as e:
804                 msg = e.msg
805                 if e.countries:
806                     msg += '\nThis video is available in %s.' % ', '.join(
807                         map(ISO3166Utils.short2full, e.countries))
808                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
809                 self.report_error(msg)
810                 break
811             except ExtractorError as e:  # An error we somewhat expected
812                 self.report_error(compat_str(e), e.format_traceback())
813                 break
814             except MaxDownloadsReached:
815                 raise
816             except Exception as e:
817                 if self.params.get('ignoreerrors', False):
818                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
819                     break
820                 else:
821                     raise
822         else:
823             self.report_error('no suitable InfoExtractor for URL %s' % url)
824
825     def add_default_extra_info(self, ie_result, ie, url):
826         self.add_extra_info(ie_result, {
827             'extractor': ie.IE_NAME,
828             'webpage_url': url,
829             'webpage_url_basename': url_basename(url),
830             'extractor_key': ie.ie_key(),
831         })
832
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.

        Dispatches on ie_result['_type']: 'video' results are processed
        directly, 'url'/'url_transparent' results are re-extracted, and
        'playlist'/'multi_video'/'compat_list' results have each of their
        entries processed recursively.
        """
        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            ie_result['url'] = sanitize_url(ie_result['url'])
            extract_flat = self.params.get('extract_flat', False)
            # With --flat-playlist, URL references found inside a playlist
            # are not resolved any further.
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                    extract_flat is True):
                if self.params.get('forcejson', False):
                    self.to_stdout(json.dumps(ie_result))
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # extract_info may return None when ignoreerrors is enabled and
            # extraction failed with an error, don't crash and return early
            # in this case
            if not info:
                return info

            # Non-None fields from the embedding page override the embedded
            # info, except identity fields which must stay those of the
            # embedded video.
            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            # Extracted info may not be a video result (i.e.
            # info.get('_type', 'video') != video) but rather an url or
            # url_transparent. In such cases outer metadata (from ie_result)
            # should be propagated to inner one (info). For this to happen
            # _type of info should be overridden with url_transparent. This
            # fixes issue from https://github.com/rg3/youtube-dl/pull/11163.
            if new_result.get('_type') == 'url':
                new_result['_type'] = 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type in ('playlist', 'multi_video'):
            # We process each entry in the playlist
            playlist = ie_result.get('title') or ie_result.get('id')
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # --playlist-start/--playlist-end (1-based, inclusive).
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend')
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

            # --playlist-items, e.g. "1-3,7" -> [1, 2, 3, 7].
            playlistitems_str = self.params.get('playlist_items')
            playlistitems = None
            if playlistitems_str is not None:
                def iter_playlistitems(format):
                    for string_segment in format.split(','):
                        if '-' in string_segment:
                            start, end = string_segment.split('-')
                            for item in range(int(start), int(end) + 1):
                                yield int(item)
                        else:
                            yield int(string_segment)
                playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

            ie_entries = ie_result['entries']

            # Select the requested 1-based indices, silently ignoring the
            # out-of-range ones.
            def make_playlistitems_entries(list_ie_entries):
                num_entries = len(list_ie_entries)
                return [
                    list_ie_entries[i - 1] for i in playlistitems
                    if -num_entries <= i - 1 < num_entries]

            def report_download(num_entries):
                self.to_screen(
                    '[%s] playlist %s: Downloading %d videos' %
                    (ie_result['extractor'], playlist, num_entries))

            # Entries may come as a plain list, a lazily evaluated PagedList
            # or an arbitrary iterable; slice accordingly.
            if isinstance(ie_entries, list):
                n_all_entries = len(ie_entries)
                if playlistitems:
                    entries = make_playlistitems_entries(ie_entries)
                else:
                    entries = ie_entries[playliststart:playlistend]
                n_entries = len(entries)
                self.to_screen(
                    '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            elif isinstance(ie_entries, PagedList):
                if playlistitems:
                    entries = []
                    for item in playlistitems:
                        entries.extend(ie_entries.getslice(
                            item - 1, item
                        ))
                else:
                    entries = ie_entries.getslice(
                        playliststart, playlistend)
                n_entries = len(entries)
                report_download(n_entries)
            else:  # iterable
                if playlistitems:
                    entries = make_playlistitems_entries(list(itertools.islice(
                        ie_entries, 0, max(playlistitems))))
                else:
                    entries = list(itertools.islice(
                        ie_entries, playliststart, playlistend))
                n_entries = len(entries)
                report_download(n_entries)

            if self.params.get('playlistreverse', False):
                entries = entries[::-1]

            if self.params.get('playlistrandom', False):
                random.shuffle(entries)

            # Geo-bypass IP (if any) has to be propagated to every entry.
            x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                # This __x_forwarded_for_ip thing is a bit ugly but requires
                # minimal changes
                if x_forwarded_for:
                    entry['__x_forwarded_for_ip'] = x_forwarded_for
                # Playlist metadata inherited by each entry (used e.g. by the
                # output template).
                extra = {
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_id': ie_result.get('id'),
                    'playlist_title': ie_result.get('title'),
                    'playlist_uploader': ie_result.get('uploader'),
                    'playlist_uploader_id': ie_result.get('uploader_id'),
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

                reason = self._match_entry(entry, incomplete=True)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                    continue

                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            self.to_screen('[download] Finished downloading playlist: %s' % playlist)
            return ie_result
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            # Upgrade each entry of the legacy list format to a full result
            # and process it as usual.
            def _fixup(r):
                self.add_extra_info(
                    r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    }
                )
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
1027
1028     def _build_format_filter(self, filter_spec):
1029         " Returns a function to filter the formats according to the filter_spec "
1030
1031         OPERATORS = {
1032             '<': operator.lt,
1033             '<=': operator.le,
1034             '>': operator.gt,
1035             '>=': operator.ge,
1036             '=': operator.eq,
1037             '!=': operator.ne,
1038         }
1039         operator_rex = re.compile(r'''(?x)\s*
1040             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
1041             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1042             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
1043             $
1044             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1045         m = operator_rex.search(filter_spec)
1046         if m:
1047             try:
1048                 comparison_value = int(m.group('value'))
1049             except ValueError:
1050                 comparison_value = parse_filesize(m.group('value'))
1051                 if comparison_value is None:
1052                     comparison_value = parse_filesize(m.group('value') + 'B')
1053                 if comparison_value is None:
1054                     raise ValueError(
1055                         'Invalid value %r in format specification %r' % (
1056                             m.group('value'), filter_spec))
1057             op = OPERATORS[m.group('op')]
1058
1059         if not m:
1060             STR_OPERATORS = {
1061                 '=': operator.eq,
1062                 '!=': operator.ne,
1063                 '^=': lambda attr, value: attr.startswith(value),
1064                 '$=': lambda attr, value: attr.endswith(value),
1065                 '*=': lambda attr, value: value in attr,
1066             }
1067             str_operator_rex = re.compile(r'''(?x)
1068                 \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
1069                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
1070                 \s*(?P<value>[a-zA-Z0-9._-]+)
1071                 \s*$
1072                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1073             m = str_operator_rex.search(filter_spec)
1074             if m:
1075                 comparison_value = m.group('value')
1076                 op = STR_OPERATORS[m.group('op')]
1077
1078         if not m:
1079             raise ValueError('Invalid filter specification %r' % filter_spec)
1080
1081         def _filter(f):
1082             actual_value = f.get(m.group('key'))
1083             if actual_value is None:
1084                 return m.group('none_inclusive')
1085             return op(actual_value, comparison_value)
1086         return _filter
1087
1088     def _default_format_spec(self, info_dict, download=True):
1089
1090         def can_merge():
1091             merger = FFmpegMergerPP(self)
1092             return merger.available and merger.can_merge()
1093
1094         def prefer_best():
1095             if self.params.get('simulate', False):
1096                 return False
1097             if not download:
1098                 return False
1099             if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
1100                 return True
1101             if info_dict.get('is_live'):
1102                 return True
1103             if not can_merge():
1104                 return True
1105             return False
1106
1107         req_format_list = ['bestvideo+bestaudio', 'best']
1108         if prefer_best():
1109             req_format_list.reverse()
1110         return '/'.join(req_format_list)
1111
1112     def build_format_selector(self, format_spec):
1113         def syntax_error(note, start):
1114             message = (
1115                 'Invalid format specification: '
1116                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1117             return SyntaxError(message)
1118
1119         PICKFIRST = 'PICKFIRST'
1120         MERGE = 'MERGE'
1121         SINGLE = 'SINGLE'
1122         GROUP = 'GROUP'
1123         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1124
1125         def _parse_filter(tokens):
1126             filter_parts = []
1127             for type, string, start, _, _ in tokens:
1128                 if type == tokenize.OP and string == ']':
1129                     return ''.join(filter_parts)
1130                 else:
1131                     filter_parts.append(string)
1132
        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            # Operators that keep their meaning in the format language.
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            # Accumulator for the token run currently being glued together.
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    # Flush any pending glued token before the bracket.
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    # Meaningful operator: flush pending run, pass it through.
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    # Any other name/number/operator is glued onto the current
                    # run (keeping the position of its first token).
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            # Flush whatever is still pending at end of stream.
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line
1163
        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Recursive-descent parser for the format selection language:
            # ',' separates independent selectors, '/' picks the first
            # matching alternative, '+' merges video+audio, '(...)' groups
            # and '[...]' attaches a filter to the preceding selector.
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        # End of the right-hand operand of '+'.
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        # End of the right-hand operand of '/'.
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        # A filter without a preceding selector applies to 'best'.
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        video_selector = current_selector
                        audio_selector = _parse_format_selection(tokens, inside_merge=True)
                        if not video_selector or not audio_selector:
                            raise syntax_error('"+" must be between two format selectors', start)
                        current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors
1219
1220         def _build_selector_function(selector):
1221             if isinstance(selector, list):
1222                 fs = [_build_selector_function(s) for s in selector]
1223
1224                 def selector_function(ctx):
1225                     for f in fs:
1226                         for format in f(ctx):
1227                             yield format
1228                 return selector_function
1229             elif selector.type == GROUP:
1230                 selector_function = _build_selector_function(selector.selector)
1231             elif selector.type == PICKFIRST:
1232                 fs = [_build_selector_function(s) for s in selector.selector]
1233
1234                 def selector_function(ctx):
1235                     for f in fs:
1236                         picked_formats = list(f(ctx))
1237                         if picked_formats:
1238                             return picked_formats
1239                     return []
1240             elif selector.type == SINGLE:
1241                 format_spec = selector.selector
1242
1243                 def selector_function(ctx):
1244                     formats = list(ctx['formats'])
1245                     if not formats:
1246                         return
1247                     if format_spec == 'all':
1248                         for f in formats:
1249                             yield f
1250                     elif format_spec in ['best', 'worst', None]:
1251                         format_idx = 0 if format_spec == 'worst' else -1
1252                         audiovideo_formats = [
1253                             f for f in formats
1254                             if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
1255                         if audiovideo_formats:
1256                             yield audiovideo_formats[format_idx]
1257                         # for extractors with incomplete formats (audio only (soundcloud)
1258                         # or video only (imgur)) we will fallback to best/worst
1259                         # {video,audio}-only format
1260                         elif ctx['incomplete_formats']:
1261                             yield formats[format_idx]
1262                     elif format_spec == 'bestaudio':
1263                         audio_formats = [
1264                             f for f in formats
1265                             if f.get('vcodec') == 'none']
1266                         if audio_formats:
1267                             yield audio_formats[-1]
1268                     elif format_spec == 'worstaudio':
1269                         audio_formats = [
1270                             f for f in formats
1271                             if f.get('vcodec') == 'none']
1272                         if audio_formats:
1273                             yield audio_formats[0]
1274                     elif format_spec == 'bestvideo':
1275                         video_formats = [
1276                             f for f in formats
1277                             if f.get('acodec') == 'none']
1278                         if video_formats:
1279                             yield video_formats[-1]
1280                     elif format_spec == 'worstvideo':
1281                         video_formats = [
1282                             f for f in formats
1283                             if f.get('acodec') == 'none']
1284                         if video_formats:
1285                             yield video_formats[0]
1286                     else:
1287                         extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
1288                         if format_spec in extensions:
1289                             filter_f = lambda f: f['ext'] == format_spec
1290                         else:
1291                             filter_f = lambda f: f['format_id'] == format_spec
1292                         matches = list(filter(filter_f, formats))
1293                         if matches:
1294                             yield matches[-1]
1295             elif selector.type == MERGE:
1296                 def _merge(formats_info):
1297                     format_1, format_2 = [f['format_id'] for f in formats_info]
1298                     # The first format must contain the video and the
1299                     # second the audio
1300                     if formats_info[0].get('vcodec') == 'none':
1301                         self.report_error('The first format must '
1302                                           'contain the video, try using '
1303                                           '"-f %s+%s"' % (format_2, format_1))
1304                         return
1305                     # Formats must be opposite (video+audio)
1306                     if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
1307                         self.report_error(
1308                             'Both formats %s and %s are video-only, you must specify "-f video+audio"'
1309                             % (format_1, format_2))
1310                         return
1311                     output_ext = (
1312                         formats_info[0]['ext']
1313                         if self.params.get('merge_output_format') is None
1314                         else self.params['merge_output_format'])
1315                     return {
1316                         'requested_formats': formats_info,
1317                         'format': '%s+%s' % (formats_info[0].get('format'),
1318                                              formats_info[1].get('format')),
1319                         'format_id': '%s+%s' % (formats_info[0].get('format_id'),
1320                                                 formats_info[1].get('format_id')),
1321                         'width': formats_info[0].get('width'),
1322                         'height': formats_info[0].get('height'),
1323                         'resolution': formats_info[0].get('resolution'),
1324                         'fps': formats_info[0].get('fps'),
1325                         'vcodec': formats_info[0].get('vcodec'),
1326                         'vbr': formats_info[0].get('vbr'),
1327                         'stretched_ratio': formats_info[0].get('stretched_ratio'),
1328                         'acodec': formats_info[1].get('acodec'),
1329                         'abr': formats_info[1].get('abr'),
1330                         'ext': output_ext,
1331                     }
1332                 video_selector, audio_selector = map(_build_selector_function, selector.selector)
1333
                def selector_function(ctx):
                    # Cross product of the two sub-selections: every selected
                    # video format is paired with every selected audio format
                    # and merged. Each selector runs on its own deep copy of
                    # ctx so filtering on one side cannot affect the other.
                    for pair in itertools.product(
                            video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
                        yield _merge(pair)
1338
1339             filters = [self._build_format_filter(f) for f in selector.filters]
1340
1341             def final_selector(ctx):
1342                 ctx_copy = copy.deepcopy(ctx)
1343                 for _filter in filters:
1344                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1345                 return selector_function(ctx_copy)
1346             return final_selector
1347
1348         stream = io.BytesIO(format_spec.encode('utf-8'))
1349         try:
1350             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1351         except tokenize.TokenError:
1352             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1353
1354         class TokenIterator(object):
1355             def __init__(self, tokens):
1356                 self.tokens = tokens
1357                 self.counter = 0
1358
1359             def __iter__(self):
1360                 return self
1361
1362             def __next__(self):
1363                 if self.counter >= len(self.tokens):
1364                     raise StopIteration()
1365                 value = self.tokens[self.counter]
1366                 self.counter += 1
1367                 return value
1368
1369             next = __next__
1370
1371             def restore_last_token(self):
1372                 self.counter -= 1
1373
1374         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1375         return _build_selector_function(parsed_selector)
1376
1377     def _calc_headers(self, info_dict):
1378         res = std_headers.copy()
1379
1380         add_headers = info_dict.get('http_headers')
1381         if add_headers:
1382             res.update(add_headers)
1383
1384         cookies = self._calc_cookies(info_dict)
1385         if cookies:
1386             res['Cookie'] = cookies
1387
1388         if 'X-Forwarded-For' not in res:
1389             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1390             if x_forwarded_for_ip:
1391                 res['X-Forwarded-For'] = x_forwarded_for_ip
1392
1393         return res
1394
1395     def _calc_cookies(self, info_dict):
1396         pr = sanitized_Request(info_dict['url'])
1397         self.cookiejar.add_cookie_header(pr)
1398         return pr.get_header('Cookie')
1399
    def process_video_result(self, info_dict, download=True):
        """Sanitize and complete a single extractor result of type 'video',
        run format selection on it and, when download is True, hand every
        selected format over to process_info().

        Returns info_dict updated with the last (best) selected format.
        Raises ExtractorError on missing mandatory fields, when no formats
        are found, or when the requested format is not available.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        def report_force_conversion(field, field_not, conversion):
            # Warn that an extractor returned a wrongly-typed field value
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            # Coerce a non-string field in place to compat_str, with a warning
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            # Coerce every known numeric field in place to int, with a warning
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            # Promote a single 'thumbnail' entry to the 'thumbnails' list
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if thumbnails:
            # Sort worst-to-best: preference, then size, id/url as tiebreakers
            thumbnails.sort(key=lambda t: (
                t.get('preference') if t.get('preference') is not None else -1,
                t.get('width') if t.get('width') is not None else -1,
                t.get('height') if t.get('height') is not None else -1,
                t.get('id') if t.get('id') is not None else '', t.get('url')))
            for i, t in enumerate(thumbnails):
                t['url'] = sanitize_url(t['url'])
                if t.get('width') and t.get('height'):
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                if t.get('id') is None:
                    t['id'] = '%d' % i

        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
            return

        thumbnail = info_dict.get('thumbnail')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            # thumbnails are sorted worst-to-best, so the last one is the best
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Working around out-of-range timestamp values (e.g. negative ones on Windows,
            # see http://bugs.python.org/issue1646728)
            try:
                upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
                info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
            except (ValueError, OverflowError, OSError):
                pass

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        subtitles = info_dict.get('subtitles')
        if subtitles:
            for _, subtitle in subtitles.items():
                for subtitle_format in subtitle:
                    if subtitle_format.get('url'):
                        subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                    if subtitle_format.get('ext') is None:
                        subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        if self.params.get('listsubtitles', False):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
            return
        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles,
            info_dict.get('automatic_captions'))

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            raise ExtractorError('No video formats found!')

        def is_wellformed(f):
            # A format without a usable 'url' cannot be downloaded
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats))

        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        for format_id, ambiguous_formats in formats_dict.items():
            if len(ambiguous_formats) > 1:
                for i, format in enumerate(ambiguous_formats):
                    format['format_id'] = '%s-%d' % (format_id, i)

        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if format.get('ext') is None:
                format['ext'] = determine_ext(format['url']).lower()
            # Automatically determine protocol if missing (useful for format
            # selection purposes)
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats'):
            self.list_formats(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            req_format = self._default_format_spec(info_dict, download=download)
            if self.params.get('verbose'):
                self.to_stdout('[debug] Default format spec: %s' % req_format)

        format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/rg3/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/rg3/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) or
            # all formats are audio-only
            all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            raise ExtractorError('requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
1631
1632     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1633         """Select the requested subtitles and their format"""
1634         available_subs = {}
1635         if normal_subtitles and self.params.get('writesubtitles'):
1636             available_subs.update(normal_subtitles)
1637         if automatic_captions and self.params.get('writeautomaticsub'):
1638             for lang, cap_info in automatic_captions.items():
1639                 if lang not in available_subs:
1640                     available_subs[lang] = cap_info
1641
1642         if (not self.params.get('writesubtitles') and not
1643                 self.params.get('writeautomaticsub') or not
1644                 available_subs):
1645             return None
1646
1647         if self.params.get('allsubtitles', False):
1648             requested_langs = available_subs.keys()
1649         else:
1650             if self.params.get('subtitleslangs', False):
1651                 requested_langs = self.params.get('subtitleslangs')
1652             elif 'en' in available_subs:
1653                 requested_langs = ['en']
1654             else:
1655                 requested_langs = [list(available_subs.keys())[0]]
1656
1657         formats_query = self.params.get('subtitlesformat', 'best')
1658         formats_preference = formats_query.split('/') if formats_query else []
1659         subs = {}
1660         for lang in requested_langs:
1661             formats = available_subs.get(lang)
1662             if formats is None:
1663                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1664                 continue
1665             for ext in formats_preference:
1666                 if ext == 'best':
1667                     f = formats[-1]
1668                     break
1669                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1670                 if matches:
1671                     f = matches[-1]
1672                     break
1673             else:
1674                 f = formats[-1]
1675                 self.report_warning(
1676                     'No subtitle format found matching "%s" for language %s, '
1677                     'using %s' % (formats_query, lang, f['ext']))
1678             subs[lang] = f
1679         return subs
1680
1681     def process_info(self, info_dict):
1682         """Process a single resolved IE result."""
1683
1684         assert info_dict.get('_type', 'video') == 'video'
1685
1686         max_downloads = self.params.get('max_downloads')
1687         if max_downloads is not None:
1688             if self._num_downloads >= int(max_downloads):
1689                 raise MaxDownloadsReached()
1690
1691         info_dict['fulltitle'] = info_dict['title']
1692         if len(info_dict['title']) > 200:
1693             info_dict['title'] = info_dict['title'][:197] + '...'
1694
1695         if 'format' not in info_dict:
1696             info_dict['format'] = info_dict['ext']
1697
1698         reason = self._match_entry(info_dict, incomplete=False)
1699         if reason is not None:
1700             self.to_screen('[download] ' + reason)
1701             return
1702
1703         self._num_downloads += 1
1704
1705         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1706
1707         # Forced printings
1708         if self.params.get('forcetitle', False):
1709             self.to_stdout(info_dict['fulltitle'])
1710         if self.params.get('forceid', False):
1711             self.to_stdout(info_dict['id'])
1712         if self.params.get('forceurl', False):
1713             if info_dict.get('requested_formats') is not None:
1714                 for f in info_dict['requested_formats']:
1715                     self.to_stdout(f['url'] + f.get('play_path', ''))
1716             else:
1717                 # For RTMP URLs, also include the playpath
1718                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1719         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1720             self.to_stdout(info_dict['thumbnail'])
1721         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1722             self.to_stdout(info_dict['description'])
1723         if self.params.get('forcefilename', False) and filename is not None:
1724             self.to_stdout(filename)
1725         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1726             self.to_stdout(formatSeconds(info_dict['duration']))
1727         if self.params.get('forceformat', False):
1728             self.to_stdout(info_dict['format'])
1729         if self.params.get('forcejson', False):
1730             self.to_stdout(json.dumps(info_dict))
1731
1732         # Do nothing else if in simulate mode
1733         if self.params.get('simulate', False):
1734             return
1735
1736         if filename is None:
1737             return
1738
1739         def ensure_dir_exists(path):
1740             try:
1741                 dn = os.path.dirname(path)
1742                 if dn and not os.path.exists(dn):
1743                     os.makedirs(dn)
1744                 return True
1745             except (OSError, IOError) as err:
1746                 self.report_error('unable to create directory ' + error_to_compat_str(err))
1747                 return False
1748
1749         if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
1750             return
1751
1752         if self.params.get('writedescription', False):
1753             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1754             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1755                 self.to_screen('[info] Video description is already present')
1756             elif info_dict.get('description') is None:
1757                 self.report_warning('There\'s no description to write.')
1758             else:
1759                 try:
1760                     self.to_screen('[info] Writing video description to: ' + descfn)
1761                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1762                         descfile.write(info_dict['description'])
1763                 except (OSError, IOError):
1764                     self.report_error('Cannot write description file ' + descfn)
1765                     return
1766
1767         if self.params.get('writeannotations', False):
1768             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1769             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1770                 self.to_screen('[info] Video annotations are already present')
1771             else:
1772                 try:
1773                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1774                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1775                         annofile.write(info_dict['annotations'])
1776                 except (KeyError, TypeError):
1777                     self.report_warning('There are no annotations to write.')
1778                 except (OSError, IOError):
1779                     self.report_error('Cannot write annotations file: ' + annofn)
1780                     return
1781
1782         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1783                                        self.params.get('writeautomaticsub')])
1784
1785         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1786             # subtitles download errors are already managed as troubles in relevant IE
1787             # that way it will silently go on when used with unsupporting IE
1788             subtitles = info_dict['requested_subtitles']
1789             ie = self.get_info_extractor(info_dict['extractor_key'])
1790             for sub_lang, sub_info in subtitles.items():
1791                 sub_format = sub_info['ext']
1792                 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1793                 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1794                     self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
1795                 else:
1796                     self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1797                     if sub_info.get('data') is not None:
1798                         try:
1799                             # Use newline='' to prevent conversion of newline characters
1800                             # See https://github.com/rg3/youtube-dl/issues/10268
1801                             with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
1802                                 subfile.write(sub_info['data'])
1803                         except (OSError, IOError):
1804                             self.report_error('Cannot write subtitles file ' + sub_filename)
1805                             return
1806                     else:
1807                         try:
1808                             sub_data = ie._request_webpage(
1809                                 sub_info['url'], info_dict['id'], note=False).read()
1810                             with io.open(encodeFilename(sub_filename), 'wb') as subfile:
1811                                 subfile.write(sub_data)
1812                         except (ExtractorError, IOError, OSError, ValueError) as err:
1813                             self.report_warning('Unable to download subtitle for "%s": %s' %
1814                                                 (sub_lang, error_to_compat_str(err)))
1815                             continue
1816
1817         if self.params.get('writeinfojson', False):
1818             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1819             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1820                 self.to_screen('[info] Video description metadata is already present')
1821             else:
1822                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1823                 try:
1824                     write_json_file(self.filter_requested_info(info_dict), infofn)
1825                 except (OSError, IOError):
1826                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1827                     return
1828
1829         self._write_thumbnails(info_dict, filename)
1830
1831         if not self.params.get('skip_download', False):
1832             try:
1833                 def dl(name, info):
1834                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1835                     for ph in self._progress_hooks:
1836                         fd.add_progress_hook(ph)
1837                     if self.params.get('verbose'):
1838                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1839                     return fd.download(name, info)
1840
1841                 if info_dict.get('requested_formats') is not None:
1842                     downloaded = []
1843                     success = True
1844                     merger = FFmpegMergerPP(self)
1845                     if not merger.available:
1846                         postprocessors = []
1847                         self.report_warning('You have requested multiple '
1848                                             'formats but ffmpeg or avconv are not installed.'
1849                                             ' The formats won\'t be merged.')
1850                     else:
1851                         postprocessors = [merger]
1852
1853                     def compatible_formats(formats):
1854                         video, audio = formats
1855                         # Check extension
1856                         video_ext, audio_ext = video.get('ext'), audio.get('ext')
1857                         if video_ext and audio_ext:
1858                             COMPATIBLE_EXTS = (
1859                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
1860                                 ('webm')
1861                             )
1862                             for exts in COMPATIBLE_EXTS:
1863                                 if video_ext in exts and audio_ext in exts:
1864                                     return True
1865                         # TODO: Check acodec/vcodec
1866                         return False
1867
1868                     filename_real_ext = os.path.splitext(filename)[1][1:]
1869                     filename_wo_ext = (
1870                         os.path.splitext(filename)[0]
1871                         if filename_real_ext == info_dict['ext']
1872                         else filename)
1873                     requested_formats = info_dict['requested_formats']
1874                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1875                         info_dict['ext'] = 'mkv'
1876                         self.report_warning(
1877                             'Requested formats are incompatible for merge and will be merged into mkv.')
1878                     # Ensure filename always has a correct extension for successful merge
1879                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1880                     if os.path.exists(encodeFilename(filename)):
1881                         self.to_screen(
1882                             '[download] %s has already been downloaded and '
1883                             'merged' % filename)
1884                     else:
1885                         for f in requested_formats:
1886                             new_info = dict(info_dict)
1887                             new_info.update(f)
1888                             fname = prepend_extension(
1889                                 self.prepare_filename(new_info),
1890                                 'f%s' % f['format_id'], new_info['ext'])
1891                             if not ensure_dir_exists(fname):
1892                                 return
1893                             downloaded.append(fname)
1894                             partial_success = dl(fname, new_info)
1895                             success = success and partial_success
1896                         info_dict['__postprocessors'] = postprocessors
1897                         info_dict['__files_to_merge'] = downloaded
1898                 else:
1899                     # Just a single file
1900                     success = dl(filename, info_dict)
1901             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1902                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
1903                 return
1904             except (OSError, IOError) as err:
1905                 raise UnavailableVideoError(err)
1906             except (ContentTooShortError, ) as err:
1907                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1908                 return
1909
1910             if success and filename != '-':
1911                 # Fixup content
1912                 fixup_policy = self.params.get('fixup')
1913                 if fixup_policy is None:
1914                     fixup_policy = 'detect_or_warn'
1915
1916                 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
1917
1918                 stretched_ratio = info_dict.get('stretched_ratio')
1919                 if stretched_ratio is not None and stretched_ratio != 1:
1920                     if fixup_policy == 'warn':
1921                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1922                             info_dict['id'], stretched_ratio))
1923                     elif fixup_policy == 'detect_or_warn':
1924                         stretched_pp = FFmpegFixupStretchedPP(self)
1925                         if stretched_pp.available:
1926                             info_dict.setdefault('__postprocessors', [])
1927                             info_dict['__postprocessors'].append(stretched_pp)
1928                         else:
1929                             self.report_warning(
1930                                 '%s: Non-uniform pixel ratio (%s). %s'
1931                                 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
1932                     else:
1933                         assert fixup_policy in ('ignore', 'never')
1934
1935                 if (info_dict.get('requested_formats') is None and
1936                         info_dict.get('container') == 'm4a_dash'):
1937                     if fixup_policy == 'warn':
1938                         self.report_warning(
1939                             '%s: writing DASH m4a. '
1940                             'Only some players support this container.'
1941                             % info_dict['id'])
1942                     elif fixup_policy == 'detect_or_warn':
1943                         fixup_pp = FFmpegFixupM4aPP(self)
1944                         if fixup_pp.available:
1945                             info_dict.setdefault('__postprocessors', [])
1946                             info_dict['__postprocessors'].append(fixup_pp)
1947                         else:
1948                             self.report_warning(
1949                                 '%s: writing DASH m4a. '
1950                                 'Only some players support this container. %s'
1951                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
1952                     else:
1953                         assert fixup_policy in ('ignore', 'never')
1954
1955                 if (info_dict.get('protocol') == 'm3u8_native' or
1956                         info_dict.get('protocol') == 'm3u8' and
1957                         self.params.get('hls_prefer_native')):
1958                     if fixup_policy == 'warn':
1959                         self.report_warning('%s: malformed AAC bitstream detected.' % (
1960                             info_dict['id']))
1961                     elif fixup_policy == 'detect_or_warn':
1962                         fixup_pp = FFmpegFixupM3u8PP(self)
1963                         if fixup_pp.available:
1964                             info_dict.setdefault('__postprocessors', [])
1965                             info_dict['__postprocessors'].append(fixup_pp)
1966                         else:
1967                             self.report_warning(
1968                                 '%s: malformed AAC bitstream detected. %s'
1969                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
1970                     else:
1971                         assert fixup_policy in ('ignore', 'never')
1972
1973                 try:
1974                     self.post_process(filename, info_dict)
1975                 except (PostProcessingError) as err:
1976                     self.report_error('postprocessing: %s' % str(err))
1977                     return
1978                 self.record_download_archive(info_dict)
1979
1980     def download(self, url_list):
1981         """Download a given list of URLs."""
1982         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1983         if (len(url_list) > 1 and
1984                 outtmpl != '-' and
1985                 '%' not in outtmpl and
1986                 self.params.get('max_downloads') != 1):
1987             raise SameFileError(outtmpl)
1988
1989         for url in url_list:
1990             try:
1991                 # It also downloads the videos
1992                 res = self.extract_info(
1993                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
1994             except UnavailableVideoError:
1995                 self.report_error('unable to download video')
1996             except MaxDownloadsReached:
1997                 self.to_screen('[info] Maximum number of downloaded files reached.')
1998                 raise
1999             else:
2000                 if self.params.get('dump_single_json', False):
2001                     self.to_stdout(json.dumps(res))
2002
2003         return self._download_retcode
2004
2005     def download_with_info_file(self, info_filename):
2006         with contextlib.closing(fileinput.FileInput(
2007                 [info_filename], mode='r',
2008                 openhook=fileinput.hook_encoded('utf-8'))) as f:
2009             # FileInput doesn't have a read method, we can't call json.load
2010             info = self.filter_requested_info(json.loads('\n'.join(f)))
2011         try:
2012             self.process_ie_result(info, download=True)
2013         except DownloadError:
2014             webpage_url = info.get('webpage_url')
2015             if webpage_url is not None:
2016                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2017                 return self.download([webpage_url])
2018             else:
2019                 raise
2020         return self._download_retcode
2021
2022     @staticmethod
2023     def filter_requested_info(info_dict):
2024         return dict(
2025             (k, v) for k, v in info_dict.items()
2026             if k not in ['requested_formats', 'requested_subtitles'])
2027
2028     def post_process(self, filename, ie_info):
2029         """Run all the postprocessors on the given file."""
2030         info = dict(ie_info)
2031         info['filepath'] = filename
2032         pps_chain = []
2033         if ie_info.get('__postprocessors') is not None:
2034             pps_chain.extend(ie_info['__postprocessors'])
2035         pps_chain.extend(self._pps)
2036         for pp in pps_chain:
2037             files_to_delete = []
2038             try:
2039                 files_to_delete, info = pp.run(info)
2040             except PostProcessingError as e:
2041                 self.report_error(e.msg)
2042             if files_to_delete and not self.params.get('keepvideo', False):
2043                 for old_filename in files_to_delete:
2044                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2045                     try:
2046                         os.remove(encodeFilename(old_filename))
2047                     except (IOError, OSError):
2048                         self.report_warning('Unable to remove downloaded original file')
2049
2050     def _make_archive_id(self, info_dict):
2051         # Future-proof against any change in case
2052         # and backwards compatibility with prior versions
2053         extractor = info_dict.get('extractor_key')
2054         if extractor is None:
2055             if 'id' in info_dict:
2056                 extractor = info_dict.get('ie_key')  # key in a playlist
2057         if extractor is None:
2058             return None  # Incomplete video information
2059         return extractor.lower() + ' ' + info_dict['id']
2060
2061     def in_download_archive(self, info_dict):
2062         fn = self.params.get('download_archive')
2063         if fn is None:
2064             return False
2065
2066         vid_id = self._make_archive_id(info_dict)
2067         if vid_id is None:
2068             return False  # Incomplete video information
2069
2070         try:
2071             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
2072                 for line in archive_file:
2073                     if line.strip() == vid_id:
2074                         return True
2075         except IOError as ioe:
2076             if ioe.errno != errno.ENOENT:
2077                 raise
2078         return False
2079
2080     def record_download_archive(self, info_dict):
2081         fn = self.params.get('download_archive')
2082         if fn is None:
2083             return
2084         vid_id = self._make_archive_id(info_dict)
2085         assert vid_id
2086         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2087             archive_file.write(vid_id + '\n')
2088
2089     @staticmethod
2090     def format_resolution(format, default='unknown'):
2091         if format.get('vcodec') == 'none':
2092             return 'audio only'
2093         if format.get('resolution') is not None:
2094             return format['resolution']
2095         if format.get('height') is not None:
2096             if format.get('width') is not None:
2097                 res = '%sx%s' % (format['width'], format['height'])
2098             else:
2099                 res = '%sp' % format['height']
2100         elif format.get('width') is not None:
2101             res = '%dx?' % format['width']
2102         else:
2103             res = default
2104         return res
2105
    def _format_note(self, fdict):
        """Return a short descriptive string for one format dictionary.

        Aggregates whatever metadata is present (language, bitrates,
        container, codecs, fps, sample rate, filesize) into the text shown
        in the 'note' column of the format listing.
        """
        res = ''
        # f4f/f4m (HDS) formats are flagged as unsupported
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('language'):
            if res:
                res += ' '
            res += '[%s] ' % fdict['language']
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            if res:
                res += ', '
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            if res:
                res += ', '
            res += fdict['vcodec']
            if fdict.get('vbr') is not None:
                # The video bitrate is appended right after the codec name
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            # Codec unknown, but distinct video/audio bitrates are available
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            if res:
                res += ', '
            res += '%sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            if fdict['acodec'] == 'none':
                res += 'video only'
            else:
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            if res:
                res += ', '
            # '~' marks the size as an estimate
            res += '~' + format_bytes(fdict['filesize_approx'])
        return res
2161
2162     def list_formats(self, info_dict):
2163         formats = info_dict.get('formats', [info_dict])
2164         table = [
2165             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
2166             for f in formats
2167             if f.get('preference') is None or f['preference'] >= -1000]
2168         if len(formats) > 1:
2169             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
2170
2171         header_line = ['format code', 'extension', 'resolution', 'note']
2172         self.to_screen(
2173             '[info] Available formats for %s:\n%s' %
2174             (info_dict['id'], render_table(header_line, table)))
2175
2176     def list_thumbnails(self, info_dict):
2177         thumbnails = info_dict.get('thumbnails')
2178         if not thumbnails:
2179             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2180             return
2181
2182         self.to_screen(
2183             '[info] Thumbnails for %s:' % info_dict['id'])
2184         self.to_screen(render_table(
2185             ['ID', 'width', 'height', 'URL'],
2186             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
2187
2188     def list_subtitles(self, video_id, subtitles, name='subtitles'):
2189         if not subtitles:
2190             self.to_screen('%s has no %s' % (video_id, name))
2191             return
2192         self.to_screen(
2193             'Available %s for %s:' % (name, video_id))
2194         self.to_screen(render_table(
2195             ['Language', 'formats'],
2196             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2197                 for lang, formats in subtitles.items()]))
2198
2199     def urlopen(self, req):
2200         """ Start an HTTP download """
2201         if isinstance(req, compat_basestring):
2202             req = sanitized_Request(req)
2203         return self._opener.open(req, timeout=self._socket_timeout)
2204
    def print_debug_header(self):
        """Write version, platform, encoding and proxy diagnostics (verbose mode only)."""
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
        if _LAZY_LOADER:
            self._write_string('[debug] Lazy loading extractors enabled' + '\n')
        # Report the git revision when running from a checkout; any failure
        # (no git binary, not a repository) is deliberately ignored.
        try:
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        except Exception:
            try:
                # Python 2 only; clears the current exception state
                sys.exc_clear()
            except Exception:
                pass

        def python_implementation():
            # Append the interpreter version for PyPy builds
            impl_name = platform.python_implementation()
            if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
                return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
            return impl_name

        self._write_string('[debug] Python version %s (%s) - %s\n' % (
            platform.python_version(), python_implementation(),
            platform_name()))

        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        # Only list the helper executables that were actually found
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Collect the proxies from every opener handler that declares some
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            # Opt-in: report the public IP and check for a newer release
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
2280
    def _setup_opener(self):
        """Build the urllib opener (cookies, proxies, HTTPS, data:) used by urlopen."""
        timeout_val = self.params.get('socket_timeout')
        # Default socket timeout is 600 seconds
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        # Cookies: in-memory jar unless a cookie file was configured
        if opts_cookiefile is None:
            self.cookiejar = compat_cookiejar.CookieJar()
        else:
            opts_cookiefile = expand_path(opts_cookiefile)
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
                opts_cookiefile)
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load()

        cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
        if opts_proxy is not None:
            # An empty --proxy string disables proxying entirely
            if opts_proxy == '':
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            # No explicit proxy: honour the environment's proxy settings
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        data_handler = compat_urllib_request_DataHandler()

        # When passing our own FileHandler instance, build_opener won't add the
        # default FileHandler and allows us to disable the file protocol, which
        # can be used for malicious purposes (see
        # https://github.com/rg3/youtube-dl/issues/8227)
        file_handler = compat_urllib_request.FileHandler()

        def file_open(*args, **kwargs):
            raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
        file_handler.file_open = file_open

        opener = compat_urllib_request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)

        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
2333
2334     def encode(self, s):
2335         if isinstance(s, bytes):
2336             return s  # Already encoded
2337
2338         try:
2339             return s.encode(self.get_encoding())
2340         except UnicodeEncodeError as err:
2341             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2342             raise
2343
2344     def get_encoding(self):
2345         encoding = self.params.get('encoding')
2346         if encoding is None:
2347             encoding = preferredencoding()
2348         return encoding
2349
    def _write_thumbnails(self, info_dict, filename):
        """Download thumbnail image(s) for a video next to *filename*.

        With 'writethumbnail' only the last listed thumbnail is fetched;
        with 'write_all_thumbnails' all of them are; otherwise nothing is
        done.
        """
        if self.params.get('writethumbnail', False):
            thumbnails = info_dict.get('thumbnails')
            if thumbnails:
                # Keep only the last entry — presumably the best one; confirm
                # against the extractor's thumbnail ordering.
                thumbnails = [thumbnails[-1]]
        elif self.params.get('write_all_thumbnails', False):
            thumbnails = info_dict.get('thumbnails')
        else:
            return

        if not thumbnails:
            # No thumbnails present, so return immediately
            return

        for t in thumbnails:
            thumb_ext = determine_ext(t['url'], 'jpg')
            # When writing several thumbnails, distinguish them by id both in
            # the filename suffix and in the log output
            suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
            thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
            t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext

            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                self.to_screen('[%s] %s: Thumbnail %sis already present' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
            else:
                self.to_screen('[%s] %s: Downloading thumbnail %s...' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
                try:
                    uf = self.urlopen(t['url'])
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    # Best-effort: a failed thumbnail download only warns
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (t['url'], error_to_compat_str(err)))