[YoutubeDL] Fallback to ie_key of matching extractor while making download archive...
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # coding: utf-8
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import copy
9 import datetime
10 import errno
11 import fileinput
12 import io
13 import itertools
14 import json
15 import locale
16 import operator
17 import os
18 import platform
19 import re
20 import shutil
21 import subprocess
22 import socket
23 import sys
24 import time
25 import tokenize
26 import traceback
27 import random
28
29 from string import ascii_letters
30
31 from .compat import (
32     compat_basestring,
33     compat_cookiejar,
34     compat_get_terminal_size,
35     compat_http_client,
36     compat_kwargs,
37     compat_numeric_types,
38     compat_os_name,
39     compat_str,
40     compat_tokenize_tokenize,
41     compat_urllib_error,
42     compat_urllib_request,
43     compat_urllib_request_DataHandler,
44 )
45 from .utils import (
46     age_restricted,
47     args_to_str,
48     ContentTooShortError,
49     date_from_str,
50     DateRange,
51     DEFAULT_OUTTMPL,
52     determine_ext,
53     determine_protocol,
54     DownloadError,
55     encode_compat_str,
56     encodeFilename,
57     error_to_compat_str,
58     expand_path,
59     ExtractorError,
60     format_bytes,
61     formatSeconds,
62     GeoRestrictedError,
63     int_or_none,
64     ISO3166Utils,
65     locked_file,
66     make_HTTPS_handler,
67     MaxDownloadsReached,
68     orderedSet,
69     PagedList,
70     parse_filesize,
71     PerRequestProxyHandler,
72     platform_name,
73     PostProcessingError,
74     preferredencoding,
75     prepend_extension,
76     register_socks_protocols,
77     render_table,
78     replace_extension,
79     SameFileError,
80     sanitize_filename,
81     sanitize_path,
82     sanitize_url,
83     sanitized_Request,
84     std_headers,
85     subtitles_filename,
86     UnavailableVideoError,
87     url_basename,
88     version_tuple,
89     write_json_file,
90     write_string,
91     YoutubeDLCookieJar,
92     YoutubeDLCookieProcessor,
93     YoutubeDLHandler,
94 )
95 from .cache import Cache
96 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
97 from .extractor.openload import PhantomJSwrapper
98 from .downloader import get_suitable_downloader
99 from .downloader.rtmp import rtmpdump_version
100 from .postprocessor import (
101     FFmpegFixupM3u8PP,
102     FFmpegFixupM4aPP,
103     FFmpegFixupStretchedPP,
104     FFmpegMergerPP,
105     FFmpegPostProcessor,
106     get_postprocessor,
107 )
108 from .version import __version__
109
110 if compat_os_name == 'nt':
111     import ctypes
112
113
114 class YoutubeDL(object):
115     """YoutubeDL class.
116
117     YoutubeDL objects are the ones responsible for downloading the
118     actual video file and writing it to disk if the user has requested
119     it, among some other tasks. In most cases there should be one per
120     program. As, given a video URL, the downloader doesn't know how to
121     extract all the needed information, task that InfoExtractors do, it
122     has to pass the URL to one of them.
123
124     For this, YoutubeDL objects have a method that allows
125     InfoExtractors to be registered in a given order. When it is passed
126     a URL, the YoutubeDL object handles it to the first InfoExtractor it
127     finds that reports being able to handle it. The InfoExtractor extracts
128     all the information about the video or videos the URL refers to, and
129     YoutubeDL process the extracted information, possibly using a File
130     Downloader to download the video.
131
132     YoutubeDL objects accept a lot of parameters. In order not to saturate
133     the object constructor with arguments, it receives a dictionary of
134     options instead. These options are available through the params
135     attribute for the InfoExtractors to use. The YoutubeDL also
136     registers itself as the downloader in charge for the InfoExtractors
137     that are added to it, so this is a "mutual registration".
138
139     Available options:
140
141     username:          Username for authentication purposes.
142     password:          Password for authentication purposes.
143     videopassword:     Password for accessing a video.
144     ap_mso:            Adobe Pass multiple-system operator identifier.
145     ap_username:       Multiple-system operator account username.
146     ap_password:       Multiple-system operator account password.
147     usenetrc:          Use netrc for authentication instead.
148     verbose:           Print additional info to stdout.
149     quiet:             Do not print messages to stdout.
150     no_warnings:       Do not print out anything for warnings.
151     forceurl:          Force printing final URL.
152     forcetitle:        Force printing title.
153     forceid:           Force printing ID.
154     forcethumbnail:    Force printing thumbnail URL.
155     forcedescription:  Force printing description.
156     forcefilename:     Force printing final filename.
157     forceduration:     Force printing duration.
158     forcejson:         Force printing info_dict as JSON.
159     dump_single_json:  Force printing the info_dict of the whole playlist
160                        (or video) as a single JSON line.
161     simulate:          Do not download the video files.
162     format:            Video format code. See options.py for more information.
163     outtmpl:           Template for output names.
164     restrictfilenames: Do not allow "&" and spaces in file names
165     ignoreerrors:      Do not stop on download errors.
166     force_generic_extractor: Force downloader to use the generic extractor
167     nooverwrites:      Prevent overwriting files.
168     playliststart:     Playlist item to start at.
169     playlistend:       Playlist item to end at.
170     playlist_items:    Specific indices of playlist to download.
171     playlistreverse:   Download playlist items in reverse order.
172     playlistrandom:    Download playlist items in random order.
173     matchtitle:        Download only matching titles.
174     rejecttitle:       Reject downloads for matching titles.
175     logger:            Log messages to a logging.Logger instance.
176     logtostderr:       Log messages to stderr instead of stdout.
177     writedescription:  Write the video description to a .description file
178     writeinfojson:     Write the video description to a .info.json file
179     writeannotations:  Write the video annotations to a .annotations.xml file
180     writethumbnail:    Write the thumbnail image to a file
181     write_all_thumbnails:  Write all thumbnail formats to files
182     writesubtitles:    Write the video subtitles to a file
183     writeautomaticsub: Write the automatically generated subtitles to a file
184     allsubtitles:      Downloads all the subtitles of the video
185                        (requires writesubtitles or writeautomaticsub)
186     listsubtitles:     Lists all available subtitles for the video
187     subtitlesformat:   The format code for subtitles
188     subtitleslangs:    List of languages of the subtitles to download
189     keepvideo:         Keep the video file after post-processing
190     daterange:         A DateRange object, download only if the upload_date is in the range.
191     skip_download:     Skip the actual download of the video file
192     cachedir:          Location of the cache files in the filesystem.
193                        False to disable filesystem cache.
194     noplaylist:        Download single video instead of a playlist if in doubt.
195     age_limit:         An integer representing the user's age in years.
196                        Unsuitable videos for the given age are skipped.
197     min_views:         An integer representing the minimum view count the video
198                        must have in order to not be skipped.
199                        Videos without view count information are always
200                        downloaded. None for no limit.
201     max_views:         An integer representing the maximum view count.
202                        Videos that are more popular than that are not
203                        downloaded.
204                        Videos without view count information are always
205                        downloaded. None for no limit.
206     download_archive:  File name of a file where all downloads are recorded.
207                        Videos already present in the file are not downloaded
208                        again.
209     cookiefile:        File name where cookies should be read from and dumped to.
210     nocheckcertificate:Do not verify SSL certificates
211     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
212                        At the moment, this is only supported by YouTube.
213     proxy:             URL of the proxy server to use
214     geo_verification_proxy:  URL of the proxy to use for IP address verification
215                        on geo-restricted sites.
216     socket_timeout:    Time to wait for unresponsive hosts, in seconds
217     bidi_workaround:   Work around buggy terminals without bidirectional text
218                        support, using fribidi
219     debug_printtraffic:Print out sent and received HTTP traffic
220     include_ads:       Download ads as well
221     default_search:    Prepend this string if an input url is not valid.
222                        'auto' for elaborate guessing
223     encoding:          Use this encoding instead of the system-specified.
224     extract_flat:      Do not resolve URLs, return the immediate result.
225                        Pass in 'in_playlist' to only show this behavior for
226                        playlist items.
227     postprocessors:    A list of dictionaries, each with an entry
228                        * key:  The name of the postprocessor. See
229                                youtube_dl/postprocessor/__init__.py for a list.
230                        as well as any further keyword arguments for the
231                        postprocessor.
232     progress_hooks:    A list of functions that get called on download
233                        progress, with a dictionary with the entries
234                        * status: One of "downloading", "error", or "finished".
235                                  Check this first and ignore unknown values.
236
237                        If status is one of "downloading", or "finished", the
238                        following properties may also be present:
239                        * filename: The final filename (always present)
240                        * tmpfilename: The filename we're currently writing to
241                        * downloaded_bytes: Bytes on disk
242                        * total_bytes: Size of the whole file, None if unknown
243                        * total_bytes_estimate: Guess of the eventual file size,
244                                                None if unavailable.
245                        * elapsed: The number of seconds since download started.
246                        * eta: The estimated time in seconds, None if unknown
247                        * speed: The download speed in bytes/second, None if
248                                 unknown
249                        * fragment_index: The counter of the currently
250                                          downloaded video fragment.
251                        * fragment_count: The number of fragments (= individual
252                                          files that will be merged)
253
254                        Progress hooks are guaranteed to be called at least once
255                        (with status "finished") if the download is successful.
256     merge_output_format: Extension to use when merging formats.
257     fixup:             Automatically correct known faults of the file.
258                        One of:
259                        - "never": do nothing
260                        - "warn": only emit a warning
261                        - "detect_or_warn": check whether we can do anything
262                                            about it, warn otherwise (default)
263     source_address:    Client-side IP address to bind to.
264     call_home:         Boolean, true iff we are allowed to contact the
265                        youtube-dl servers for debugging.
266     sleep_interval:    Number of seconds to sleep before each download when
267                        used alone or a lower bound of a range for randomized
268                        sleep before each download (minimum possible number
269                        of seconds to sleep) when used along with
270                        max_sleep_interval.
271     max_sleep_interval:Upper bound of a range for randomized sleep before each
272                        download (maximum possible number of seconds to sleep).
273                        Must only be used along with sleep_interval.
274                        Actual sleep time will be a random float from range
275                        [sleep_interval; max_sleep_interval].
276     listformats:       Print an overview of available video formats and exit.
277     list_thumbnails:   Print a table of all thumbnails and exit.
278     match_filter:      A function that gets called with the info_dict of
279                        every video.
280                        If it returns a message, the video is ignored.
281                        If it returns None, the video is downloaded.
282                        match_filter_func in utils.py is one example for this.
283     no_color:          Do not emit color codes in output.
284     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
285                        HTTP header
286     geo_bypass_country:
287                        Two-letter ISO 3166-2 country code that will be used for
288                        explicit geographic restriction bypassing via faking
289                        X-Forwarded-For HTTP header
290     geo_bypass_ip_block:
291                        IP range in CIDR notation that will be used similarly to
292                        geo_bypass_country
293
294     The following options determine which downloader is picked:
295     external_downloader: Executable of the external downloader to call.
296                        None or unset for standard (built-in) downloader.
297     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
298                        if True, otherwise use ffmpeg/avconv if False, otherwise
299                        use downloader suggested by extractor if None.
300
301     The following parameters are not used by YoutubeDL itself, they are used by
302     the downloader (see youtube_dl/downloader/common.py):
303     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
304     noresizebuffer, retries, continuedl, noprogress, consoletitle,
305     xattr_set_filesize, external_downloader_args, hls_use_mpegts,
306     http_chunk_size.
307
308     The following options are used by the post processors:
309     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
310                        otherwise prefer ffmpeg.
311     postprocessor_args: A list of additional command-line arguments for the
312                         postprocessor.
313
314     The following options are used by the Youtube extractor:
315     youtube_include_dash_manifest: If True (default), DASH manifests and related
316                         data will be downloaded and processed by extractor.
317                         You can reduce network I/O by disabling it if you don't
318                         care about DASH.
319     """
320
    # info_dict fields that hold numeric values.  prepare_filename() uses this
    # set to rewrite %d-style output-template conversions for fields that are
    # missing (the placeholder string 'NA' would otherwise break them).
    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'upload_year', 'upload_month', 'upload_day',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
        'playlist_index',
    ))

    # Class-level placeholders; the real per-instance values are assigned
    # in __init__().
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None
338
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.

        params:    dict of options (see the class docstring); not mutated.
        auto_init: when True, print the debug header and register the
                   default info extractors.
        """
        if params is None:
            params = {}
        self._ies = []
        self._ies_instances = {}
        self._pps = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # With logtostderr, screen output goes to stderr (the bool indexes
        # the [stdout, stderr] pair).
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = {
            # Default parameters
            'nocheckcertificate': False,
        }
        self.params.update(params)
        self.cache = Cache(self)

        # Warn about a deprecated option; returns True when the option is set.
        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning(
                    '%s is deprecated. Use %s instead.' % (option, suggestion))
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')

        if params.get('bidi_workaround', False):
            # Spawn bidiv (or fribidi) connected through a pty so that
            # right-to-left text is rendered correctly; _bidi_workaround()
            # pipes messages through this process.
            try:
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    # bidiv not available; fall back to fribidi.
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
                not params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if isinstance(params.get('outtmpl'), bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')

        self._setup_opener()

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        # Instantiate postprocessors from their dict definitions: 'key' names
        # the PP class, the remaining entries become its keyword arguments.
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            del pp_def['key']
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()
432
433     def warn_if_short_id(self, argv):
434         # short YouTube ID starting with dash?
435         idxs = [
436             i for i, a in enumerate(argv)
437             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
438         if idxs:
439             correct_argv = (
440                 ['youtube-dl'] +
441                 [a for i, a in enumerate(argv) if i not in idxs] +
442                 ['--'] + [argv[i] for i in idxs]
443             )
444             self.report_warning(
445                 'Long argument string detected. '
446                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
447                 args_to_str(correct_argv))
448
449     def add_info_extractor(self, ie):
450         """Add an InfoExtractor object to the end of the list."""
451         self._ies.append(ie)
452         if not isinstance(ie, type):
453             self._ies_instances[ie.ie_key()] = ie
454             ie.set_downloader(self)
455
456     def get_info_extractor(self, ie_key):
457         """
458         Get an instance of an IE with name ie_key, it will try to get one from
459         the _ies list, if there's no instance it will create a new one and add
460         it to the extractor list.
461         """
462         ie = self._ies_instances.get(ie_key)
463         if ie is None:
464             ie = get_info_extractor(ie_key)()
465             self.add_info_extractor(ie)
466         return ie
467
468     def add_default_info_extractors(self):
469         """
470         Add the InfoExtractors returned by gen_extractors to the end of the list
471         """
472         for ie in gen_extractor_classes():
473             self.add_info_extractor(ie)
474
    def add_post_processor(self, pp):
        """Add a PostProcessor object to the end of the chain."""
        self._pps.append(pp)
        # Wire the postprocessor back to this YoutubeDL instance.
        pp.set_downloader(self)
479
    def add_progress_hook(self, ph):
        """Add the progress hook (currently only for the file downloader).

        Hooks are called with status dicts as described in the class docstring
        under 'progress_hooks'.
        """
        self._progress_hooks.append(ph)
483
484     def _bidi_workaround(self, message):
485         if not hasattr(self, '_output_channel'):
486             return message
487
488         assert hasattr(self, '_output_process')
489         assert isinstance(message, compat_str)
490         line_count = message.count('\n') + 1
491         self._output_process.stdin.write((message + '\n').encode('utf-8'))
492         self._output_process.stdin.flush()
493         res = ''.join(self._output_channel.readline().decode('utf-8')
494                       for _ in range(line_count))
495         return res[:-len('\n')]
496
    def to_screen(self, message, skip_eol=False):
        """Print message to stdout if not in quiet mode."""
        # check_quiet=True lets the 'quiet' option suppress screen output.
        return self.to_stdout(message, skip_eol, check_quiet=True)
500
    def _write_string(self, s, out=None):
        # Write s to the given stream using the user-configured encoding.
        write_string(s, out=out, encoding=self.params.get('encoding'))
503
504     def to_stdout(self, message, skip_eol=False, check_quiet=False):
505         """Print message to stdout if not in quiet mode."""
506         if self.params.get('logger'):
507             self.params['logger'].debug(message)
508         elif not check_quiet or not self.params.get('quiet', False):
509             message = self._bidi_workaround(message)
510             terminator = ['\n', ''][skip_eol]
511             output = message + terminator
512
513             self._write_string(output, self._screen_file)
514
515     def to_stderr(self, message):
516         """Print message to stderr."""
517         assert isinstance(message, compat_str)
518         if self.params.get('logger'):
519             self.params['logger'].error(message)
520         else:
521             message = self._bidi_workaround(message)
522             output = message + '\n'
523             self._write_string(output, self._err_file)
524
525     def to_console_title(self, message):
526         if not self.params.get('consoletitle', False):
527             return
528         if compat_os_name == 'nt':
529             if ctypes.windll.kernel32.GetConsoleWindow():
530                 # c_wchar_p() might not be necessary if `message` is
531                 # already of type unicode()
532                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
533         elif 'TERM' in os.environ:
534             self._write_string('\033]0;%s\007' % message, self._screen_file)
535
536     def save_console_title(self):
537         if not self.params.get('consoletitle', False):
538             return
539         if self.params.get('simulate', False):
540             return
541         if compat_os_name != 'nt' and 'TERM' in os.environ:
542             # Save the title on stack
543             self._write_string('\033[22;0t', self._screen_file)
544
545     def restore_console_title(self):
546         if not self.params.get('consoletitle', False):
547             return
548         if self.params.get('simulate', False):
549             return
550         if compat_os_name != 'nt' and 'TERM' in os.environ:
551             # Restore the title from stack
552             self._write_string('\033[23;0t', self._screen_file)
553
    def __enter__(self):
        # Context-manager entry: save the console title so __exit__ can
        # restore it.
        self.save_console_title()
        return self
557
    def __exit__(self, *args):
        # Context-manager exit: restore the console title and persist cookies.
        self.restore_console_title()

        if self.params.get('cookiefile') is not None:
            # Keep session cookies and expired entries too.
            self.cookiejar.save(ignore_discard=True, ignore_expires=True)
563
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            # In verbose mode also print a traceback; synthesize one from the
            # current exception (or the call stack) when none was passed in.
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # Wrapper exceptions (e.g. ExtractorError) carry the
                    # original exc_info of the underlying error; show it first.
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Prefer the wrapped exc_info of a wrapper exception, if present,
            # so DownloadError points at the real cause.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # With --ignore-errors, just remember the failure for the exit code.
        self._download_retcode = 1
593
594     def report_warning(self, message):
595         '''
596         Print the message to stderr, it will be prefixed with 'WARNING:'
597         If stderr is a tty file the 'WARNING:' will be colored
598         '''
599         if self.params.get('logger') is not None:
600             self.params['logger'].warning(message)
601         else:
602             if self.params.get('no_warnings'):
603                 return
604             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
605                 _msg_header = '\033[0;33mWARNING:\033[0m'
606             else:
607                 _msg_header = 'WARNING:'
608             warning_message = '%s %s' % (_msg_header, message)
609             self.to_stderr(warning_message)
610
611     def report_error(self, message, tb=None):
612         '''
613         Do the same as trouble, but prefixes the message with 'ERROR:', colored
614         in red if stderr is a tty file.
615         '''
616         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
617             _msg_header = '\033[0;31mERROR:\033[0m'
618         else:
619             _msg_header = 'ERROR:'
620         error_message = '%s %s' % (_msg_header, message)
621         self.trouble(error_message, tb)
622
623     def report_file_already_downloaded(self, file_name):
624         """Report file has already been fully downloaded."""
625         try:
626             self.to_screen('[download] %s has already been downloaded' % file_name)
627         except UnicodeEncodeError:
628             self.to_screen('[download] The file has already been downloaded')
629
630     def prepare_filename(self, info_dict):
631         """Generate the output filename."""
632         try:
633             template_dict = dict(info_dict)
634
635             template_dict['epoch'] = int(time.time())
636             autonumber_size = self.params.get('autonumber_size')
637             if autonumber_size is None:
638                 autonumber_size = 5
639             template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
640             if template_dict.get('resolution') is None:
641                 if template_dict.get('width') and template_dict.get('height'):
642                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
643                 elif template_dict.get('height'):
644                     template_dict['resolution'] = '%sp' % template_dict['height']
645                 elif template_dict.get('width'):
646                     template_dict['resolution'] = '%dx?' % template_dict['width']
647
648             sanitize = lambda k, v: sanitize_filename(
649                 compat_str(v),
650                 restricted=self.params.get('restrictfilenames'),
651                 is_id=(k == 'id' or k.endswith('_id')))
652             template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
653                                  for k, v in template_dict.items()
654                                  if v is not None and not isinstance(v, (list, tuple, dict)))
655             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
656
657             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
658
659             # For fields playlist_index and autonumber convert all occurrences
660             # of %(field)s to %(field)0Nd for backward compatibility
661             field_size_compat_map = {
662                 'playlist_index': len(str(template_dict['n_entries'])),
663                 'autonumber': autonumber_size,
664             }
665             FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
666             mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
667             if mobj:
668                 outtmpl = re.sub(
669                     FIELD_SIZE_COMPAT_RE,
670                     r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
671                     outtmpl)
672
673             # Missing numeric fields used together with integer presentation types
674             # in format specification will break the argument substitution since
675             # string 'NA' is returned for missing fields. We will patch output
676             # template for missing fields to meet string presentation type.
677             for numeric_field in self._NUMERIC_FIELDS:
678                 if numeric_field not in template_dict:
679                     # As of [1] format syntax is:
680                     #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
681                     # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
682                     FORMAT_RE = r'''(?x)
683                         (?<!%)
684                         %
685                         \({0}\)  # mapping key
686                         (?:[#0\-+ ]+)?  # conversion flags (optional)
687                         (?:\d+)?  # minimum field width (optional)
688                         (?:\.\d+)?  # precision (optional)
689                         [hlL]?  # length modifier (optional)
690                         [diouxXeEfFgGcrs%]  # conversion type
691                     '''
692                     outtmpl = re.sub(
693                         FORMAT_RE.format(numeric_field),
694                         r'%({0})s'.format(numeric_field), outtmpl)
695
696             # expand_path translates '%%' into '%' and '$$' into '$'
697             # correspondingly that is not what we want since we need to keep
698             # '%%' intact for template dict substitution step. Working around
699             # with boundary-alike separator hack.
700             sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
701             outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
702
703             # outtmpl should be expand_path'ed before template dict substitution
704             # because meta fields may contain env variables we don't want to
705             # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
706             # title "Hello $PATH", we don't want `$PATH` to be expanded.
707             filename = expand_path(outtmpl).replace(sep, '') % template_dict
708
709             # Temporary fix for #4787
710             # 'Treat' all problem characters by passing filename through preferredencoding
711             # to workaround encoding issues with subprocess on python2 @ Windows
712             if sys.version_info < (3, 0) and sys.platform == 'win32':
713                 filename = encodeFilename(filename, True).decode(preferredencoding())
714             return sanitize_path(filename)
715         except ValueError as err:
716             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
717             return None
718
719     def _match_entry(self, info_dict, incomplete):
720         """ Returns None iff the file should be downloaded """
721
722         video_title = info_dict.get('title', info_dict.get('id', 'video'))
723         if 'title' in info_dict:
724             # This can happen when we're just evaluating the playlist
725             title = info_dict['title']
726             matchtitle = self.params.get('matchtitle', False)
727             if matchtitle:
728                 if not re.search(matchtitle, title, re.IGNORECASE):
729                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
730             rejecttitle = self.params.get('rejecttitle', False)
731             if rejecttitle:
732                 if re.search(rejecttitle, title, re.IGNORECASE):
733                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
734         date = info_dict.get('upload_date')
735         if date is not None:
736             dateRange = self.params.get('daterange', DateRange())
737             if date not in dateRange:
738                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
739         view_count = info_dict.get('view_count')
740         if view_count is not None:
741             min_views = self.params.get('min_views')
742             if min_views is not None and view_count < min_views:
743                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
744             max_views = self.params.get('max_views')
745             if max_views is not None and view_count > max_views:
746                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
747         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
748             return 'Skipping "%s" because it is age restricted' % video_title
749         if self.in_download_archive(info_dict):
750             return '%s has already been recorded in archive' % video_title
751
752         if not incomplete:
753             match_filter = self.params.get('match_filter')
754             if match_filter is not None:
755                 ret = match_filter(info_dict)
756                 if ret is not None:
757                     return ret
758
759         return None
760
761     @staticmethod
762     def add_extra_info(info_dict, extra_info):
763         '''Set the keys from extra_info in info dict if they are missing'''
764         for key, value in extra_info.items():
765             info_dict.setdefault(key, value)
766
767     def extract_info(self, url, download=True, ie_key=None, extra_info={},
768                      process=True, force_generic_extractor=False):
769         '''
770         Returns a list with a dictionary for each video we find.
771         If 'download', also downloads the videos.
772         extra_info is a dict containing the extra values to add to each result
773         '''
774
775         if not ie_key and force_generic_extractor:
776             ie_key = 'Generic'
777
778         if ie_key:
779             ies = [self.get_info_extractor(ie_key)]
780         else:
781             ies = self._ies
782
783         for ie in ies:
784             if not ie.suitable(url):
785                 continue
786
787             ie = self.get_info_extractor(ie.ie_key())
788             if not ie.working():
789                 self.report_warning('The program functionality for this site has been marked as broken, '
790                                     'and will probably not work.')
791
792             try:
793                 ie_result = ie.extract(url)
794                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
795                     break
796                 if isinstance(ie_result, list):
797                     # Backwards compatibility: old IE result format
798                     ie_result = {
799                         '_type': 'compat_list',
800                         'entries': ie_result,
801                     }
802                 self.add_default_extra_info(ie_result, ie, url)
803                 if process:
804                     return self.process_ie_result(ie_result, download, extra_info)
805                 else:
806                     return ie_result
807             except GeoRestrictedError as e:
808                 msg = e.msg
809                 if e.countries:
810                     msg += '\nThis video is available in %s.' % ', '.join(
811                         map(ISO3166Utils.short2full, e.countries))
812                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
813                 self.report_error(msg)
814                 break
815             except ExtractorError as e:  # An error we somewhat expected
816                 self.report_error(compat_str(e), e.format_traceback())
817                 break
818             except MaxDownloadsReached:
819                 raise
820             except Exception as e:
821                 if self.params.get('ignoreerrors', False):
822                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
823                     break
824                 else:
825                     raise
826         else:
827             self.report_error('no suitable InfoExtractor for URL %s' % url)
828
829     def add_default_extra_info(self, ie_result, ie, url):
830         self.add_extra_info(ie_result, {
831             'extractor': ie.IE_NAME,
832             'webpage_url': url,
833             'webpage_url_basename': url_basename(url),
834             'extractor_key': ie.ie_key(),
835         })
836
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """
        # Dispatch on the result's declared type; plain videos default to
        # 'video' when '_type' is absent
        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            ie_result['url'] = sanitize_url(ie_result['url'])
            extract_flat = self.params.get('extract_flat', False)
            # With --flat-playlist, url results inside a playlist are
            # returned as-is instead of being resolved recursively
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                    extract_flat is True):
                if self.params.get('forcejson', False):
                    self.to_stdout(json.dumps(ie_result))
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # extract_info may return None when ignoreerrors is enabled and
            # extraction failed with an error, don't crash and return early
            # in this case
            if not info:
                return info

            # Non-None fields from the embedding page override the inner
            # result, except for the identity/type fields stripped below
            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            # Extracted info may not be a video result (i.e.
            # info.get('_type', 'video') != video) but rather an url or
            # url_transparent. In such cases outer metadata (from ie_result)
            # should be propagated to inner one (info). For this to happen
            # _type of info should be overridden with url_transparent. This
            # fixes issue from https://github.com/rg3/youtube-dl/pull/11163.
            if new_result.get('_type') == 'url':
                new_result['_type'] = 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type in ('playlist', 'multi_video'):
            # We process each entry in the playlist
            playlist = ie_result.get('title') or ie_result.get('id')
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # --playlist-start is 1-based on the command line; convert it
            # to a 0-based slice index here
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend')
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

            playlistitems_str = self.params.get('playlist_items')
            playlistitems = None
            if playlistitems_str is not None:
                def iter_playlistitems(format):
                    # Parses specs like '1-3,7,10-13' into 1-based indices
                    for string_segment in format.split(','):
                        if '-' in string_segment:
                            start, end = string_segment.split('-')
                            for item in range(int(start), int(end) + 1):
                                yield int(item)
                        else:
                            yield int(string_segment)
                playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

            ie_entries = ie_result['entries']

            def make_playlistitems_entries(list_ie_entries):
                # Select the requested 1-based indices, silently dropping
                # those that fall outside the playlist bounds
                num_entries = len(list_ie_entries)
                return [
                    list_ie_entries[i - 1] for i in playlistitems
                    if -num_entries <= i - 1 < num_entries]

            def report_download(num_entries):
                self.to_screen(
                    '[%s] playlist %s: Downloading %d videos' %
                    (ie_result['extractor'], playlist, num_entries))

            # Entries may be a plain list, a lazily fetched PagedList, or
            # an arbitrary iterable/generator; each needs its own slicing
            if isinstance(ie_entries, list):
                n_all_entries = len(ie_entries)
                if playlistitems:
                    entries = make_playlistitems_entries(ie_entries)
                else:
                    entries = ie_entries[playliststart:playlistend]
                n_entries = len(entries)
                self.to_screen(
                    '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            elif isinstance(ie_entries, PagedList):
                if playlistitems:
                    entries = []
                    for item in playlistitems:
                        entries.extend(ie_entries.getslice(
                            item - 1, item
                        ))
                else:
                    entries = ie_entries.getslice(
                        playliststart, playlistend)
                n_entries = len(entries)
                report_download(n_entries)
            else:  # iterable
                if playlistitems:
                    # Materialize only up to the highest requested index
                    entries = make_playlistitems_entries(list(itertools.islice(
                        ie_entries, 0, max(playlistitems))))
                else:
                    entries = list(itertools.islice(
                        ie_entries, playliststart, playlistend))
                n_entries = len(entries)
                report_download(n_entries)

            if self.params.get('playlistreverse', False):
                entries = entries[::-1]

            if self.params.get('playlistrandom', False):
                random.shuffle(entries)

            x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                # This __x_forwarded_for_ip thing is a bit ugly but requires
                # minimal changes
                if x_forwarded_for:
                    entry['__x_forwarded_for_ip'] = x_forwarded_for
                # Playlist-level metadata made available to each entry
                # (e.g. for the output template)
                extra = {
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_id': ie_result.get('id'),
                    'playlist_title': ie_result.get('title'),
                    'playlist_uploader': ie_result.get('uploader'),
                    'playlist_uploader_id': ie_result.get('uploader_id'),
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

                # incomplete=True: the entry dict may lack most metadata at
                # this point, so only the pre-download filters are applied
                reason = self._match_entry(entry, incomplete=True)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                    continue

                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            self.to_screen('[download] Finished downloading playlist: %s' % playlist)
            return ie_result
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            def _fixup(r):
                # Propagate the top-level metadata onto each legacy entry
                self.add_extra_info(
                    r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    }
                )
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
1031
1032     def _build_format_filter(self, filter_spec):
1033         " Returns a function to filter the formats according to the filter_spec "
1034
1035         OPERATORS = {
1036             '<': operator.lt,
1037             '<=': operator.le,
1038             '>': operator.gt,
1039             '>=': operator.ge,
1040             '=': operator.eq,
1041             '!=': operator.ne,
1042         }
1043         operator_rex = re.compile(r'''(?x)\s*
1044             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
1045             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1046             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
1047             $
1048             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1049         m = operator_rex.search(filter_spec)
1050         if m:
1051             try:
1052                 comparison_value = int(m.group('value'))
1053             except ValueError:
1054                 comparison_value = parse_filesize(m.group('value'))
1055                 if comparison_value is None:
1056                     comparison_value = parse_filesize(m.group('value') + 'B')
1057                 if comparison_value is None:
1058                     raise ValueError(
1059                         'Invalid value %r in format specification %r' % (
1060                             m.group('value'), filter_spec))
1061             op = OPERATORS[m.group('op')]
1062
1063         if not m:
1064             STR_OPERATORS = {
1065                 '=': operator.eq,
1066                 '^=': lambda attr, value: attr.startswith(value),
1067                 '$=': lambda attr, value: attr.endswith(value),
1068                 '*=': lambda attr, value: value in attr,
1069             }
1070             str_operator_rex = re.compile(r'''(?x)
1071                 \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
1072                 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
1073                 \s*(?P<value>[a-zA-Z0-9._-]+)
1074                 \s*$
1075                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1076             m = str_operator_rex.search(filter_spec)
1077             if m:
1078                 comparison_value = m.group('value')
1079                 str_op = STR_OPERATORS[m.group('op')]
1080                 if m.group('negation'):
1081                     op = lambda attr, value: not str_op(attr, value)
1082                 else:
1083                     op = str_op
1084
1085         if not m:
1086             raise ValueError('Invalid filter specification %r' % filter_spec)
1087
1088         def _filter(f):
1089             actual_value = f.get(m.group('key'))
1090             if actual_value is None:
1091                 return m.group('none_inclusive')
1092             return op(actual_value, comparison_value)
1093         return _filter
1094
1095     def _default_format_spec(self, info_dict, download=True):
1096
1097         def can_merge():
1098             merger = FFmpegMergerPP(self)
1099             return merger.available and merger.can_merge()
1100
1101         def prefer_best():
1102             if self.params.get('simulate', False):
1103                 return False
1104             if not download:
1105                 return False
1106             if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
1107                 return True
1108             if info_dict.get('is_live'):
1109                 return True
1110             if not can_merge():
1111                 return True
1112             return False
1113
1114         req_format_list = ['bestvideo+bestaudio', 'best']
1115         if prefer_best():
1116             req_format_list.reverse()
1117         return '/'.join(req_format_list)
1118
1119     def build_format_selector(self, format_spec):
1120         def syntax_error(note, start):
1121             message = (
1122                 'Invalid format specification: '
1123                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1124             return SyntaxError(message)
1125
1126         PICKFIRST = 'PICKFIRST'
1127         MERGE = 'MERGE'
1128         SINGLE = 'SINGLE'
1129         GROUP = 'GROUP'
1130         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1131
1132         def _parse_filter(tokens):
1133             filter_parts = []
1134             for type, string, start, _, _ in tokens:
1135                 if type == tokenize.OP and string == ']':
1136                     return ''.join(filter_parts)
1137                 else:
1138                     filter_parts.append(string)
1139
1140         def _remove_unused_ops(tokens):
1141             # Remove operators that we don't use and join them with the surrounding strings
1142             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1143             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1144             last_string, last_start, last_end, last_line = None, None, None, None
1145             for type, string, start, end, line in tokens:
1146                 if type == tokenize.OP and string == '[':
1147                     if last_string:
1148                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1149                         last_string = None
1150                     yield type, string, start, end, line
1151                     # everything inside brackets will be handled by _parse_filter
1152                     for type, string, start, end, line in tokens:
1153                         yield type, string, start, end, line
1154                         if type == tokenize.OP and string == ']':
1155                             break
1156                 elif type == tokenize.OP and string in ALLOWED_OPS:
1157                     if last_string:
1158                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1159                         last_string = None
1160                     yield type, string, start, end, line
1161                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1162                     if not last_string:
1163                         last_string = string
1164                         last_start = start
1165                         last_end = end
1166                     else:
1167                         last_string += string
1168             if last_string:
1169                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1170
1171         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1172             selectors = []
1173             current_selector = None
1174             for type, string, start, _, _ in tokens:
1175                 # ENCODING is only defined in python 3.x
1176                 if type == getattr(tokenize, 'ENCODING', None):
1177                     continue
1178                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1179                     current_selector = FormatSelector(SINGLE, string, [])
1180                 elif type == tokenize.OP:
1181                     if string == ')':
1182                         if not inside_group:
1183                             # ')' will be handled by the parentheses group
1184                             tokens.restore_last_token()
1185                         break
1186                     elif inside_merge and string in ['/', ',']:
1187                         tokens.restore_last_token()
1188                         break
1189                     elif inside_choice and string == ',':
1190                         tokens.restore_last_token()
1191                         break
1192                     elif string == ',':
1193                         if not current_selector:
1194                             raise syntax_error('"," must follow a format selector', start)
1195                         selectors.append(current_selector)
1196                         current_selector = None
1197                     elif string == '/':
1198                         if not current_selector:
1199                             raise syntax_error('"/" must follow a format selector', start)
1200                         first_choice = current_selector
1201                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1202                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1203                     elif string == '[':
1204                         if not current_selector:
1205                             current_selector = FormatSelector(SINGLE, 'best', [])
1206                         format_filter = _parse_filter(tokens)
1207                         current_selector.filters.append(format_filter)
1208                     elif string == '(':
1209                         if current_selector:
1210                             raise syntax_error('Unexpected "("', start)
1211                         group = _parse_format_selection(tokens, inside_group=True)
1212                         current_selector = FormatSelector(GROUP, group, [])
1213                     elif string == '+':
1214                         video_selector = current_selector
1215                         audio_selector = _parse_format_selection(tokens, inside_merge=True)
1216                         if not video_selector or not audio_selector:
1217                             raise syntax_error('"+" must be between two format selectors', start)
1218                         current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
1219                     else:
1220                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1221                 elif type == tokenize.ENDMARKER:
1222                     break
1223             if current_selector:
1224                 selectors.append(current_selector)
1225             return selectors
1226
1227         def _build_selector_function(selector):
1228             if isinstance(selector, list):
1229                 fs = [_build_selector_function(s) for s in selector]
1230
1231                 def selector_function(ctx):
1232                     for f in fs:
1233                         for format in f(ctx):
1234                             yield format
1235                 return selector_function
1236             elif selector.type == GROUP:
1237                 selector_function = _build_selector_function(selector.selector)
1238             elif selector.type == PICKFIRST:
1239                 fs = [_build_selector_function(s) for s in selector.selector]
1240
1241                 def selector_function(ctx):
1242                     for f in fs:
1243                         picked_formats = list(f(ctx))
1244                         if picked_formats:
1245                             return picked_formats
1246                     return []
1247             elif selector.type == SINGLE:
1248                 format_spec = selector.selector
1249
1250                 def selector_function(ctx):
1251                     formats = list(ctx['formats'])
1252                     if not formats:
1253                         return
1254                     if format_spec == 'all':
1255                         for f in formats:
1256                             yield f
1257                     elif format_spec in ['best', 'worst', None]:
1258                         format_idx = 0 if format_spec == 'worst' else -1
1259                         audiovideo_formats = [
1260                             f for f in formats
1261                             if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
1262                         if audiovideo_formats:
1263                             yield audiovideo_formats[format_idx]
1264                         # for extractors with incomplete formats (audio only (soundcloud)
1265                         # or video only (imgur)) we will fallback to best/worst
1266                         # {video,audio}-only format
1267                         elif ctx['incomplete_formats']:
1268                             yield formats[format_idx]
1269                     elif format_spec == 'bestaudio':
1270                         audio_formats = [
1271                             f for f in formats
1272                             if f.get('vcodec') == 'none']
1273                         if audio_formats:
1274                             yield audio_formats[-1]
1275                     elif format_spec == 'worstaudio':
1276                         audio_formats = [
1277                             f for f in formats
1278                             if f.get('vcodec') == 'none']
1279                         if audio_formats:
1280                             yield audio_formats[0]
1281                     elif format_spec == 'bestvideo':
1282                         video_formats = [
1283                             f for f in formats
1284                             if f.get('acodec') == 'none']
1285                         if video_formats:
1286                             yield video_formats[-1]
1287                     elif format_spec == 'worstvideo':
1288                         video_formats = [
1289                             f for f in formats
1290                             if f.get('acodec') == 'none']
1291                         if video_formats:
1292                             yield video_formats[0]
1293                     else:
1294                         extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
1295                         if format_spec in extensions:
1296                             filter_f = lambda f: f['ext'] == format_spec
1297                         else:
1298                             filter_f = lambda f: f['format_id'] == format_spec
1299                         matches = list(filter(filter_f, formats))
1300                         if matches:
1301                             yield matches[-1]
1302             elif selector.type == MERGE:
1303                 def _merge(formats_info):
1304                     format_1, format_2 = [f['format_id'] for f in formats_info]
1305                     # The first format must contain the video and the
1306                     # second the audio
1307                     if formats_info[0].get('vcodec') == 'none':
1308                         self.report_error('The first format must '
1309                                           'contain the video, try using '
1310                                           '"-f %s+%s"' % (format_2, format_1))
1311                         return
1312                     # Formats must be opposite (video+audio)
1313                     if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
1314                         self.report_error(
1315                             'Both formats %s and %s are video-only, you must specify "-f video+audio"'
1316                             % (format_1, format_2))
1317                         return
1318                     output_ext = (
1319                         formats_info[0]['ext']
1320                         if self.params.get('merge_output_format') is None
1321                         else self.params['merge_output_format'])
1322                     return {
1323                         'requested_formats': formats_info,
1324                         'format': '%s+%s' % (formats_info[0].get('format'),
1325                                              formats_info[1].get('format')),
1326                         'format_id': '%s+%s' % (formats_info[0].get('format_id'),
1327                                                 formats_info[1].get('format_id')),
1328                         'width': formats_info[0].get('width'),
1329                         'height': formats_info[0].get('height'),
1330                         'resolution': formats_info[0].get('resolution'),
1331                         'fps': formats_info[0].get('fps'),
1332                         'vcodec': formats_info[0].get('vcodec'),
1333                         'vbr': formats_info[0].get('vbr'),
1334                         'stretched_ratio': formats_info[0].get('stretched_ratio'),
1335                         'acodec': formats_info[1].get('acodec'),
1336                         'abr': formats_info[1].get('abr'),
1337                         'ext': output_ext,
1338                     }
1339                 video_selector, audio_selector = map(_build_selector_function, selector.selector)
1340
1341                 def selector_function(ctx):
1342                     for pair in itertools.product(
1343                             video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
1344                         yield _merge(pair)
1345
1346             filters = [self._build_format_filter(f) for f in selector.filters]
1347
1348             def final_selector(ctx):
1349                 ctx_copy = copy.deepcopy(ctx)
1350                 for _filter in filters:
1351                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1352                 return selector_function(ctx_copy)
1353             return final_selector
1354
1355         stream = io.BytesIO(format_spec.encode('utf-8'))
1356         try:
1357             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1358         except tokenize.TokenError:
1359             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1360
1361         class TokenIterator(object):
1362             def __init__(self, tokens):
1363                 self.tokens = tokens
1364                 self.counter = 0
1365
1366             def __iter__(self):
1367                 return self
1368
1369             def __next__(self):
1370                 if self.counter >= len(self.tokens):
1371                     raise StopIteration()
1372                 value = self.tokens[self.counter]
1373                 self.counter += 1
1374                 return value
1375
1376             next = __next__
1377
1378             def restore_last_token(self):
1379                 self.counter -= 1
1380
1381         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1382         return _build_selector_function(parsed_selector)
1383
1384     def _calc_headers(self, info_dict):
1385         res = std_headers.copy()
1386
1387         add_headers = info_dict.get('http_headers')
1388         if add_headers:
1389             res.update(add_headers)
1390
1391         cookies = self._calc_cookies(info_dict)
1392         if cookies:
1393             res['Cookie'] = cookies
1394
1395         if 'X-Forwarded-For' not in res:
1396             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1397             if x_forwarded_for_ip:
1398                 res['X-Forwarded-For'] = x_forwarded_for_ip
1399
1400         return res
1401
1402     def _calc_cookies(self, info_dict):
1403         pr = sanitized_Request(info_dict['url'])
1404         self.cookiejar.add_cookie_header(pr)
1405         return pr.get_header('Cookie')
1406
    def process_video_result(self, info_dict, download=True):
        """Sanitize a single extractor video result and select formats.

        Coerces mistyped fields, normalizes thumbnails/subtitles/format
        entries in place, picks the formats matching the requested format
        spec and, when download is True, hands each selected format to
        process_info().

        Returns info_dict updated with the last selected format (kept for
        backwards compatibility); may return None early when a listing
        option (list_thumbnails/listsubtitles/listformats) was requested.
        Raises ExtractorError on malformed results or when no format
        matches the format spec.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        def report_force_conversion(field, field_not, conversion):
            # Warn that the extractor produced a wrongly-typed field which
            # is being coerced below.
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            # Coerce a non-string field value to a string, in place.
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            # Coerce every known numeric field to int, in place.
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            # Promote a lone 'thumbnail' entry into the 'thumbnails' list
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if thumbnails:
            # Sort worst-to-best so that thumbnails[-1] is the preferred one;
            # missing preference/width/height sort below any real value.
            thumbnails.sort(key=lambda t: (
                t.get('preference') if t.get('preference') is not None else -1,
                t.get('width') if t.get('width') is not None else -1,
                t.get('height') if t.get('height') is not None else -1,
                t.get('id') if t.get('id') is not None else '', t.get('url')))
            for i, t in enumerate(thumbnails):
                t['url'] = sanitize_url(t['url'])
                if t.get('width') and t.get('height'):
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                if t.get('id') is None:
                    t['id'] = '%d' % i

        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
            return

        thumbnail = info_dict.get('thumbnail')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            # Fall back to the best (last after sorting) thumbnail URL
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Working around out-of-range timestamp values (e.g. negative ones on Windows,
            # see http://bugs.python.org/issue1646728)
            try:
                upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
                info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
            except (ValueError, OverflowError, OSError):
                pass

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        # Sanitize subtitle/caption URLs and fill in missing extensions
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        if self.params.get('listsubtitles', False):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
            return

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            raise ExtractorError('No video formats found!')

        def is_wellformed(f):
            # A format without a URL cannot be downloaded; drop it with a
            # warning rather than failing the whole extraction.
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats))

        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        for format_id, ambiguous_formats in formats_dict.items():
            if len(ambiguous_formats) > 1:
                for i, format in enumerate(ambiguous_formats):
                    format['format_id'] = '%s-%d' % (format_id, i)

        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if format.get('ext') is None:
                format['ext'] = determine_ext(format['url']).lower()
            # Automatically determine protocol if missing (useful for format
            # selection purposes)
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats'):
            self.list_formats(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            req_format = self._default_format_spec(info_dict, download=download)
            if self.params.get('verbose'):
                self.to_stdout('[debug] Default format spec: %s' % req_format)

        format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/rg3/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/rg3/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) or
            # all formats are audio-only
            all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            raise ExtractorError('requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
1643
1644     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1645         """Select the requested subtitles and their format"""
1646         available_subs = {}
1647         if normal_subtitles and self.params.get('writesubtitles'):
1648             available_subs.update(normal_subtitles)
1649         if automatic_captions and self.params.get('writeautomaticsub'):
1650             for lang, cap_info in automatic_captions.items():
1651                 if lang not in available_subs:
1652                     available_subs[lang] = cap_info
1653
1654         if (not self.params.get('writesubtitles') and not
1655                 self.params.get('writeautomaticsub') or not
1656                 available_subs):
1657             return None
1658
1659         if self.params.get('allsubtitles', False):
1660             requested_langs = available_subs.keys()
1661         else:
1662             if self.params.get('subtitleslangs', False):
1663                 requested_langs = self.params.get('subtitleslangs')
1664             elif 'en' in available_subs:
1665                 requested_langs = ['en']
1666             else:
1667                 requested_langs = [list(available_subs.keys())[0]]
1668
1669         formats_query = self.params.get('subtitlesformat', 'best')
1670         formats_preference = formats_query.split('/') if formats_query else []
1671         subs = {}
1672         for lang in requested_langs:
1673             formats = available_subs.get(lang)
1674             if formats is None:
1675                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1676                 continue
1677             for ext in formats_preference:
1678                 if ext == 'best':
1679                     f = formats[-1]
1680                     break
1681                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1682                 if matches:
1683                     f = matches[-1]
1684                     break
1685             else:
1686                 f = formats[-1]
1687                 self.report_warning(
1688                     'No subtitle format found matching "%s" for language %s, '
1689                     'using %s' % (formats_query, lang, f['ext']))
1690             subs[lang] = f
1691         return subs
1692
1693     def process_info(self, info_dict):
1694         """Process a single resolved IE result."""
1695
1696         assert info_dict.get('_type', 'video') == 'video'
1697
1698         max_downloads = self.params.get('max_downloads')
1699         if max_downloads is not None:
1700             if self._num_downloads >= int(max_downloads):
1701                 raise MaxDownloadsReached()
1702
1703         info_dict['fulltitle'] = info_dict['title']
1704         if len(info_dict['title']) > 200:
1705             info_dict['title'] = info_dict['title'][:197] + '...'
1706
1707         if 'format' not in info_dict:
1708             info_dict['format'] = info_dict['ext']
1709
1710         reason = self._match_entry(info_dict, incomplete=False)
1711         if reason is not None:
1712             self.to_screen('[download] ' + reason)
1713             return
1714
1715         self._num_downloads += 1
1716
1717         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1718
1719         # Forced printings
1720         if self.params.get('forcetitle', False):
1721             self.to_stdout(info_dict['fulltitle'])
1722         if self.params.get('forceid', False):
1723             self.to_stdout(info_dict['id'])
1724         if self.params.get('forceurl', False):
1725             if info_dict.get('requested_formats') is not None:
1726                 for f in info_dict['requested_formats']:
1727                     self.to_stdout(f['url'] + f.get('play_path', ''))
1728             else:
1729                 # For RTMP URLs, also include the playpath
1730                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1731         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1732             self.to_stdout(info_dict['thumbnail'])
1733         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1734             self.to_stdout(info_dict['description'])
1735         if self.params.get('forcefilename', False) and filename is not None:
1736             self.to_stdout(filename)
1737         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1738             self.to_stdout(formatSeconds(info_dict['duration']))
1739         if self.params.get('forceformat', False):
1740             self.to_stdout(info_dict['format'])
1741         if self.params.get('forcejson', False):
1742             self.to_stdout(json.dumps(info_dict))
1743
1744         # Do nothing else if in simulate mode
1745         if self.params.get('simulate', False):
1746             return
1747
1748         if filename is None:
1749             return
1750
1751         def ensure_dir_exists(path):
1752             try:
1753                 dn = os.path.dirname(path)
1754                 if dn and not os.path.exists(dn):
1755                     os.makedirs(dn)
1756                 return True
1757             except (OSError, IOError) as err:
1758                 self.report_error('unable to create directory ' + error_to_compat_str(err))
1759                 return False
1760
1761         if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
1762             return
1763
1764         if self.params.get('writedescription', False):
1765             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1766             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1767                 self.to_screen('[info] Video description is already present')
1768             elif info_dict.get('description') is None:
1769                 self.report_warning('There\'s no description to write.')
1770             else:
1771                 try:
1772                     self.to_screen('[info] Writing video description to: ' + descfn)
1773                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1774                         descfile.write(info_dict['description'])
1775                 except (OSError, IOError):
1776                     self.report_error('Cannot write description file ' + descfn)
1777                     return
1778
1779         if self.params.get('writeannotations', False):
1780             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1781             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1782                 self.to_screen('[info] Video annotations are already present')
1783             else:
1784                 try:
1785                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1786                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1787                         annofile.write(info_dict['annotations'])
1788                 except (KeyError, TypeError):
1789                     self.report_warning('There are no annotations to write.')
1790                 except (OSError, IOError):
1791                     self.report_error('Cannot write annotations file: ' + annofn)
1792                     return
1793
1794         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1795                                        self.params.get('writeautomaticsub')])
1796
1797         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1798             # subtitles download errors are already managed as troubles in relevant IE
1799             # that way it will silently go on when used with unsupporting IE
1800             subtitles = info_dict['requested_subtitles']
1801             ie = self.get_info_extractor(info_dict['extractor_key'])
1802             for sub_lang, sub_info in subtitles.items():
1803                 sub_format = sub_info['ext']
1804                 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1805                 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1806                     self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
1807                 else:
1808                     self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1809                     if sub_info.get('data') is not None:
1810                         try:
1811                             # Use newline='' to prevent conversion of newline characters
1812                             # See https://github.com/rg3/youtube-dl/issues/10268
1813                             with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
1814                                 subfile.write(sub_info['data'])
1815                         except (OSError, IOError):
1816                             self.report_error('Cannot write subtitles file ' + sub_filename)
1817                             return
1818                     else:
1819                         try:
1820                             sub_data = ie._request_webpage(
1821                                 sub_info['url'], info_dict['id'], note=False).read()
1822                             with io.open(encodeFilename(sub_filename), 'wb') as subfile:
1823                                 subfile.write(sub_data)
1824                         except (ExtractorError, IOError, OSError, ValueError) as err:
1825                             self.report_warning('Unable to download subtitle for "%s": %s' %
1826                                                 (sub_lang, error_to_compat_str(err)))
1827                             continue
1828
1829         if self.params.get('writeinfojson', False):
1830             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1831             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1832                 self.to_screen('[info] Video description metadata is already present')
1833             else:
1834                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1835                 try:
1836                     write_json_file(self.filter_requested_info(info_dict), infofn)
1837                 except (OSError, IOError):
1838                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1839                     return
1840
1841         self._write_thumbnails(info_dict, filename)
1842
1843         if not self.params.get('skip_download', False):
1844             try:
1845                 def dl(name, info):
1846                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1847                     for ph in self._progress_hooks:
1848                         fd.add_progress_hook(ph)
1849                     if self.params.get('verbose'):
1850                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1851                     return fd.download(name, info)
1852
1853                 if info_dict.get('requested_formats') is not None:
1854                     downloaded = []
1855                     success = True
1856                     merger = FFmpegMergerPP(self)
1857                     if not merger.available:
1858                         postprocessors = []
1859                         self.report_warning('You have requested multiple '
1860                                             'formats but ffmpeg or avconv are not installed.'
1861                                             ' The formats won\'t be merged.')
1862                     else:
1863                         postprocessors = [merger]
1864
1865                     def compatible_formats(formats):
1866                         video, audio = formats
1867                         # Check extension
1868                         video_ext, audio_ext = video.get('ext'), audio.get('ext')
1869                         if video_ext and audio_ext:
1870                             COMPATIBLE_EXTS = (
1871                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
1872                                 ('webm')
1873                             )
1874                             for exts in COMPATIBLE_EXTS:
1875                                 if video_ext in exts and audio_ext in exts:
1876                                     return True
1877                         # TODO: Check acodec/vcodec
1878                         return False
1879
1880                     filename_real_ext = os.path.splitext(filename)[1][1:]
1881                     filename_wo_ext = (
1882                         os.path.splitext(filename)[0]
1883                         if filename_real_ext == info_dict['ext']
1884                         else filename)
1885                     requested_formats = info_dict['requested_formats']
1886                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1887                         info_dict['ext'] = 'mkv'
1888                         self.report_warning(
1889                             'Requested formats are incompatible for merge and will be merged into mkv.')
1890                     # Ensure filename always has a correct extension for successful merge
1891                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1892                     if os.path.exists(encodeFilename(filename)):
1893                         self.to_screen(
1894                             '[download] %s has already been downloaded and '
1895                             'merged' % filename)
1896                     else:
1897                         for f in requested_formats:
1898                             new_info = dict(info_dict)
1899                             new_info.update(f)
1900                             fname = prepend_extension(
1901                                 self.prepare_filename(new_info),
1902                                 'f%s' % f['format_id'], new_info['ext'])
1903                             if not ensure_dir_exists(fname):
1904                                 return
1905                             downloaded.append(fname)
1906                             partial_success = dl(fname, new_info)
1907                             success = success and partial_success
1908                         info_dict['__postprocessors'] = postprocessors
1909                         info_dict['__files_to_merge'] = downloaded
1910                 else:
1911                     # Just a single file
1912                     success = dl(filename, info_dict)
1913             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1914                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
1915                 return
1916             except (OSError, IOError) as err:
1917                 raise UnavailableVideoError(err)
1918             except (ContentTooShortError, ) as err:
1919                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1920                 return
1921
1922             if success and filename != '-':
1923                 # Fixup content
1924                 fixup_policy = self.params.get('fixup')
1925                 if fixup_policy is None:
1926                     fixup_policy = 'detect_or_warn'
1927
1928                 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
1929
1930                 stretched_ratio = info_dict.get('stretched_ratio')
1931                 if stretched_ratio is not None and stretched_ratio != 1:
1932                     if fixup_policy == 'warn':
1933                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1934                             info_dict['id'], stretched_ratio))
1935                     elif fixup_policy == 'detect_or_warn':
1936                         stretched_pp = FFmpegFixupStretchedPP(self)
1937                         if stretched_pp.available:
1938                             info_dict.setdefault('__postprocessors', [])
1939                             info_dict['__postprocessors'].append(stretched_pp)
1940                         else:
1941                             self.report_warning(
1942                                 '%s: Non-uniform pixel ratio (%s). %s'
1943                                 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
1944                     else:
1945                         assert fixup_policy in ('ignore', 'never')
1946
1947                 if (info_dict.get('requested_formats') is None and
1948                         info_dict.get('container') == 'm4a_dash'):
1949                     if fixup_policy == 'warn':
1950                         self.report_warning(
1951                             '%s: writing DASH m4a. '
1952                             'Only some players support this container.'
1953                             % info_dict['id'])
1954                     elif fixup_policy == 'detect_or_warn':
1955                         fixup_pp = FFmpegFixupM4aPP(self)
1956                         if fixup_pp.available:
1957                             info_dict.setdefault('__postprocessors', [])
1958                             info_dict['__postprocessors'].append(fixup_pp)
1959                         else:
1960                             self.report_warning(
1961                                 '%s: writing DASH m4a. '
1962                                 'Only some players support this container. %s'
1963                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
1964                     else:
1965                         assert fixup_policy in ('ignore', 'never')
1966
1967                 if (info_dict.get('protocol') == 'm3u8_native' or
1968                         info_dict.get('protocol') == 'm3u8' and
1969                         self.params.get('hls_prefer_native')):
1970                     if fixup_policy == 'warn':
1971                         self.report_warning('%s: malformed AAC bitstream detected.' % (
1972                             info_dict['id']))
1973                     elif fixup_policy == 'detect_or_warn':
1974                         fixup_pp = FFmpegFixupM3u8PP(self)
1975                         if fixup_pp.available:
1976                             info_dict.setdefault('__postprocessors', [])
1977                             info_dict['__postprocessors'].append(fixup_pp)
1978                         else:
1979                             self.report_warning(
1980                                 '%s: malformed AAC bitstream detected. %s'
1981                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
1982                     else:
1983                         assert fixup_policy in ('ignore', 'never')
1984
1985                 try:
1986                     self.post_process(filename, info_dict)
1987                 except (PostProcessingError) as err:
1988                     self.report_error('postprocessing: %s' % str(err))
1989                     return
1990                 self.record_download_archive(info_dict)
1991
1992     def download(self, url_list):
1993         """Download a given list of URLs."""
1994         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1995         if (len(url_list) > 1 and
1996                 outtmpl != '-' and
1997                 '%' not in outtmpl and
1998                 self.params.get('max_downloads') != 1):
1999             raise SameFileError(outtmpl)
2000
2001         for url in url_list:
2002             try:
2003                 # It also downloads the videos
2004                 res = self.extract_info(
2005                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2006             except UnavailableVideoError:
2007                 self.report_error('unable to download video')
2008             except MaxDownloadsReached:
2009                 self.to_screen('[info] Maximum number of downloaded files reached.')
2010                 raise
2011             else:
2012                 if self.params.get('dump_single_json', False):
2013                     self.to_stdout(json.dumps(res))
2014
2015         return self._download_retcode
2016
2017     def download_with_info_file(self, info_filename):
2018         with contextlib.closing(fileinput.FileInput(
2019                 [info_filename], mode='r',
2020                 openhook=fileinput.hook_encoded('utf-8'))) as f:
2021             # FileInput doesn't have a read method, we can't call json.load
2022             info = self.filter_requested_info(json.loads('\n'.join(f)))
2023         try:
2024             self.process_ie_result(info, download=True)
2025         except DownloadError:
2026             webpage_url = info.get('webpage_url')
2027             if webpage_url is not None:
2028                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2029                 return self.download([webpage_url])
2030             else:
2031                 raise
2032         return self._download_retcode
2033
2034     @staticmethod
2035     def filter_requested_info(info_dict):
2036         return dict(
2037             (k, v) for k, v in info_dict.items()
2038             if k not in ['requested_formats', 'requested_subtitles'])
2039
2040     def post_process(self, filename, ie_info):
2041         """Run all the postprocessors on the given file."""
2042         info = dict(ie_info)
2043         info['filepath'] = filename
2044         pps_chain = []
2045         if ie_info.get('__postprocessors') is not None:
2046             pps_chain.extend(ie_info['__postprocessors'])
2047         pps_chain.extend(self._pps)
2048         for pp in pps_chain:
2049             files_to_delete = []
2050             try:
2051                 files_to_delete, info = pp.run(info)
2052             except PostProcessingError as e:
2053                 self.report_error(e.msg)
2054             if files_to_delete and not self.params.get('keepvideo', False):
2055                 for old_filename in files_to_delete:
2056                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2057                     try:
2058                         os.remove(encodeFilename(old_filename))
2059                     except (IOError, OSError):
2060                         self.report_warning('Unable to remove downloaded original file')
2061
2062     def _make_archive_id(self, info_dict):
2063         video_id = info_dict.get('id')
2064         if not video_id:
2065             return
2066         # Future-proof against any change in case
2067         # and backwards compatibility with prior versions
2068         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
2069         if extractor is None:
2070             # Try to find matching extractor for the URL and take its ie_key
2071             for ie in self._ies:
2072                 if ie.suitable(info_dict['url']):
2073                     extractor = ie.ie_key()
2074                     break
2075             else:
2076                 return
2077         return extractor.lower() + ' ' + video_id
2078
2079     def in_download_archive(self, info_dict):
2080         fn = self.params.get('download_archive')
2081         if fn is None:
2082             return False
2083
2084         vid_id = self._make_archive_id(info_dict)
2085         if not vid_id:
2086             return False  # Incomplete video information
2087
2088         try:
2089             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
2090                 for line in archive_file:
2091                     if line.strip() == vid_id:
2092                         return True
2093         except IOError as ioe:
2094             if ioe.errno != errno.ENOENT:
2095                 raise
2096         return False
2097
2098     def record_download_archive(self, info_dict):
2099         fn = self.params.get('download_archive')
2100         if fn is None:
2101             return
2102         vid_id = self._make_archive_id(info_dict)
2103         assert vid_id
2104         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2105             archive_file.write(vid_id + '\n')
2106
2107     @staticmethod
2108     def format_resolution(format, default='unknown'):
2109         if format.get('vcodec') == 'none':
2110             return 'audio only'
2111         if format.get('resolution') is not None:
2112             return format['resolution']
2113         if format.get('height') is not None:
2114             if format.get('width') is not None:
2115                 res = '%sx%s' % (format['width'], format['height'])
2116             else:
2117                 res = '%sp' % format['height']
2118         elif format.get('width') is not None:
2119             res = '%dx?' % format['width']
2120         else:
2121             res = default
2122         return res
2123
    def _format_note(self, fdict):
        """Return a short human-readable note describing a format dict.

        Pieces (bitrates, codecs, fps, filesize, ...) are appended in a fixed
        order; once ``res`` is non-empty, subsequent pieces are separated
        with ', ' (or ' ' for the language tag).
        """
        res = ''
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('language'):
            if res:
                res += ' '
            res += '[%s] ' % fdict['language']
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            # total bitrate, right-aligned to 4 digits
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            if res:
                res += ', '
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            if res:
                res += ', '
            res += fdict['vcodec']
            # '@' glues the codec name to the video bitrate appended below
            if fdict.get('vbr') is not None:
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            # No video codec known, but separate video/audio bitrates exist
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            if res:
                res += ', '
            res += '%sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            if fdict['acodec'] == 'none':
                res += 'video only'
            else:
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            if res:
                res += ', '
            # '~' marks the filesize as approximate
            res += '~' + format_bytes(fdict['filesize_approx'])
        return res
2179
2180     def list_formats(self, info_dict):
2181         formats = info_dict.get('formats', [info_dict])
2182         table = [
2183             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
2184             for f in formats
2185             if f.get('preference') is None or f['preference'] >= -1000]
2186         if len(formats) > 1:
2187             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
2188
2189         header_line = ['format code', 'extension', 'resolution', 'note']
2190         self.to_screen(
2191             '[info] Available formats for %s:\n%s' %
2192             (info_dict['id'], render_table(header_line, table)))
2193
2194     def list_thumbnails(self, info_dict):
2195         thumbnails = info_dict.get('thumbnails')
2196         if not thumbnails:
2197             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2198             return
2199
2200         self.to_screen(
2201             '[info] Thumbnails for %s:' % info_dict['id'])
2202         self.to_screen(render_table(
2203             ['ID', 'width', 'height', 'URL'],
2204             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
2205
2206     def list_subtitles(self, video_id, subtitles, name='subtitles'):
2207         if not subtitles:
2208             self.to_screen('%s has no %s' % (video_id, name))
2209             return
2210         self.to_screen(
2211             'Available %s for %s:' % (name, video_id))
2212         self.to_screen(render_table(
2213             ['Language', 'formats'],
2214             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2215                 for lang, formats in subtitles.items()]))
2216
2217     def urlopen(self, req):
2218         """ Start an HTTP download """
2219         if isinstance(req, compat_basestring):
2220             req = sanitized_Request(req)
2221         return self._opener.open(req, timeout=self._socket_timeout)
2222
    def print_debug_header(self):
        """Write debug information (versions, encodings, proxy map) to the
        output; a no-op unless the 'verbose' option is set."""
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        # Report the encodings that affect console and filesystem output
        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
        if _LAZY_LOADER:
            self._write_string('[debug] Lazy loading extractors enabled' + '\n')
        # Best effort: report the git commit when running from a source checkout
        try:
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        except Exception:
            try:
                # Python 2 only: clear the handled exception; absent on Python 3,
                # where the inner except swallows the AttributeError
                sys.exc_clear()
            except Exception:
                pass

        def python_implementation():
            # Interpreter name, with the PyPy version appended where available
            impl_name = platform.python_implementation()
            if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
                return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
            return impl_name

        self._write_string('[debug] Python version %s (%s) - %s\n' % (
            platform.python_version(), python_implementation(),
            platform_name()))

        # Versions of the external helper programs (ffmpeg/avconv, rtmpdump, phantomjs)
        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Collect the effective proxy configuration from the opener's handlers
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            # Opt-in: query yt-dl.org for our public IP and the latest version
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
2298
    def _setup_opener(self):
        """Create the urllib opener (self._opener) used for all HTTP requests.

        Wires up cookie handling, per-request proxies, the custom HTTP/HTTPS
        handlers, data: URL support, and disables the file:// scheme.
        """
        timeout_val = self.params.get('socket_timeout')
        # Default socket timeout is 600 seconds
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # No cookie file given: keep cookies in memory only
            self.cookiejar = compat_cookiejar.CookieJar()
        else:
            opts_cookiefile = expand_path(opts_cookiefile)
            self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load(ignore_discard=True, ignore_expires=True)

        cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
        if opts_proxy is not None:
            # An explicit empty --proxy disables all proxies
            if opts_proxy == '':
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            # Fall back to the environment's proxy settings
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        data_handler = compat_urllib_request_DataHandler()

        # When passing our own FileHandler instance, build_opener won't add the
        # default FileHandler and allows us to disable the file protocol, which
        # can be used for malicious purposes (see
        # https://github.com/rg3/youtube-dl/issues/8227)
        file_handler = compat_urllib_request.FileHandler()

        def file_open(*args, **kwargs):
            raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
        file_handler.file_open = file_open

        opener = compat_urllib_request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)

        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
2350
2351     def encode(self, s):
2352         if isinstance(s, bytes):
2353             return s  # Already encoded
2354
2355         try:
2356             return s.encode(self.get_encoding())
2357         except UnicodeEncodeError as err:
2358             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2359             raise
2360
2361     def get_encoding(self):
2362         encoding = self.params.get('encoding')
2363         if encoding is None:
2364             encoding = preferredencoding()
2365         return encoding
2366
    def _write_thumbnails(self, info_dict, filename):
        """Download thumbnail file(s) next to the video file.

        With 'writethumbnail' only the last listed thumbnail is fetched;
        with 'write_all_thumbnails' every available one is. Failures are
        reported as warnings, not errors.
        """
        if self.params.get('writethumbnail', False):
            thumbnails = info_dict.get('thumbnails')
            if thumbnails:
                # Keep only the last entry of the list
                thumbnails = [thumbnails[-1]]
        elif self.params.get('write_all_thumbnails', False):
            thumbnails = info_dict.get('thumbnails')
        else:
            return

        if not thumbnails:
            # No thumbnails present, so return immediately
            return

        for t in thumbnails:
            thumb_ext = determine_ext(t['url'], 'jpg')
            # Disambiguate filenames and messages when writing several thumbnails
            suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
            thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
            t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext

            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                self.to_screen('[%s] %s: Thumbnail %sis already present' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
            else:
                self.to_screen('[%s] %s: Downloading thumbnail %s...' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
                try:
                    uf = self.urlopen(t['url'])
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    # Thumbnails are best-effort: warn and carry on
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (t['url'], error_to_compat_str(err)))