[YoutubeDL] Recognize expires=0 as session cookies and send session cookies with...
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # coding: utf-8
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import copy
9 import datetime
10 import errno
11 import fileinput
12 import io
13 import itertools
14 import json
15 import locale
16 import operator
17 import os
18 import platform
19 import re
20 import shutil
21 import subprocess
22 import socket
23 import sys
24 import time
25 import tokenize
26 import traceback
27 import random
28
29 from string import ascii_letters
30
31 from .compat import (
32     compat_basestring,
33     compat_cookiejar,
34     compat_get_terminal_size,
35     compat_http_client,
36     compat_kwargs,
37     compat_numeric_types,
38     compat_os_name,
39     compat_str,
40     compat_tokenize_tokenize,
41     compat_urllib_error,
42     compat_urllib_request,
43     compat_urllib_request_DataHandler,
44 )
45 from .utils import (
46     age_restricted,
47     args_to_str,
48     ContentTooShortError,
49     date_from_str,
50     DateRange,
51     DEFAULT_OUTTMPL,
52     determine_ext,
53     determine_protocol,
54     DownloadError,
55     encode_compat_str,
56     encodeFilename,
57     error_to_compat_str,
58     expand_path,
59     ExtractorError,
60     format_bytes,
61     formatSeconds,
62     GeoRestrictedError,
63     int_or_none,
64     ISO3166Utils,
65     locked_file,
66     make_HTTPS_handler,
67     MaxDownloadsReached,
68     orderedSet,
69     PagedList,
70     parse_filesize,
71     PerRequestProxyHandler,
72     platform_name,
73     PostProcessingError,
74     preferredencoding,
75     prepend_extension,
76     register_socks_protocols,
77     render_table,
78     replace_extension,
79     SameFileError,
80     sanitize_filename,
81     sanitize_path,
82     sanitize_url,
83     sanitized_Request,
84     std_headers,
85     subtitles_filename,
86     UnavailableVideoError,
87     url_basename,
88     version_tuple,
89     write_json_file,
90     write_string,
91     YoutubeDLCookieProcessor,
92     YoutubeDLHandler,
93 )
94 from .cache import Cache
95 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
96 from .extractor.openload import PhantomJSwrapper
97 from .downloader import get_suitable_downloader
98 from .downloader.rtmp import rtmpdump_version
99 from .postprocessor import (
100     FFmpegFixupM3u8PP,
101     FFmpegFixupM4aPP,
102     FFmpegFixupStretchedPP,
103     FFmpegMergerPP,
104     FFmpegPostProcessor,
105     get_postprocessor,
106 )
107 from .version import __version__
108
109 if compat_os_name == 'nt':
110     import ctypes
111
112
113 class YoutubeDL(object):
114     """YoutubeDL class.
115
116     YoutubeDL objects are the ones responsible for downloading the
117     actual video file and writing it to disk if the user has requested
118     it, among some other tasks. In most cases there should be one per
119     program. As, given a video URL, the downloader doesn't know how to
120     extract all the needed information, task that InfoExtractors do, it
121     has to pass the URL to one of them.
122
123     For this, YoutubeDL objects have a method that allows
124     InfoExtractors to be registered in a given order. When it is passed
125     a URL, the YoutubeDL object hands it to the first InfoExtractor it
126     finds that reports being able to handle it. The InfoExtractor extracts
127     all the information about the video or videos the URL refers to, and
128     YoutubeDL process the extracted information, possibly using a File
129     Downloader to download the video.
130
131     YoutubeDL objects accept a lot of parameters. In order not to saturate
132     the object constructor with arguments, it receives a dictionary of
133     options instead. These options are available through the params
134     attribute for the InfoExtractors to use. The YoutubeDL also
135     registers itself as the downloader in charge for the InfoExtractors
136     that are added to it, so this is a "mutual registration".
137
138     Available options:
139
140     username:          Username for authentication purposes.
141     password:          Password for authentication purposes.
142     videopassword:     Password for accessing a video.
143     ap_mso:            Adobe Pass multiple-system operator identifier.
144     ap_username:       Multiple-system operator account username.
145     ap_password:       Multiple-system operator account password.
146     usenetrc:          Use netrc for authentication instead.
147     verbose:           Print additional info to stdout.
148     quiet:             Do not print messages to stdout.
149     no_warnings:       Do not print out anything for warnings.
150     forceurl:          Force printing final URL.
151     forcetitle:        Force printing title.
152     forceid:           Force printing ID.
153     forcethumbnail:    Force printing thumbnail URL.
154     forcedescription:  Force printing description.
155     forcefilename:     Force printing final filename.
156     forceduration:     Force printing duration.
157     forcejson:         Force printing info_dict as JSON.
158     dump_single_json:  Force printing the info_dict of the whole playlist
159                        (or video) as a single JSON line.
160     simulate:          Do not download the video files.
161     format:            Video format code. See options.py for more information.
162     outtmpl:           Template for output names.
163     restrictfilenames: Do not allow "&" and spaces in file names
164     ignoreerrors:      Do not stop on download errors.
165     force_generic_extractor: Force downloader to use the generic extractor
166     nooverwrites:      Prevent overwriting files.
167     playliststart:     Playlist item to start at.
168     playlistend:       Playlist item to end at.
169     playlist_items:    Specific indices of playlist to download.
170     playlistreverse:   Download playlist items in reverse order.
171     playlistrandom:    Download playlist items in random order.
172     matchtitle:        Download only matching titles.
173     rejecttitle:       Reject downloads for matching titles.
174     logger:            Log messages to a logging.Logger instance.
175     logtostderr:       Log messages to stderr instead of stdout.
176     writedescription:  Write the video description to a .description file
177     writeinfojson:     Write the video metadata to a .info.json file
178     writeannotations:  Write the video annotations to a .annotations.xml file
179     writethumbnail:    Write the thumbnail image to a file
180     write_all_thumbnails:  Write all thumbnail formats to files
181     writesubtitles:    Write the video subtitles to a file
182     writeautomaticsub: Write the automatically generated subtitles to a file
183     allsubtitles:      Downloads all the subtitles of the video
184                        (requires writesubtitles or writeautomaticsub)
185     listsubtitles:     Lists all available subtitles for the video
186     subtitlesformat:   The format code for subtitles
187     subtitleslangs:    List of languages of the subtitles to download
188     keepvideo:         Keep the video file after post-processing
189     daterange:         A DateRange object, download only if the upload_date is in the range.
190     skip_download:     Skip the actual download of the video file
191     cachedir:          Location of the cache files in the filesystem.
192                        False to disable filesystem cache.
193     noplaylist:        Download single video instead of a playlist if in doubt.
194     age_limit:         An integer representing the user's age in years.
195                        Unsuitable videos for the given age are skipped.
196     min_views:         An integer representing the minimum view count the video
197                        must have in order to not be skipped.
198                        Videos without view count information are always
199                        downloaded. None for no limit.
200     max_views:         An integer representing the maximum view count.
201                        Videos that are more popular than that are not
202                        downloaded.
203                        Videos without view count information are always
204                        downloaded. None for no limit.
205     download_archive:  File name of a file where all downloads are recorded.
206                        Videos already present in the file are not downloaded
207                        again.
208     cookiefile:        File name where cookies should be read from and dumped to.
209     nocheckcertificate:Do not verify SSL certificates
210     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
211                        At the moment, this is only supported by YouTube.
212     proxy:             URL of the proxy server to use
213     geo_verification_proxy:  URL of the proxy to use for IP address verification
214                        on geo-restricted sites.
215     socket_timeout:    Time to wait for unresponsive hosts, in seconds
216     bidi_workaround:   Work around buggy terminals without bidirectional text
217                        support, using fribidi
218     debug_printtraffic:Print out sent and received HTTP traffic
219     include_ads:       Download ads as well
220     default_search:    Prepend this string if an input url is not valid.
221                        'auto' for elaborate guessing
222     encoding:          Use this encoding instead of the system-specified.
223     extract_flat:      Do not resolve URLs, return the immediate result.
224                        Pass in 'in_playlist' to only show this behavior for
225                        playlist items.
226     postprocessors:    A list of dictionaries, each with an entry
227                        * key:  The name of the postprocessor. See
228                                youtube_dl/postprocessor/__init__.py for a list.
229                        as well as any further keyword arguments for the
230                        postprocessor.
231     progress_hooks:    A list of functions that get called on download
232                        progress, with a dictionary with the entries
233                        * status: One of "downloading", "error", or "finished".
234                                  Check this first and ignore unknown values.
235
236                        If status is one of "downloading", or "finished", the
237                        following properties may also be present:
238                        * filename: The final filename (always present)
239                        * tmpfilename: The filename we're currently writing to
240                        * downloaded_bytes: Bytes on disk
241                        * total_bytes: Size of the whole file, None if unknown
242                        * total_bytes_estimate: Guess of the eventual file size,
243                                                None if unavailable.
244                        * elapsed: The number of seconds since download started.
245                        * eta: The estimated time in seconds, None if unknown
246                        * speed: The download speed in bytes/second, None if
247                                 unknown
248                        * fragment_index: The counter of the currently
249                                          downloaded video fragment.
250                        * fragment_count: The number of fragments (= individual
251                                          files that will be merged)
252
253                        Progress hooks are guaranteed to be called at least once
254                        (with status "finished") if the download is successful.
255     merge_output_format: Extension to use when merging formats.
256     fixup:             Automatically correct known faults of the file.
257                        One of:
258                        - "never": do nothing
259                        - "warn": only emit a warning
260                        - "detect_or_warn": check whether we can do anything
261                                            about it, warn otherwise (default)
262     source_address:    Client-side IP address to bind to.
263     call_home:         Boolean, true iff we are allowed to contact the
264                        youtube-dl servers for debugging.
265     sleep_interval:    Number of seconds to sleep before each download when
266                        used alone or a lower bound of a range for randomized
267                        sleep before each download (minimum possible number
268                        of seconds to sleep) when used along with
269                        max_sleep_interval.
270     max_sleep_interval:Upper bound of a range for randomized sleep before each
271                        download (maximum possible number of seconds to sleep).
272                        Must only be used along with sleep_interval.
273                        Actual sleep time will be a random float from range
274                        [sleep_interval; max_sleep_interval].
275     listformats:       Print an overview of available video formats and exit.
276     list_thumbnails:   Print a table of all thumbnails and exit.
277     match_filter:      A function that gets called with the info_dict of
278                        every video.
279                        If it returns a message, the video is ignored.
280                        If it returns None, the video is downloaded.
281                        match_filter_func in utils.py is one example for this.
282     no_color:          Do not emit color codes in output.
283     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
284                        HTTP header
285     geo_bypass_country:
286                        Two-letter ISO 3166-2 country code that will be used for
287                        explicit geographic restriction bypassing via faking
288                        X-Forwarded-For HTTP header
289     geo_bypass_ip_block:
290                        IP range in CIDR notation that will be used similarly to
291                        geo_bypass_country
292
293     The following options determine which downloader is picked:
294     external_downloader: Executable of the external downloader to call.
295                        None or unset for standard (built-in) downloader.
296     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
297                        if True, otherwise use ffmpeg/avconv if False, otherwise
298                        use downloader suggested by extractor if None.
299
300     The following parameters are not used by YoutubeDL itself, they are used by
301     the downloader (see youtube_dl/downloader/common.py):
302     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
303     noresizebuffer, retries, continuedl, noprogress, consoletitle,
304     xattr_set_filesize, external_downloader_args, hls_use_mpegts,
305     http_chunk_size.
306
307     The following options are used by the post processors:
308     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
309                        otherwise prefer ffmpeg.
310     postprocessor_args: A list of additional command-line arguments for the
311                         postprocessor.
312
313     The following options are used by the Youtube extractor:
314     youtube_include_dash_manifest: If True (default), DASH manifests and related
315                         data will be downloaded and processed by extractor.
316                         You can reduce network I/O by disabling it if you don't
317                         care about DASH.
318     """
319
320     _NUMERIC_FIELDS = set((
321         'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
322         'timestamp', 'upload_year', 'upload_month', 'upload_day',
323         'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
324         'average_rating', 'comment_count', 'age_limit',
325         'start_time', 'end_time',
326         'chapter_number', 'season_number', 'episode_number',
327         'track_number', 'disc_number', 'release_year',
328         'playlist_index',
329     ))
330
331     params = None
332     _ies = []
333     _pps = []
334     _download_retcode = None
335     _num_downloads = None
336     _screen_file = None
337
    def __init__(self, params=None, auto_init=True):
        """Create a YoutubeDL object with the given options.

        params is a dictionary of options; it is merged over the built-in
        defaults and stored as self.params. When auto_init is true, the
        debug header is printed and the default info extractors are added.
        Post processors and progress hooks listed in the params are
        registered as well.
        """
        if params is None:
            params = {}
        self._ies = []
        self._ies_instances = {}
        self._pps = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Indexing with the boolean picks stderr when logtostderr is set
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = {
            # Default parameters
            'nocheckcertificate': False,
        }
        self.params.update(params)
        self.cache = Cache(self)

        # Warn about a deprecated param and tell the caller whether it was set
        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning(
                    '%s is deprecated. Use %s instead.' % (option, suggestion))
                return True
            return False

        # The deprecated proxy option only applies when the new one is unset
        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')

        if params.get('bidi_workaround', False):
            try:
                import pty
                # The helper process writes to the slave end of the pty;
                # its output is read back from the master end
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                # Try bidiv first and fall back to fribidi if it cannot be started
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                # A missing executable only disables the workaround;
                # any other OS error is propagated
                if ose.errno == errno.ENOENT:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
                not params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if isinstance(params.get('outtmpl'), bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')

        self._setup_opener()

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        # Each post processor definition is a dict whose 'key' selects the
        # class; the remaining entries are passed as keyword arguments
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            # Work on a copy so the caller's definition keeps its 'key'
            pp_def = dict(pp_def_raw)
            del pp_def['key']
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()
431
432     def warn_if_short_id(self, argv):
433         # short YouTube ID starting with dash?
434         idxs = [
435             i for i, a in enumerate(argv)
436             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
437         if idxs:
438             correct_argv = (
439                 ['youtube-dl'] +
440                 [a for i, a in enumerate(argv) if i not in idxs] +
441                 ['--'] + [argv[i] for i in idxs]
442             )
443             self.report_warning(
444                 'Long argument string detected. '
445                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
446                 args_to_str(correct_argv))
447
448     def add_info_extractor(self, ie):
449         """Add an InfoExtractor object to the end of the list."""
450         self._ies.append(ie)
451         if not isinstance(ie, type):
452             self._ies_instances[ie.ie_key()] = ie
453             ie.set_downloader(self)
454
455     def get_info_extractor(self, ie_key):
456         """
457         Get an instance of an IE with name ie_key, it will try to get one from
458         the _ies list, if there's no instance it will create a new one and add
459         it to the extractor list.
460         """
461         ie = self._ies_instances.get(ie_key)
462         if ie is None:
463             ie = get_info_extractor(ie_key)()
464             self.add_info_extractor(ie)
465         return ie
466
467     def add_default_info_extractors(self):
468         """
469         Add the InfoExtractors returned by gen_extractors to the end of the list
470         """
471         for ie in gen_extractor_classes():
472             self.add_info_extractor(ie)
473
474     def add_post_processor(self, pp):
475         """Add a PostProcessor object to the end of the chain."""
476         self._pps.append(pp)
477         pp.set_downloader(self)
478
479     def add_progress_hook(self, ph):
480         """Add the progress hook (currently only for the file downloader)"""
481         self._progress_hooks.append(ph)
482
483     def _bidi_workaround(self, message):
484         if not hasattr(self, '_output_channel'):
485             return message
486
487         assert hasattr(self, '_output_process')
488         assert isinstance(message, compat_str)
489         line_count = message.count('\n') + 1
490         self._output_process.stdin.write((message + '\n').encode('utf-8'))
491         self._output_process.stdin.flush()
492         res = ''.join(self._output_channel.readline().decode('utf-8')
493                       for _ in range(line_count))
494         return res[:-len('\n')]
495
496     def to_screen(self, message, skip_eol=False):
497         """Print message to stdout if not in quiet mode."""
498         return self.to_stdout(message, skip_eol, check_quiet=True)
499
500     def _write_string(self, s, out=None):
501         write_string(s, out=out, encoding=self.params.get('encoding'))
502
503     def to_stdout(self, message, skip_eol=False, check_quiet=False):
504         """Print message to stdout if not in quiet mode."""
505         if self.params.get('logger'):
506             self.params['logger'].debug(message)
507         elif not check_quiet or not self.params.get('quiet', False):
508             message = self._bidi_workaround(message)
509             terminator = ['\n', ''][skip_eol]
510             output = message + terminator
511
512             self._write_string(output, self._screen_file)
513
514     def to_stderr(self, message):
515         """Print message to stderr."""
516         assert isinstance(message, compat_str)
517         if self.params.get('logger'):
518             self.params['logger'].error(message)
519         else:
520             message = self._bidi_workaround(message)
521             output = message + '\n'
522             self._write_string(output, self._err_file)
523
524     def to_console_title(self, message):
525         if not self.params.get('consoletitle', False):
526             return
527         if compat_os_name == 'nt':
528             if ctypes.windll.kernel32.GetConsoleWindow():
529                 # c_wchar_p() might not be necessary if `message` is
530                 # already of type unicode()
531                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
532         elif 'TERM' in os.environ:
533             self._write_string('\033]0;%s\007' % message, self._screen_file)
534
535     def save_console_title(self):
536         if not self.params.get('consoletitle', False):
537             return
538         if self.params.get('simulate', False):
539             return
540         if compat_os_name != 'nt' and 'TERM' in os.environ:
541             # Save the title on stack
542             self._write_string('\033[22;0t', self._screen_file)
543
544     def restore_console_title(self):
545         if not self.params.get('consoletitle', False):
546             return
547         if self.params.get('simulate', False):
548             return
549         if compat_os_name != 'nt' and 'TERM' in os.environ:
550             # Restore the title from stack
551             self._write_string('\033[23;0t', self._screen_file)
552
553     def __enter__(self):
554         self.save_console_title()
555         return self
556
557     def __exit__(self, *args):
558         self.restore_console_title()
559
560         if self.params.get('cookiefile') is not None:
561             self.cookiejar.save()
562
563     def trouble(self, message=None, tb=None):
564         """Determine action to take when a download problem appears.
565
566         Depending on if the downloader has been configured to ignore
567         download errors or not, this method may throw an exception or
568         not when errors are found, after printing the message.
569
570         tb, if given, is additional traceback information.
571         """
572         if message is not None:
573             self.to_stderr(message)
574         if self.params.get('verbose'):
575             if tb is None:
576                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
577                     tb = ''
578                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
579                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
580                     tb += encode_compat_str(traceback.format_exc())
581                 else:
582                     tb_data = traceback.format_list(traceback.extract_stack())
583                     tb = ''.join(tb_data)
584             self.to_stderr(tb)
585         if not self.params.get('ignoreerrors', False):
586             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
587                 exc_info = sys.exc_info()[1].exc_info
588             else:
589                 exc_info = sys.exc_info()
590             raise DownloadError(message, exc_info)
591         self._download_retcode = 1
592
593     def report_warning(self, message):
594         '''
595         Print the message to stderr, it will be prefixed with 'WARNING:'
596         If stderr is a tty file the 'WARNING:' will be colored
597         '''
598         if self.params.get('logger') is not None:
599             self.params['logger'].warning(message)
600         else:
601             if self.params.get('no_warnings'):
602                 return
603             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
604                 _msg_header = '\033[0;33mWARNING:\033[0m'
605             else:
606                 _msg_header = 'WARNING:'
607             warning_message = '%s %s' % (_msg_header, message)
608             self.to_stderr(warning_message)
609
610     def report_error(self, message, tb=None):
611         '''
612         Do the same as trouble, but prefixes the message with 'ERROR:', colored
613         in red if stderr is a tty file.
614         '''
615         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
616             _msg_header = '\033[0;31mERROR:\033[0m'
617         else:
618             _msg_header = 'ERROR:'
619         error_message = '%s %s' % (_msg_header, message)
620         self.trouble(error_message, tb)
621
622     def report_file_already_downloaded(self, file_name):
623         """Report file has already been fully downloaded."""
624         try:
625             self.to_screen('[download] %s has already been downloaded' % file_name)
626         except UnicodeEncodeError:
627             self.to_screen('[download] The file has already been downloaded')
628
629     def prepare_filename(self, info_dict):
630         """Generate the output filename."""
631         try:
632             template_dict = dict(info_dict)
633
634             template_dict['epoch'] = int(time.time())
635             autonumber_size = self.params.get('autonumber_size')
636             if autonumber_size is None:
637                 autonumber_size = 5
638             template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
639             if template_dict.get('resolution') is None:
640                 if template_dict.get('width') and template_dict.get('height'):
641                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
642                 elif template_dict.get('height'):
643                     template_dict['resolution'] = '%sp' % template_dict['height']
644                 elif template_dict.get('width'):
645                     template_dict['resolution'] = '%dx?' % template_dict['width']
646
647             sanitize = lambda k, v: sanitize_filename(
648                 compat_str(v),
649                 restricted=self.params.get('restrictfilenames'),
650                 is_id=(k == 'id' or k.endswith('_id')))
651             template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
652                                  for k, v in template_dict.items()
653                                  if v is not None and not isinstance(v, (list, tuple, dict)))
654             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
655
656             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
657
658             # For fields playlist_index and autonumber convert all occurrences
659             # of %(field)s to %(field)0Nd for backward compatibility
660             field_size_compat_map = {
661                 'playlist_index': len(str(template_dict['n_entries'])),
662                 'autonumber': autonumber_size,
663             }
664             FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
665             mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
666             if mobj:
667                 outtmpl = re.sub(
668                     FIELD_SIZE_COMPAT_RE,
669                     r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
670                     outtmpl)
671
672             # Missing numeric fields used together with integer presentation types
673             # in format specification will break the argument substitution since
674             # string 'NA' is returned for missing fields. We will patch output
675             # template for missing fields to meet string presentation type.
676             for numeric_field in self._NUMERIC_FIELDS:
677                 if numeric_field not in template_dict:
678                     # As of [1] format syntax is:
679                     #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
680                     # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
681                     FORMAT_RE = r'''(?x)
682                         (?<!%)
683                         %
684                         \({0}\)  # mapping key
685                         (?:[#0\-+ ]+)?  # conversion flags (optional)
686                         (?:\d+)?  # minimum field width (optional)
687                         (?:\.\d+)?  # precision (optional)
688                         [hlL]?  # length modifier (optional)
689                         [diouxXeEfFgGcrs%]  # conversion type
690                     '''
691                     outtmpl = re.sub(
692                         FORMAT_RE.format(numeric_field),
693                         r'%({0})s'.format(numeric_field), outtmpl)
694
695             # expand_path translates '%%' into '%' and '$$' into '$'
696             # correspondingly that is not what we want since we need to keep
697             # '%%' intact for template dict substitution step. Working around
698             # with boundary-alike separator hack.
699             sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
700             outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
701
702             # outtmpl should be expand_path'ed before template dict substitution
703             # because meta fields may contain env variables we don't want to
704             # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
705             # title "Hello $PATH", we don't want `$PATH` to be expanded.
706             filename = expand_path(outtmpl).replace(sep, '') % template_dict
707
708             # Temporary fix for #4787
709             # 'Treat' all problem characters by passing filename through preferredencoding
710             # to workaround encoding issues with subprocess on python2 @ Windows
711             if sys.version_info < (3, 0) and sys.platform == 'win32':
712                 filename = encodeFilename(filename, True).decode(preferredencoding())
713             return sanitize_path(filename)
714         except ValueError as err:
715             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
716             return None
717
718     def _match_entry(self, info_dict, incomplete):
719         """ Returns None iff the file should be downloaded """
720
721         video_title = info_dict.get('title', info_dict.get('id', 'video'))
722         if 'title' in info_dict:
723             # This can happen when we're just evaluating the playlist
724             title = info_dict['title']
725             matchtitle = self.params.get('matchtitle', False)
726             if matchtitle:
727                 if not re.search(matchtitle, title, re.IGNORECASE):
728                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
729             rejecttitle = self.params.get('rejecttitle', False)
730             if rejecttitle:
731                 if re.search(rejecttitle, title, re.IGNORECASE):
732                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
733         date = info_dict.get('upload_date')
734         if date is not None:
735             dateRange = self.params.get('daterange', DateRange())
736             if date not in dateRange:
737                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
738         view_count = info_dict.get('view_count')
739         if view_count is not None:
740             min_views = self.params.get('min_views')
741             if min_views is not None and view_count < min_views:
742                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
743             max_views = self.params.get('max_views')
744             if max_views is not None and view_count > max_views:
745                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
746         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
747             return 'Skipping "%s" because it is age restricted' % video_title
748         if self.in_download_archive(info_dict):
749             return '%s has already been recorded in archive' % video_title
750
751         if not incomplete:
752             match_filter = self.params.get('match_filter')
753             if match_filter is not None:
754                 ret = match_filter(info_dict)
755                 if ret is not None:
756                     return ret
757
758         return None
759
760     @staticmethod
761     def add_extra_info(info_dict, extra_info):
762         '''Set the keys from extra_info in info dict if they are missing'''
763         for key, value in extra_info.items():
764             info_dict.setdefault(key, value)
765
766     def extract_info(self, url, download=True, ie_key=None, extra_info={},
767                      process=True, force_generic_extractor=False):
768         '''
769         Returns a list with a dictionary for each video we find.
770         If 'download', also downloads the videos.
771         extra_info is a dict containing the extra values to add to each result
772         '''
773
774         if not ie_key and force_generic_extractor:
775             ie_key = 'Generic'
776
777         if ie_key:
778             ies = [self.get_info_extractor(ie_key)]
779         else:
780             ies = self._ies
781
782         for ie in ies:
783             if not ie.suitable(url):
784                 continue
785
786             ie = self.get_info_extractor(ie.ie_key())
787             if not ie.working():
788                 self.report_warning('The program functionality for this site has been marked as broken, '
789                                     'and will probably not work.')
790
791             try:
792                 ie_result = ie.extract(url)
793                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
794                     break
795                 if isinstance(ie_result, list):
796                     # Backwards compatibility: old IE result format
797                     ie_result = {
798                         '_type': 'compat_list',
799                         'entries': ie_result,
800                     }
801                 self.add_default_extra_info(ie_result, ie, url)
802                 if process:
803                     return self.process_ie_result(ie_result, download, extra_info)
804                 else:
805                     return ie_result
806             except GeoRestrictedError as e:
807                 msg = e.msg
808                 if e.countries:
809                     msg += '\nThis video is available in %s.' % ', '.join(
810                         map(ISO3166Utils.short2full, e.countries))
811                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
812                 self.report_error(msg)
813                 break
814             except ExtractorError as e:  # An error we somewhat expected
815                 self.report_error(compat_str(e), e.format_traceback())
816                 break
817             except MaxDownloadsReached:
818                 raise
819             except Exception as e:
820                 if self.params.get('ignoreerrors', False):
821                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
822                     break
823                 else:
824                     raise
825         else:
826             self.report_error('no suitable InfoExtractor for URL %s' % url)
827
828     def add_default_extra_info(self, ie_result, ie, url):
829         self.add_extra_info(ie_result, {
830             'extractor': ie.IE_NAME,
831             'webpage_url': url,
832             'webpage_url_basename': url_basename(url),
833             'extractor_key': ie.ie_key(),
834         })
835
836     def process_ie_result(self, ie_result, download=True, extra_info={}):
837         """
838         Take the result of the ie(may be modified) and resolve all unresolved
839         references (URLs, playlist items).
840
841         It will also download the videos if 'download'.
842         Returns the resolved ie_result.
843         """
844         result_type = ie_result.get('_type', 'video')
845
846         if result_type in ('url', 'url_transparent'):
847             ie_result['url'] = sanitize_url(ie_result['url'])
848             extract_flat = self.params.get('extract_flat', False)
849             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
850                     extract_flat is True):
851                 if self.params.get('forcejson', False):
852                     self.to_stdout(json.dumps(ie_result))
853                 return ie_result
854
855         if result_type == 'video':
856             self.add_extra_info(ie_result, extra_info)
857             return self.process_video_result(ie_result, download=download)
858         elif result_type == 'url':
859             # We have to add extra_info to the results because it may be
860             # contained in a playlist
861             return self.extract_info(ie_result['url'],
862                                      download,
863                                      ie_key=ie_result.get('ie_key'),
864                                      extra_info=extra_info)
865         elif result_type == 'url_transparent':
866             # Use the information from the embedding page
867             info = self.extract_info(
868                 ie_result['url'], ie_key=ie_result.get('ie_key'),
869                 extra_info=extra_info, download=False, process=False)
870
871             # extract_info may return None when ignoreerrors is enabled and
872             # extraction failed with an error, don't crash and return early
873             # in this case
874             if not info:
875                 return info
876
877             force_properties = dict(
878                 (k, v) for k, v in ie_result.items() if v is not None)
879             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
880                 if f in force_properties:
881                     del force_properties[f]
882             new_result = info.copy()
883             new_result.update(force_properties)
884
885             # Extracted info may not be a video result (i.e.
886             # info.get('_type', 'video') != video) but rather an url or
887             # url_transparent. In such cases outer metadata (from ie_result)
888             # should be propagated to inner one (info). For this to happen
889             # _type of info should be overridden with url_transparent. This
890             # fixes issue from https://github.com/rg3/youtube-dl/pull/11163.
891             if new_result.get('_type') == 'url':
892                 new_result['_type'] = 'url_transparent'
893
894             return self.process_ie_result(
895                 new_result, download=download, extra_info=extra_info)
896         elif result_type in ('playlist', 'multi_video'):
897             # We process each entry in the playlist
898             playlist = ie_result.get('title') or ie_result.get('id')
899             self.to_screen('[download] Downloading playlist: %s' % playlist)
900
901             playlist_results = []
902
903             playliststart = self.params.get('playliststart', 1) - 1
904             playlistend = self.params.get('playlistend')
905             # For backwards compatibility, interpret -1 as whole list
906             if playlistend == -1:
907                 playlistend = None
908
909             playlistitems_str = self.params.get('playlist_items')
910             playlistitems = None
911             if playlistitems_str is not None:
912                 def iter_playlistitems(format):
913                     for string_segment in format.split(','):
914                         if '-' in string_segment:
915                             start, end = string_segment.split('-')
916                             for item in range(int(start), int(end) + 1):
917                                 yield int(item)
918                         else:
919                             yield int(string_segment)
920                 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
921
922             ie_entries = ie_result['entries']
923
924             def make_playlistitems_entries(list_ie_entries):
925                 num_entries = len(list_ie_entries)
926                 return [
927                     list_ie_entries[i - 1] for i in playlistitems
928                     if -num_entries <= i - 1 < num_entries]
929
930             def report_download(num_entries):
931                 self.to_screen(
932                     '[%s] playlist %s: Downloading %d videos' %
933                     (ie_result['extractor'], playlist, num_entries))
934
935             if isinstance(ie_entries, list):
936                 n_all_entries = len(ie_entries)
937                 if playlistitems:
938                     entries = make_playlistitems_entries(ie_entries)
939                 else:
940                     entries = ie_entries[playliststart:playlistend]
941                 n_entries = len(entries)
942                 self.to_screen(
943                     '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
944                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
945             elif isinstance(ie_entries, PagedList):
946                 if playlistitems:
947                     entries = []
948                     for item in playlistitems:
949                         entries.extend(ie_entries.getslice(
950                             item - 1, item
951                         ))
952                 else:
953                     entries = ie_entries.getslice(
954                         playliststart, playlistend)
955                 n_entries = len(entries)
956                 report_download(n_entries)
957             else:  # iterable
958                 if playlistitems:
959                     entries = make_playlistitems_entries(list(itertools.islice(
960                         ie_entries, 0, max(playlistitems))))
961                 else:
962                     entries = list(itertools.islice(
963                         ie_entries, playliststart, playlistend))
964                 n_entries = len(entries)
965                 report_download(n_entries)
966
967             if self.params.get('playlistreverse', False):
968                 entries = entries[::-1]
969
970             if self.params.get('playlistrandom', False):
971                 random.shuffle(entries)
972
973             x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
974
975             for i, entry in enumerate(entries, 1):
976                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
977                 # This __x_forwarded_for_ip thing is a bit ugly but requires
978                 # minimal changes
979                 if x_forwarded_for:
980                     entry['__x_forwarded_for_ip'] = x_forwarded_for
981                 extra = {
982                     'n_entries': n_entries,
983                     'playlist': playlist,
984                     'playlist_id': ie_result.get('id'),
985                     'playlist_title': ie_result.get('title'),
986                     'playlist_uploader': ie_result.get('uploader'),
987                     'playlist_uploader_id': ie_result.get('uploader_id'),
988                     'playlist_index': i + playliststart,
989                     'extractor': ie_result['extractor'],
990                     'webpage_url': ie_result['webpage_url'],
991                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
992                     'extractor_key': ie_result['extractor_key'],
993                 }
994
995                 reason = self._match_entry(entry, incomplete=True)
996                 if reason is not None:
997                     self.to_screen('[download] ' + reason)
998                     continue
999
1000                 entry_result = self.process_ie_result(entry,
1001                                                       download=download,
1002                                                       extra_info=extra)
1003                 playlist_results.append(entry_result)
1004             ie_result['entries'] = playlist_results
1005             self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1006             return ie_result
1007         elif result_type == 'compat_list':
1008             self.report_warning(
1009                 'Extractor %s returned a compat_list result. '
1010                 'It needs to be updated.' % ie_result.get('extractor'))
1011
1012             def _fixup(r):
1013                 self.add_extra_info(
1014                     r,
1015                     {
1016                         'extractor': ie_result['extractor'],
1017                         'webpage_url': ie_result['webpage_url'],
1018                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
1019                         'extractor_key': ie_result['extractor_key'],
1020                     }
1021                 )
1022                 return r
1023             ie_result['entries'] = [
1024                 self.process_ie_result(_fixup(r), download, extra_info)
1025                 for r in ie_result['entries']
1026             ]
1027             return ie_result
1028         else:
1029             raise Exception('Invalid result type: %s' % result_type)
1030
1031     def _build_format_filter(self, filter_spec):
1032         " Returns a function to filter the formats according to the filter_spec "
1033
1034         OPERATORS = {
1035             '<': operator.lt,
1036             '<=': operator.le,
1037             '>': operator.gt,
1038             '>=': operator.ge,
1039             '=': operator.eq,
1040             '!=': operator.ne,
1041         }
1042         operator_rex = re.compile(r'''(?x)\s*
1043             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
1044             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1045             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
1046             $
1047             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1048         m = operator_rex.search(filter_spec)
1049         if m:
1050             try:
1051                 comparison_value = int(m.group('value'))
1052             except ValueError:
1053                 comparison_value = parse_filesize(m.group('value'))
1054                 if comparison_value is None:
1055                     comparison_value = parse_filesize(m.group('value') + 'B')
1056                 if comparison_value is None:
1057                     raise ValueError(
1058                         'Invalid value %r in format specification %r' % (
1059                             m.group('value'), filter_spec))
1060             op = OPERATORS[m.group('op')]
1061
1062         if not m:
1063             STR_OPERATORS = {
1064                 '=': operator.eq,
1065                 '!=': operator.ne,
1066                 '^=': lambda attr, value: attr.startswith(value),
1067                 '$=': lambda attr, value: attr.endswith(value),
1068                 '*=': lambda attr, value: value in attr,
1069             }
1070             str_operator_rex = re.compile(r'''(?x)
1071                 \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
1072                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
1073                 \s*(?P<value>[a-zA-Z0-9._-]+)
1074                 \s*$
1075                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1076             m = str_operator_rex.search(filter_spec)
1077             if m:
1078                 comparison_value = m.group('value')
1079                 op = STR_OPERATORS[m.group('op')]
1080
1081         if not m:
1082             raise ValueError('Invalid filter specification %r' % filter_spec)
1083
1084         def _filter(f):
1085             actual_value = f.get(m.group('key'))
1086             if actual_value is None:
1087                 return m.group('none_inclusive')
1088             return op(actual_value, comparison_value)
1089         return _filter
1090
1091     def _default_format_spec(self, info_dict, download=True):
1092
1093         def can_merge():
1094             merger = FFmpegMergerPP(self)
1095             return merger.available and merger.can_merge()
1096
1097         def prefer_best():
1098             if self.params.get('simulate', False):
1099                 return False
1100             if not download:
1101                 return False
1102             if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
1103                 return True
1104             if info_dict.get('is_live'):
1105                 return True
1106             if not can_merge():
1107                 return True
1108             return False
1109
1110         req_format_list = ['bestvideo+bestaudio', 'best']
1111         if prefer_best():
1112             req_format_list.reverse()
1113         return '/'.join(req_format_list)
1114
1115     def build_format_selector(self, format_spec):
1116         def syntax_error(note, start):
1117             message = (
1118                 'Invalid format specification: '
1119                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1120             return SyntaxError(message)
1121
1122         PICKFIRST = 'PICKFIRST'
1123         MERGE = 'MERGE'
1124         SINGLE = 'SINGLE'
1125         GROUP = 'GROUP'
1126         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1127
1128         def _parse_filter(tokens):
1129             filter_parts = []
1130             for type, string, start, _, _ in tokens:
1131                 if type == tokenize.OP and string == ']':
1132                     return ''.join(filter_parts)
1133                 else:
1134                     filter_parts.append(string)
1135
1136         def _remove_unused_ops(tokens):
1137             # Remove operators that we don't use and join them with the surrounding strings
1138             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1139             ALLOWED_OPS = ('/', '+', ',', '(', ')')
1140             last_string, last_start, last_end, last_line = None, None, None, None
1141             for type, string, start, end, line in tokens:
1142                 if type == tokenize.OP and string == '[':
1143                     if last_string:
1144                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1145                         last_string = None
1146                     yield type, string, start, end, line
1147                     # everything inside brackets will be handled by _parse_filter
1148                     for type, string, start, end, line in tokens:
1149                         yield type, string, start, end, line
1150                         if type == tokenize.OP and string == ']':
1151                             break
1152                 elif type == tokenize.OP and string in ALLOWED_OPS:
1153                     if last_string:
1154                         yield tokenize.NAME, last_string, last_start, last_end, last_line
1155                         last_string = None
1156                     yield type, string, start, end, line
1157                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1158                     if not last_string:
1159                         last_string = string
1160                         last_start = start
1161                         last_end = end
1162                     else:
1163                         last_string += string
1164             if last_string:
1165                 yield tokenize.NAME, last_string, last_start, last_end, last_line
1166
1167         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1168             selectors = []
1169             current_selector = None
1170             for type, string, start, _, _ in tokens:
1171                 # ENCODING is only defined in python 3.x
1172                 if type == getattr(tokenize, 'ENCODING', None):
1173                     continue
1174                 elif type in [tokenize.NAME, tokenize.NUMBER]:
1175                     current_selector = FormatSelector(SINGLE, string, [])
1176                 elif type == tokenize.OP:
1177                     if string == ')':
1178                         if not inside_group:
1179                             # ')' will be handled by the parentheses group
1180                             tokens.restore_last_token()
1181                         break
1182                     elif inside_merge and string in ['/', ',']:
1183                         tokens.restore_last_token()
1184                         break
1185                     elif inside_choice and string == ',':
1186                         tokens.restore_last_token()
1187                         break
1188                     elif string == ',':
1189                         if not current_selector:
1190                             raise syntax_error('"," must follow a format selector', start)
1191                         selectors.append(current_selector)
1192                         current_selector = None
1193                     elif string == '/':
1194                         if not current_selector:
1195                             raise syntax_error('"/" must follow a format selector', start)
1196                         first_choice = current_selector
1197                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1198                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1199                     elif string == '[':
1200                         if not current_selector:
1201                             current_selector = FormatSelector(SINGLE, 'best', [])
1202                         format_filter = _parse_filter(tokens)
1203                         current_selector.filters.append(format_filter)
1204                     elif string == '(':
1205                         if current_selector:
1206                             raise syntax_error('Unexpected "("', start)
1207                         group = _parse_format_selection(tokens, inside_group=True)
1208                         current_selector = FormatSelector(GROUP, group, [])
1209                     elif string == '+':
1210                         video_selector = current_selector
1211                         audio_selector = _parse_format_selection(tokens, inside_merge=True)
1212                         if not video_selector or not audio_selector:
1213                             raise syntax_error('"+" must be between two format selectors', start)
1214                         current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
1215                     else:
1216                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1217                 elif type == tokenize.ENDMARKER:
1218                     break
1219             if current_selector:
1220                 selectors.append(current_selector)
1221             return selectors
1222
1223         def _build_selector_function(selector):
1224             if isinstance(selector, list):
1225                 fs = [_build_selector_function(s) for s in selector]
1226
1227                 def selector_function(ctx):
1228                     for f in fs:
1229                         for format in f(ctx):
1230                             yield format
1231                 return selector_function
1232             elif selector.type == GROUP:
1233                 selector_function = _build_selector_function(selector.selector)
1234             elif selector.type == PICKFIRST:
1235                 fs = [_build_selector_function(s) for s in selector.selector]
1236
1237                 def selector_function(ctx):
1238                     for f in fs:
1239                         picked_formats = list(f(ctx))
1240                         if picked_formats:
1241                             return picked_formats
1242                     return []
1243             elif selector.type == SINGLE:
1244                 format_spec = selector.selector
1245
1246                 def selector_function(ctx):
1247                     formats = list(ctx['formats'])
1248                     if not formats:
1249                         return
1250                     if format_spec == 'all':
1251                         for f in formats:
1252                             yield f
1253                     elif format_spec in ['best', 'worst', None]:
1254                         format_idx = 0 if format_spec == 'worst' else -1
1255                         audiovideo_formats = [
1256                             f for f in formats
1257                             if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
1258                         if audiovideo_formats:
1259                             yield audiovideo_formats[format_idx]
1260                         # for extractors with incomplete formats (audio only (soundcloud)
1261                         # or video only (imgur)) we will fallback to best/worst
1262                         # {video,audio}-only format
1263                         elif ctx['incomplete_formats']:
1264                             yield formats[format_idx]
1265                     elif format_spec == 'bestaudio':
1266                         audio_formats = [
1267                             f for f in formats
1268                             if f.get('vcodec') == 'none']
1269                         if audio_formats:
1270                             yield audio_formats[-1]
1271                     elif format_spec == 'worstaudio':
1272                         audio_formats = [
1273                             f for f in formats
1274                             if f.get('vcodec') == 'none']
1275                         if audio_formats:
1276                             yield audio_formats[0]
1277                     elif format_spec == 'bestvideo':
1278                         video_formats = [
1279                             f for f in formats
1280                             if f.get('acodec') == 'none']
1281                         if video_formats:
1282                             yield video_formats[-1]
1283                     elif format_spec == 'worstvideo':
1284                         video_formats = [
1285                             f for f in formats
1286                             if f.get('acodec') == 'none']
1287                         if video_formats:
1288                             yield video_formats[0]
1289                     else:
1290                         extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
1291                         if format_spec in extensions:
1292                             filter_f = lambda f: f['ext'] == format_spec
1293                         else:
1294                             filter_f = lambda f: f['format_id'] == format_spec
1295                         matches = list(filter(filter_f, formats))
1296                         if matches:
1297                             yield matches[-1]
1298             elif selector.type == MERGE:
1299                 def _merge(formats_info):
1300                     format_1, format_2 = [f['format_id'] for f in formats_info]
1301                     # The first format must contain the video and the
1302                     # second the audio
1303                     if formats_info[0].get('vcodec') == 'none':
1304                         self.report_error('The first format must '
1305                                           'contain the video, try using '
1306                                           '"-f %s+%s"' % (format_2, format_1))
1307                         return
1308                     # Formats must be opposite (video+audio)
1309                     if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
1310                         self.report_error(
1311                             'Both formats %s and %s are video-only, you must specify "-f video+audio"'
1312                             % (format_1, format_2))
1313                         return
1314                     output_ext = (
1315                         formats_info[0]['ext']
1316                         if self.params.get('merge_output_format') is None
1317                         else self.params['merge_output_format'])
1318                     return {
1319                         'requested_formats': formats_info,
1320                         'format': '%s+%s' % (formats_info[0].get('format'),
1321                                              formats_info[1].get('format')),
1322                         'format_id': '%s+%s' % (formats_info[0].get('format_id'),
1323                                                 formats_info[1].get('format_id')),
1324                         'width': formats_info[0].get('width'),
1325                         'height': formats_info[0].get('height'),
1326                         'resolution': formats_info[0].get('resolution'),
1327                         'fps': formats_info[0].get('fps'),
1328                         'vcodec': formats_info[0].get('vcodec'),
1329                         'vbr': formats_info[0].get('vbr'),
1330                         'stretched_ratio': formats_info[0].get('stretched_ratio'),
1331                         'acodec': formats_info[1].get('acodec'),
1332                         'abr': formats_info[1].get('abr'),
1333                         'ext': output_ext,
1334                     }
1335                 video_selector, audio_selector = map(_build_selector_function, selector.selector)
1336
1337                 def selector_function(ctx):
1338                     for pair in itertools.product(
1339                             video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
1340                         yield _merge(pair)
1341
1342             filters = [self._build_format_filter(f) for f in selector.filters]
1343
1344             def final_selector(ctx):
1345                 ctx_copy = copy.deepcopy(ctx)
1346                 for _filter in filters:
1347                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1348                 return selector_function(ctx_copy)
1349             return final_selector
1350
1351         stream = io.BytesIO(format_spec.encode('utf-8'))
1352         try:
1353             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1354         except tokenize.TokenError:
1355             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1356
1357         class TokenIterator(object):
1358             def __init__(self, tokens):
1359                 self.tokens = tokens
1360                 self.counter = 0
1361
1362             def __iter__(self):
1363                 return self
1364
1365             def __next__(self):
1366                 if self.counter >= len(self.tokens):
1367                     raise StopIteration()
1368                 value = self.tokens[self.counter]
1369                 self.counter += 1
1370                 return value
1371
1372             next = __next__
1373
1374             def restore_last_token(self):
1375                 self.counter -= 1
1376
1377         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1378         return _build_selector_function(parsed_selector)
1379
1380     def _calc_headers(self, info_dict):
1381         res = std_headers.copy()
1382
1383         add_headers = info_dict.get('http_headers')
1384         if add_headers:
1385             res.update(add_headers)
1386
1387         cookies = self._calc_cookies(info_dict)
1388         if cookies:
1389             res['Cookie'] = cookies
1390
1391         if 'X-Forwarded-For' not in res:
1392             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1393             if x_forwarded_for_ip:
1394                 res['X-Forwarded-For'] = x_forwarded_for_ip
1395
1396         return res
1397
1398     def _calc_cookies(self, info_dict):
1399         pr = sanitized_Request(info_dict['url'])
1400         self.cookiejar.add_cookie_header(pr)
1401         return pr.get_header('Cookie')
1402
1403     def process_video_result(self, info_dict, download=True):
1404         assert info_dict.get('_type', 'video') == 'video'
1405
1406         if 'id' not in info_dict:
1407             raise ExtractorError('Missing "id" field in extractor result')
1408         if 'title' not in info_dict:
1409             raise ExtractorError('Missing "title" field in extractor result')
1410
1411         def report_force_conversion(field, field_not, conversion):
1412             self.report_warning(
1413                 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
1414                 % (field, field_not, conversion))
1415
1416         def sanitize_string_field(info, string_field):
1417             field = info.get(string_field)
1418             if field is None or isinstance(field, compat_str):
1419                 return
1420             report_force_conversion(string_field, 'a string', 'string')
1421             info[string_field] = compat_str(field)
1422
1423         def sanitize_numeric_fields(info):
1424             for numeric_field in self._NUMERIC_FIELDS:
1425                 field = info.get(numeric_field)
1426                 if field is None or isinstance(field, compat_numeric_types):
1427                     continue
1428                 report_force_conversion(numeric_field, 'numeric', 'int')
1429                 info[numeric_field] = int_or_none(field)
1430
1431         sanitize_string_field(info_dict, 'id')
1432         sanitize_numeric_fields(info_dict)
1433
1434         if 'playlist' not in info_dict:
1435             # It isn't part of a playlist
1436             info_dict['playlist'] = None
1437             info_dict['playlist_index'] = None
1438
1439         thumbnails = info_dict.get('thumbnails')
1440         if thumbnails is None:
1441             thumbnail = info_dict.get('thumbnail')
1442             if thumbnail:
1443                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1444         if thumbnails:
1445             thumbnails.sort(key=lambda t: (
1446                 t.get('preference') if t.get('preference') is not None else -1,
1447                 t.get('width') if t.get('width') is not None else -1,
1448                 t.get('height') if t.get('height') is not None else -1,
1449                 t.get('id') if t.get('id') is not None else '', t.get('url')))
1450             for i, t in enumerate(thumbnails):
1451                 t['url'] = sanitize_url(t['url'])
1452                 if t.get('width') and t.get('height'):
1453                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1454                 if t.get('id') is None:
1455                     t['id'] = '%d' % i
1456
1457         if self.params.get('list_thumbnails'):
1458             self.list_thumbnails(info_dict)
1459             return
1460
1461         thumbnail = info_dict.get('thumbnail')
1462         if thumbnail:
1463             info_dict['thumbnail'] = sanitize_url(thumbnail)
1464         elif thumbnails:
1465             info_dict['thumbnail'] = thumbnails[-1]['url']
1466
1467         if 'display_id' not in info_dict and 'id' in info_dict:
1468             info_dict['display_id'] = info_dict['id']
1469
1470         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1471             # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1472             # see http://bugs.python.org/issue1646728)
1473             try:
1474                 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1475                 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1476             except (ValueError, OverflowError, OSError):
1477                 pass
1478
1479         # Auto generate title fields corresponding to the *_number fields when missing
1480         # in order to always have clean titles. This is very common for TV series.
1481         for field in ('chapter', 'season', 'episode'):
1482             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1483                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
1484
1485         for cc_kind in ('subtitles', 'automatic_captions'):
1486             cc = info_dict.get(cc_kind)
1487             if cc:
1488                 for _, subtitle in cc.items():
1489                     for subtitle_format in subtitle:
1490                         if subtitle_format.get('url'):
1491                             subtitle_format['url'] = sanitize_url(subtitle_format['url'])
1492                         if subtitle_format.get('ext') is None:
1493                             subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1494
1495         automatic_captions = info_dict.get('automatic_captions')
1496         subtitles = info_dict.get('subtitles')
1497
1498         if self.params.get('listsubtitles', False):
1499             if 'automatic_captions' in info_dict:
1500                 self.list_subtitles(
1501                     info_dict['id'], automatic_captions, 'automatic captions')
1502             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1503             return
1504
1505         info_dict['requested_subtitles'] = self.process_subtitles(
1506             info_dict['id'], subtitles, automatic_captions)
1507
1508         # We now pick which formats have to be downloaded
1509         if info_dict.get('formats') is None:
1510             # There's only one format available
1511             formats = [info_dict]
1512         else:
1513             formats = info_dict['formats']
1514
1515         if not formats:
1516             raise ExtractorError('No video formats found!')
1517
1518         def is_wellformed(f):
1519             url = f.get('url')
1520             if not url:
1521                 self.report_warning(
1522                     '"url" field is missing or empty - skipping format, '
1523                     'there is an error in extractor')
1524                 return False
1525             if isinstance(url, bytes):
1526                 sanitize_string_field(f, 'url')
1527             return True
1528
1529         # Filter out malformed formats for better extraction robustness
1530         formats = list(filter(is_wellformed, formats))
1531
1532         formats_dict = {}
1533
1534         # We check that all the formats have the format and format_id fields
1535         for i, format in enumerate(formats):
1536             sanitize_string_field(format, 'format_id')
1537             sanitize_numeric_fields(format)
1538             format['url'] = sanitize_url(format['url'])
1539             if not format.get('format_id'):
1540                 format['format_id'] = compat_str(i)
1541             else:
1542                 # Sanitize format_id from characters used in format selector expression
1543                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
1544             format_id = format['format_id']
1545             if format_id not in formats_dict:
1546                 formats_dict[format_id] = []
1547             formats_dict[format_id].append(format)
1548
1549         # Make sure all formats have unique format_id
1550         for format_id, ambiguous_formats in formats_dict.items():
1551             if len(ambiguous_formats) > 1:
1552                 for i, format in enumerate(ambiguous_formats):
1553                     format['format_id'] = '%s-%d' % (format_id, i)
1554
1555         for i, format in enumerate(formats):
1556             if format.get('format') is None:
1557                 format['format'] = '{id} - {res}{note}'.format(
1558                     id=format['format_id'],
1559                     res=self.format_resolution(format),
1560                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1561                 )
1562             # Automatically determine file extension if missing
1563             if format.get('ext') is None:
1564                 format['ext'] = determine_ext(format['url']).lower()
1565             # Automatically determine protocol if missing (useful for format
1566             # selection purposes)
1567             if format.get('protocol') is None:
1568                 format['protocol'] = determine_protocol(format)
1569             # Add HTTP headers, so that external programs can use them from the
1570             # json output
1571             full_format_info = info_dict.copy()
1572             full_format_info.update(format)
1573             format['http_headers'] = self._calc_headers(full_format_info)
1574         # Remove private housekeeping stuff
1575         if '__x_forwarded_for_ip' in info_dict:
1576             del info_dict['__x_forwarded_for_ip']
1577
1578         # TODO Central sorting goes here
1579
1580         if formats[0] is not info_dict:
1581             # only set the 'formats' fields if the original info_dict list them
1582             # otherwise we end up with a circular reference, the first (and unique)
1583             # element in the 'formats' field in info_dict is info_dict itself,
1584             # which can't be exported to json
1585             info_dict['formats'] = formats
1586         if self.params.get('listformats'):
1587             self.list_formats(info_dict)
1588             return
1589
1590         req_format = self.params.get('format')
1591         if req_format is None:
1592             req_format = self._default_format_spec(info_dict, download=download)
1593             if self.params.get('verbose'):
1594                 self.to_stdout('[debug] Default format spec: %s' % req_format)
1595
1596         format_selector = self.build_format_selector(req_format)
1597
1598         # While in format selection we may need to have an access to the original
1599         # format set in order to calculate some metrics or do some processing.
1600         # For now we need to be able to guess whether original formats provided
1601         # by extractor are incomplete or not (i.e. whether extractor provides only
1602         # video-only or audio-only formats) for proper formats selection for
1603         # extractors with such incomplete formats (see
1604         # https://github.com/rg3/youtube-dl/pull/5556).
1605         # Since formats may be filtered during format selection and may not match
1606         # the original formats the results may be incorrect. Thus original formats
1607         # or pre-calculated metrics should be passed to format selection routines
1608         # as well.
1609         # We will pass a context object containing all necessary additional data
1610         # instead of just formats.
1611         # This fixes incorrect format selection issue (see
1612         # https://github.com/rg3/youtube-dl/issues/10083).
1613         incomplete_formats = (
1614             # All formats are video-only or
1615             all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) or
1616             # all formats are audio-only
1617             all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
1618
1619         ctx = {
1620             'formats': formats,
1621             'incomplete_formats': incomplete_formats,
1622         }
1623
1624         formats_to_download = list(format_selector(ctx))
1625         if not formats_to_download:
1626             raise ExtractorError('requested format not available',
1627                                  expected=True)
1628
1629         if download:
1630             if len(formats_to_download) > 1:
1631                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1632             for format in formats_to_download:
1633                 new_info = dict(info_dict)
1634                 new_info.update(format)
1635                 self.process_info(new_info)
1636         # We update the info dict with the best quality format (backwards compatibility)
1637         info_dict.update(formats_to_download[-1])
1638         return info_dict
1639
1640     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1641         """Select the requested subtitles and their format"""
1642         available_subs = {}
1643         if normal_subtitles and self.params.get('writesubtitles'):
1644             available_subs.update(normal_subtitles)
1645         if automatic_captions and self.params.get('writeautomaticsub'):
1646             for lang, cap_info in automatic_captions.items():
1647                 if lang not in available_subs:
1648                     available_subs[lang] = cap_info
1649
1650         if (not self.params.get('writesubtitles') and not
1651                 self.params.get('writeautomaticsub') or not
1652                 available_subs):
1653             return None
1654
1655         if self.params.get('allsubtitles', False):
1656             requested_langs = available_subs.keys()
1657         else:
1658             if self.params.get('subtitleslangs', False):
1659                 requested_langs = self.params.get('subtitleslangs')
1660             elif 'en' in available_subs:
1661                 requested_langs = ['en']
1662             else:
1663                 requested_langs = [list(available_subs.keys())[0]]
1664
1665         formats_query = self.params.get('subtitlesformat', 'best')
1666         formats_preference = formats_query.split('/') if formats_query else []
1667         subs = {}
1668         for lang in requested_langs:
1669             formats = available_subs.get(lang)
1670             if formats is None:
1671                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1672                 continue
1673             for ext in formats_preference:
1674                 if ext == 'best':
1675                     f = formats[-1]
1676                     break
1677                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1678                 if matches:
1679                     f = matches[-1]
1680                     break
1681             else:
1682                 f = formats[-1]
1683                 self.report_warning(
1684                     'No subtitle format found matching "%s" for language %s, '
1685                     'using %s' % (formats_query, lang, f['ext']))
1686             subs[lang] = f
1687         return subs
1688
1689     def process_info(self, info_dict):
1690         """Process a single resolved IE result."""
1691
1692         assert info_dict.get('_type', 'video') == 'video'
1693
1694         max_downloads = self.params.get('max_downloads')
1695         if max_downloads is not None:
1696             if self._num_downloads >= int(max_downloads):
1697                 raise MaxDownloadsReached()
1698
1699         info_dict['fulltitle'] = info_dict['title']
1700         if len(info_dict['title']) > 200:
1701             info_dict['title'] = info_dict['title'][:197] + '...'
1702
1703         if 'format' not in info_dict:
1704             info_dict['format'] = info_dict['ext']
1705
1706         reason = self._match_entry(info_dict, incomplete=False)
1707         if reason is not None:
1708             self.to_screen('[download] ' + reason)
1709             return
1710
1711         self._num_downloads += 1
1712
1713         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1714
1715         # Forced printings
1716         if self.params.get('forcetitle', False):
1717             self.to_stdout(info_dict['fulltitle'])
1718         if self.params.get('forceid', False):
1719             self.to_stdout(info_dict['id'])
1720         if self.params.get('forceurl', False):
1721             if info_dict.get('requested_formats') is not None:
1722                 for f in info_dict['requested_formats']:
1723                     self.to_stdout(f['url'] + f.get('play_path', ''))
1724             else:
1725                 # For RTMP URLs, also include the playpath
1726                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1727         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1728             self.to_stdout(info_dict['thumbnail'])
1729         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1730             self.to_stdout(info_dict['description'])
1731         if self.params.get('forcefilename', False) and filename is not None:
1732             self.to_stdout(filename)
1733         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1734             self.to_stdout(formatSeconds(info_dict['duration']))
1735         if self.params.get('forceformat', False):
1736             self.to_stdout(info_dict['format'])
1737         if self.params.get('forcejson', False):
1738             self.to_stdout(json.dumps(info_dict))
1739
1740         # Do nothing else if in simulate mode
1741         if self.params.get('simulate', False):
1742             return
1743
1744         if filename is None:
1745             return
1746
1747         def ensure_dir_exists(path):
1748             try:
1749                 dn = os.path.dirname(path)
1750                 if dn and not os.path.exists(dn):
1751                     os.makedirs(dn)
1752                 return True
1753             except (OSError, IOError) as err:
1754                 self.report_error('unable to create directory ' + error_to_compat_str(err))
1755                 return False
1756
1757         if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
1758             return
1759
1760         if self.params.get('writedescription', False):
1761             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1762             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1763                 self.to_screen('[info] Video description is already present')
1764             elif info_dict.get('description') is None:
1765                 self.report_warning('There\'s no description to write.')
1766             else:
1767                 try:
1768                     self.to_screen('[info] Writing video description to: ' + descfn)
1769                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1770                         descfile.write(info_dict['description'])
1771                 except (OSError, IOError):
1772                     self.report_error('Cannot write description file ' + descfn)
1773                     return
1774
1775         if self.params.get('writeannotations', False):
1776             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1777             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1778                 self.to_screen('[info] Video annotations are already present')
1779             else:
1780                 try:
1781                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1782                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1783                         annofile.write(info_dict['annotations'])
1784                 except (KeyError, TypeError):
1785                     self.report_warning('There are no annotations to write.')
1786                 except (OSError, IOError):
1787                     self.report_error('Cannot write annotations file: ' + annofn)
1788                     return
1789
1790         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1791                                        self.params.get('writeautomaticsub')])
1792
1793         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1794             # subtitles download errors are already managed as troubles in relevant IE
1795             # that way it will silently go on when used with unsupporting IE
1796             subtitles = info_dict['requested_subtitles']
1797             ie = self.get_info_extractor(info_dict['extractor_key'])
1798             for sub_lang, sub_info in subtitles.items():
1799                 sub_format = sub_info['ext']
1800                 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1801                 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1802                     self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
1803                 else:
1804                     self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1805                     if sub_info.get('data') is not None:
1806                         try:
1807                             # Use newline='' to prevent conversion of newline characters
1808                             # See https://github.com/rg3/youtube-dl/issues/10268
1809                             with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
1810                                 subfile.write(sub_info['data'])
1811                         except (OSError, IOError):
1812                             self.report_error('Cannot write subtitles file ' + sub_filename)
1813                             return
1814                     else:
1815                         try:
1816                             sub_data = ie._request_webpage(
1817                                 sub_info['url'], info_dict['id'], note=False).read()
1818                             with io.open(encodeFilename(sub_filename), 'wb') as subfile:
1819                                 subfile.write(sub_data)
1820                         except (ExtractorError, IOError, OSError, ValueError) as err:
1821                             self.report_warning('Unable to download subtitle for "%s": %s' %
1822                                                 (sub_lang, error_to_compat_str(err)))
1823                             continue
1824
1825         if self.params.get('writeinfojson', False):
1826             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1827             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1828                 self.to_screen('[info] Video description metadata is already present')
1829             else:
1830                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1831                 try:
1832                     write_json_file(self.filter_requested_info(info_dict), infofn)
1833                 except (OSError, IOError):
1834                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1835                     return
1836
1837         self._write_thumbnails(info_dict, filename)
1838
1839         if not self.params.get('skip_download', False):
1840             try:
1841                 def dl(name, info):
1842                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1843                     for ph in self._progress_hooks:
1844                         fd.add_progress_hook(ph)
1845                     if self.params.get('verbose'):
1846                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1847                     return fd.download(name, info)
1848
1849                 if info_dict.get('requested_formats') is not None:
1850                     downloaded = []
1851                     success = True
1852                     merger = FFmpegMergerPP(self)
1853                     if not merger.available:
1854                         postprocessors = []
1855                         self.report_warning('You have requested multiple '
1856                                             'formats but ffmpeg or avconv are not installed.'
1857                                             ' The formats won\'t be merged.')
1858                     else:
1859                         postprocessors = [merger]
1860
1861                     def compatible_formats(formats):
1862                         video, audio = formats
1863                         # Check extension
1864                         video_ext, audio_ext = video.get('ext'), audio.get('ext')
1865                         if video_ext and audio_ext:
1866                             COMPATIBLE_EXTS = (
1867                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
1868                                 ('webm')
1869                             )
1870                             for exts in COMPATIBLE_EXTS:
1871                                 if video_ext in exts and audio_ext in exts:
1872                                     return True
1873                         # TODO: Check acodec/vcodec
1874                         return False
1875
1876                     filename_real_ext = os.path.splitext(filename)[1][1:]
1877                     filename_wo_ext = (
1878                         os.path.splitext(filename)[0]
1879                         if filename_real_ext == info_dict['ext']
1880                         else filename)
1881                     requested_formats = info_dict['requested_formats']
1882                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1883                         info_dict['ext'] = 'mkv'
1884                         self.report_warning(
1885                             'Requested formats are incompatible for merge and will be merged into mkv.')
1886                     # Ensure filename always has a correct extension for successful merge
1887                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1888                     if os.path.exists(encodeFilename(filename)):
1889                         self.to_screen(
1890                             '[download] %s has already been downloaded and '
1891                             'merged' % filename)
1892                     else:
1893                         for f in requested_formats:
1894                             new_info = dict(info_dict)
1895                             new_info.update(f)
1896                             fname = prepend_extension(
1897                                 self.prepare_filename(new_info),
1898                                 'f%s' % f['format_id'], new_info['ext'])
1899                             if not ensure_dir_exists(fname):
1900                                 return
1901                             downloaded.append(fname)
1902                             partial_success = dl(fname, new_info)
1903                             success = success and partial_success
1904                         info_dict['__postprocessors'] = postprocessors
1905                         info_dict['__files_to_merge'] = downloaded
1906                 else:
1907                     # Just a single file
1908                     success = dl(filename, info_dict)
1909             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1910                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
1911                 return
1912             except (OSError, IOError) as err:
1913                 raise UnavailableVideoError(err)
1914             except (ContentTooShortError, ) as err:
1915                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1916                 return
1917
1918             if success and filename != '-':
1919                 # Fixup content
1920                 fixup_policy = self.params.get('fixup')
1921                 if fixup_policy is None:
1922                     fixup_policy = 'detect_or_warn'
1923
1924                 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
1925
1926                 stretched_ratio = info_dict.get('stretched_ratio')
1927                 if stretched_ratio is not None and stretched_ratio != 1:
1928                     if fixup_policy == 'warn':
1929                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1930                             info_dict['id'], stretched_ratio))
1931                     elif fixup_policy == 'detect_or_warn':
1932                         stretched_pp = FFmpegFixupStretchedPP(self)
1933                         if stretched_pp.available:
1934                             info_dict.setdefault('__postprocessors', [])
1935                             info_dict['__postprocessors'].append(stretched_pp)
1936                         else:
1937                             self.report_warning(
1938                                 '%s: Non-uniform pixel ratio (%s). %s'
1939                                 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
1940                     else:
1941                         assert fixup_policy in ('ignore', 'never')
1942
1943                 if (info_dict.get('requested_formats') is None and
1944                         info_dict.get('container') == 'm4a_dash'):
1945                     if fixup_policy == 'warn':
1946                         self.report_warning(
1947                             '%s: writing DASH m4a. '
1948                             'Only some players support this container.'
1949                             % info_dict['id'])
1950                     elif fixup_policy == 'detect_or_warn':
1951                         fixup_pp = FFmpegFixupM4aPP(self)
1952                         if fixup_pp.available:
1953                             info_dict.setdefault('__postprocessors', [])
1954                             info_dict['__postprocessors'].append(fixup_pp)
1955                         else:
1956                             self.report_warning(
1957                                 '%s: writing DASH m4a. '
1958                                 'Only some players support this container. %s'
1959                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
1960                     else:
1961                         assert fixup_policy in ('ignore', 'never')
1962
1963                 if (info_dict.get('protocol') == 'm3u8_native' or
1964                         info_dict.get('protocol') == 'm3u8' and
1965                         self.params.get('hls_prefer_native')):
1966                     if fixup_policy == 'warn':
1967                         self.report_warning('%s: malformed AAC bitstream detected.' % (
1968                             info_dict['id']))
1969                     elif fixup_policy == 'detect_or_warn':
1970                         fixup_pp = FFmpegFixupM3u8PP(self)
1971                         if fixup_pp.available:
1972                             info_dict.setdefault('__postprocessors', [])
1973                             info_dict['__postprocessors'].append(fixup_pp)
1974                         else:
1975                             self.report_warning(
1976                                 '%s: malformed AAC bitstream detected. %s'
1977                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
1978                     else:
1979                         assert fixup_policy in ('ignore', 'never')
1980
1981                 try:
1982                     self.post_process(filename, info_dict)
1983                 except (PostProcessingError) as err:
1984                     self.report_error('postprocessing: %s' % str(err))
1985                     return
1986                 self.record_download_archive(info_dict)
1987
1988     def download(self, url_list):
1989         """Download a given list of URLs."""
1990         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1991         if (len(url_list) > 1 and
1992                 outtmpl != '-' and
1993                 '%' not in outtmpl and
1994                 self.params.get('max_downloads') != 1):
1995             raise SameFileError(outtmpl)
1996
1997         for url in url_list:
1998             try:
1999                 # It also downloads the videos
2000                 res = self.extract_info(
2001                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2002             except UnavailableVideoError:
2003                 self.report_error('unable to download video')
2004             except MaxDownloadsReached:
2005                 self.to_screen('[info] Maximum number of downloaded files reached.')
2006                 raise
2007             else:
2008                 if self.params.get('dump_single_json', False):
2009                     self.to_stdout(json.dumps(res))
2010
2011         return self._download_retcode
2012
2013     def download_with_info_file(self, info_filename):
2014         with contextlib.closing(fileinput.FileInput(
2015                 [info_filename], mode='r',
2016                 openhook=fileinput.hook_encoded('utf-8'))) as f:
2017             # FileInput doesn't have a read method, we can't call json.load
2018             info = self.filter_requested_info(json.loads('\n'.join(f)))
2019         try:
2020             self.process_ie_result(info, download=True)
2021         except DownloadError:
2022             webpage_url = info.get('webpage_url')
2023             if webpage_url is not None:
2024                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2025                 return self.download([webpage_url])
2026             else:
2027                 raise
2028         return self._download_retcode
2029
2030     @staticmethod
2031     def filter_requested_info(info_dict):
2032         return dict(
2033             (k, v) for k, v in info_dict.items()
2034             if k not in ['requested_formats', 'requested_subtitles'])
2035
2036     def post_process(self, filename, ie_info):
2037         """Run all the postprocessors on the given file."""
2038         info = dict(ie_info)
2039         info['filepath'] = filename
2040         pps_chain = []
2041         if ie_info.get('__postprocessors') is not None:
2042             pps_chain.extend(ie_info['__postprocessors'])
2043         pps_chain.extend(self._pps)
2044         for pp in pps_chain:
2045             files_to_delete = []
2046             try:
2047                 files_to_delete, info = pp.run(info)
2048             except PostProcessingError as e:
2049                 self.report_error(e.msg)
2050             if files_to_delete and not self.params.get('keepvideo', False):
2051                 for old_filename in files_to_delete:
2052                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2053                     try:
2054                         os.remove(encodeFilename(old_filename))
2055                     except (IOError, OSError):
2056                         self.report_warning('Unable to remove downloaded original file')
2057
2058     def _make_archive_id(self, info_dict):
2059         # Future-proof against any change in case
2060         # and backwards compatibility with prior versions
2061         extractor = info_dict.get('extractor_key')
2062         if extractor is None:
2063             if 'id' in info_dict:
2064                 extractor = info_dict.get('ie_key')  # key in a playlist
2065         if extractor is None:
2066             return None  # Incomplete video information
2067         return extractor.lower() + ' ' + info_dict['id']
2068
2069     def in_download_archive(self, info_dict):
2070         fn = self.params.get('download_archive')
2071         if fn is None:
2072             return False
2073
2074         vid_id = self._make_archive_id(info_dict)
2075         if vid_id is None:
2076             return False  # Incomplete video information
2077
2078         try:
2079             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
2080                 for line in archive_file:
2081                     if line.strip() == vid_id:
2082                         return True
2083         except IOError as ioe:
2084             if ioe.errno != errno.ENOENT:
2085                 raise
2086         return False
2087
2088     def record_download_archive(self, info_dict):
2089         fn = self.params.get('download_archive')
2090         if fn is None:
2091             return
2092         vid_id = self._make_archive_id(info_dict)
2093         assert vid_id
2094         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2095             archive_file.write(vid_id + '\n')
2096
2097     @staticmethod
2098     def format_resolution(format, default='unknown'):
2099         if format.get('vcodec') == 'none':
2100             return 'audio only'
2101         if format.get('resolution') is not None:
2102             return format['resolution']
2103         if format.get('height') is not None:
2104             if format.get('width') is not None:
2105                 res = '%sx%s' % (format['width'], format['height'])
2106             else:
2107                 res = '%sp' % format['height']
2108         elif format.get('width') is not None:
2109             res = '%dx?' % format['width']
2110         else:
2111             res = default
2112         return res
2113
2114     def _format_note(self, fdict):
2115         res = ''
2116         if fdict.get('ext') in ['f4f', 'f4m']:
2117             res += '(unsupported) '
2118         if fdict.get('language'):
2119             if res:
2120                 res += ' '
2121             res += '[%s] ' % fdict['language']
2122         if fdict.get('format_note') is not None:
2123             res += fdict['format_note'] + ' '
2124         if fdict.get('tbr') is not None:
2125             res += '%4dk ' % fdict['tbr']
2126         if fdict.get('container') is not None:
2127             if res:
2128                 res += ', '
2129             res += '%s container' % fdict['container']
2130         if (fdict.get('vcodec') is not None and
2131                 fdict.get('vcodec') != 'none'):
2132             if res:
2133                 res += ', '
2134             res += fdict['vcodec']
2135             if fdict.get('vbr') is not None:
2136                 res += '@'
2137         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2138             res += 'video@'
2139         if fdict.get('vbr') is not None:
2140             res += '%4dk' % fdict['vbr']
2141         if fdict.get('fps') is not None:
2142             if res:
2143                 res += ', '
2144             res += '%sfps' % fdict['fps']
2145         if fdict.get('acodec') is not None:
2146             if res:
2147                 res += ', '
2148             if fdict['acodec'] == 'none':
2149                 res += 'video only'
2150             else:
2151                 res += '%-5s' % fdict['acodec']
2152         elif fdict.get('abr') is not None:
2153             if res:
2154                 res += ', '
2155             res += 'audio'
2156         if fdict.get('abr') is not None:
2157             res += '@%3dk' % fdict['abr']
2158         if fdict.get('asr') is not None:
2159             res += ' (%5dHz)' % fdict['asr']
2160         if fdict.get('filesize') is not None:
2161             if res:
2162                 res += ', '
2163             res += format_bytes(fdict['filesize'])
2164         elif fdict.get('filesize_approx') is not None:
2165             if res:
2166                 res += ', '
2167             res += '~' + format_bytes(fdict['filesize_approx'])
2168         return res
2169
2170     def list_formats(self, info_dict):
2171         formats = info_dict.get('formats', [info_dict])
2172         table = [
2173             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
2174             for f in formats
2175             if f.get('preference') is None or f['preference'] >= -1000]
2176         if len(formats) > 1:
2177             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
2178
2179         header_line = ['format code', 'extension', 'resolution', 'note']
2180         self.to_screen(
2181             '[info] Available formats for %s:\n%s' %
2182             (info_dict['id'], render_table(header_line, table)))
2183
2184     def list_thumbnails(self, info_dict):
2185         thumbnails = info_dict.get('thumbnails')
2186         if not thumbnails:
2187             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2188             return
2189
2190         self.to_screen(
2191             '[info] Thumbnails for %s:' % info_dict['id'])
2192         self.to_screen(render_table(
2193             ['ID', 'width', 'height', 'URL'],
2194             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
2195
2196     def list_subtitles(self, video_id, subtitles, name='subtitles'):
2197         if not subtitles:
2198             self.to_screen('%s has no %s' % (video_id, name))
2199             return
2200         self.to_screen(
2201             'Available %s for %s:' % (name, video_id))
2202         self.to_screen(render_table(
2203             ['Language', 'formats'],
2204             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2205                 for lang, formats in subtitles.items()]))
2206
2207     def urlopen(self, req):
2208         """ Start an HTTP download """
2209         if isinstance(req, compat_basestring):
2210             req = sanitized_Request(req)
2211         return self._opener.open(req, timeout=self._socket_timeout)
2212
2213     def print_debug_header(self):
2214         if not self.params.get('verbose'):
2215             return
2216
2217         if type('') is not compat_str:
2218             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
2219             self.report_warning(
2220                 'Your Python is broken! Update to a newer and supported version')
2221
2222         stdout_encoding = getattr(
2223             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
2224         encoding_str = (
2225             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
2226                 locale.getpreferredencoding(),
2227                 sys.getfilesystemencoding(),
2228                 stdout_encoding,
2229                 self.get_encoding()))
2230         write_string(encoding_str, encoding=None)
2231
2232         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
2233         if _LAZY_LOADER:
2234             self._write_string('[debug] Lazy loading extractors enabled' + '\n')
2235         try:
2236             sp = subprocess.Popen(
2237                 ['git', 'rev-parse', '--short', 'HEAD'],
2238                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
2239                 cwd=os.path.dirname(os.path.abspath(__file__)))
2240             out, err = sp.communicate()
2241             out = out.decode().strip()
2242             if re.match('[0-9a-f]+', out):
2243                 self._write_string('[debug] Git HEAD: ' + out + '\n')
2244         except Exception:
2245             try:
2246                 sys.exc_clear()
2247             except Exception:
2248                 pass
2249
2250         def python_implementation():
2251             impl_name = platform.python_implementation()
2252             if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
2253                 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
2254             return impl_name
2255
2256         self._write_string('[debug] Python version %s (%s) - %s\n' % (
2257             platform.python_version(), python_implementation(),
2258             platform_name()))
2259
2260         exe_versions = FFmpegPostProcessor.get_versions(self)
2261         exe_versions['rtmpdump'] = rtmpdump_version()
2262         exe_versions['phantomjs'] = PhantomJSwrapper._version()
2263         exe_str = ', '.join(
2264             '%s %s' % (exe, v)
2265             for exe, v in sorted(exe_versions.items())
2266             if v
2267         )
2268         if not exe_str:
2269             exe_str = 'none'
2270         self._write_string('[debug] exe versions: %s\n' % exe_str)
2271
2272         proxy_map = {}
2273         for handler in self._opener.handlers:
2274             if hasattr(handler, 'proxies'):
2275                 proxy_map.update(handler.proxies)
2276         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
2277
2278         if self.params.get('call_home', False):
2279             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
2280             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
2281             latest_version = self.urlopen(
2282                 'https://yt-dl.org/latest/version').read().decode('utf-8')
2283             if version_tuple(latest_version) > version_tuple(__version__):
2284                 self.report_warning(
2285                     'You are using an outdated version (newest version: %s)! '
2286                     'See https://yt-dl.org/update if you need help updating.' %
2287                     latest_version)
2288
2289     def _setup_opener(self):
2290         timeout_val = self.params.get('socket_timeout')
2291         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
2292
2293         opts_cookiefile = self.params.get('cookiefile')
2294         opts_proxy = self.params.get('proxy')
2295
2296         if opts_cookiefile is None:
2297             self.cookiejar = compat_cookiejar.CookieJar()
2298         else:
2299             opts_cookiefile = expand_path(opts_cookiefile)
2300             self.cookiejar = compat_cookiejar.MozillaCookieJar(
2301                 opts_cookiefile)
2302             if os.access(opts_cookiefile, os.R_OK):
2303                 self.cookiejar.load(ignore_discard=True, ignore_expires=True)
2304                 # Force CookieJar to treat 'expires=0' cookies as session/discard cookies
2305                 # Fixes https://bugs.python.org/issue17164
2306                 for cookie in self.cookiejar:
2307                     if cookie.expires == 0:
2308                         cookie.expires = None
2309                         cookie.discard = True
2310
2311         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
2312         if opts_proxy is not None:
2313             if opts_proxy == '':
2314                 proxies = {}
2315             else:
2316                 proxies = {'http': opts_proxy, 'https': opts_proxy}
2317         else:
2318             proxies = compat_urllib_request.getproxies()
2319             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
2320             if 'http' in proxies and 'https' not in proxies:
2321                 proxies['https'] = proxies['http']
2322         proxy_handler = PerRequestProxyHandler(proxies)
2323
2324         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
2325         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
2326         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
2327         data_handler = compat_urllib_request_DataHandler()
2328
2329         # When passing our own FileHandler instance, build_opener won't add the
2330         # default FileHandler and allows us to disable the file protocol, which
2331         # can be used for malicious purposes (see
2332         # https://github.com/rg3/youtube-dl/issues/8227)
2333         file_handler = compat_urllib_request.FileHandler()
2334
2335         def file_open(*args, **kwargs):
2336             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
2337         file_handler.file_open = file_open
2338
2339         opener = compat_urllib_request.build_opener(
2340             proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)
2341
2342         # Delete the default user-agent header, which would otherwise apply in
2343         # cases where our custom HTTP handler doesn't come into play
2344         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
2345         opener.addheaders = []
2346         self._opener = opener
2347
2348     def encode(self, s):
2349         if isinstance(s, bytes):
2350             return s  # Already encoded
2351
2352         try:
2353             return s.encode(self.get_encoding())
2354         except UnicodeEncodeError as err:
2355             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2356             raise
2357
2358     def get_encoding(self):
2359         encoding = self.params.get('encoding')
2360         if encoding is None:
2361             encoding = preferredencoding()
2362         return encoding
2363
2364     def _write_thumbnails(self, info_dict, filename):
2365         if self.params.get('writethumbnail', False):
2366             thumbnails = info_dict.get('thumbnails')
2367             if thumbnails:
2368                 thumbnails = [thumbnails[-1]]
2369         elif self.params.get('write_all_thumbnails', False):
2370             thumbnails = info_dict.get('thumbnails')
2371         else:
2372             return
2373
2374         if not thumbnails:
2375             # No thumbnails present, so return immediately
2376             return
2377
2378         for t in thumbnails:
2379             thumb_ext = determine_ext(t['url'], 'jpg')
2380             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2381             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2382             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
2383
2384             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2385                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2386                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2387             else:
2388                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2389                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2390                 try:
2391                     uf = self.urlopen(t['url'])
2392                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2393                         shutil.copyfileobj(uf, thumbf)
2394                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2395                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2396                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2397                     self.report_warning('Unable to download thumbnail "%s": %s' %
2398                                         (t['url'], error_to_compat_str(err)))