[YoutubeDL] Ensure protocol is always present
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import datetime
9 import errno
10 import fileinput
11 import io
12 import itertools
13 import json
14 import locale
15 import operator
16 import os
17 import platform
18 import re
19 import shutil
20 import subprocess
21 import socket
22 import sys
23 import time
24 import tokenize
25 import traceback
26
27 if os.name == 'nt':
28     import ctypes
29
30 from .compat import (
31     compat_basestring,
32     compat_cookiejar,
33     compat_expanduser,
34     compat_get_terminal_size,
35     compat_http_client,
36     compat_kwargs,
37     compat_str,
38     compat_tokenize_tokenize,
39     compat_urllib_error,
40     compat_urllib_request,
41     compat_urllib_request_DataHandler,
42 )
43 from .utils import (
44     ContentTooShortError,
45     date_from_str,
46     DateRange,
47     DEFAULT_OUTTMPL,
48     determine_ext,
49     determine_protocol,
50     DownloadError,
51     encode_compat_str,
52     encodeFilename,
53     error_to_compat_str,
54     ExtractorError,
55     format_bytes,
56     formatSeconds,
57     locked_file,
58     make_HTTPS_handler,
59     MaxDownloadsReached,
60     PagedList,
61     parse_filesize,
62     PerRequestProxyHandler,
63     PostProcessingError,
64     platform_name,
65     preferredencoding,
66     render_table,
67     SameFileError,
68     sanitize_filename,
69     sanitize_path,
70     sanitized_Request,
71     std_headers,
72     subtitles_filename,
73     UnavailableVideoError,
74     url_basename,
75     version_tuple,
76     write_json_file,
77     write_string,
78     YoutubeDLCookieProcessor,
79     YoutubeDLHandler,
80     prepend_extension,
81     replace_extension,
82     args_to_str,
83     age_restricted,
84 )
85 from .cache import Cache
86 from .extractor import get_info_extractor, gen_extractors
87 from .downloader import get_suitable_downloader
88 from .downloader.rtmp import rtmpdump_version
89 from .postprocessor import (
90     FFmpegFixupM4aPP,
91     FFmpegFixupStretchedPP,
92     FFmpegMergerPP,
93     FFmpegPostProcessor,
94     get_postprocessor,
95 )
96 from .version import __version__
97
98
99 class YoutubeDL(object):
100     """YoutubeDL class.
101
102     YoutubeDL objects are the ones responsible of downloading the
103     actual video file and writing it to disk if the user has requested
104     it, among some other tasks. In most cases there should be one per
105     program. As, given a video URL, the downloader doesn't know how to
106     extract all the needed information, task that InfoExtractors do, it
107     has to pass the URL to one of them.
108
109     For this, YoutubeDL objects have a method that allows
110     InfoExtractors to be registered in a given order. When it is passed
111     a URL, the YoutubeDL object hands it to the first InfoExtractor it
112     finds that reports being able to handle it. The InfoExtractor extracts
113     all the information about the video or videos the URL refers to, and
114     YoutubeDL process the extracted information, possibly using a File
115     Downloader to download the video.
116
117     YoutubeDL objects accept a lot of parameters. In order not to saturate
118     the object constructor with arguments, it receives a dictionary of
119     options instead. These options are available through the params
120     attribute for the InfoExtractors to use. The YoutubeDL also
121     registers itself as the downloader in charge for the InfoExtractors
122     that are added to it, so this is a "mutual registration".
123
124     Available options:
125
126     username:          Username for authentication purposes.
127     password:          Password for authentication purposes.
128     videopassword:     Password for accessing a video.
129     usenetrc:          Use netrc for authentication instead.
130     verbose:           Print additional info to stdout.
131     quiet:             Do not print messages to stdout.
132     no_warnings:       Do not print out anything for warnings.
133     forceurl:          Force printing final URL.
134     forcetitle:        Force printing title.
135     forceid:           Force printing ID.
136     forcethumbnail:    Force printing thumbnail URL.
137     forcedescription:  Force printing description.
138     forcefilename:     Force printing final filename.
139     forceduration:     Force printing duration.
140     forcejson:         Force printing info_dict as JSON.
141     dump_single_json:  Force printing the info_dict of the whole playlist
142                        (or video) as a single JSON line.
143     simulate:          Do not download the video files.
144     format:            Video format code. See options.py for more information.
145     outtmpl:           Template for output names.
146     restrictfilenames: Do not allow "&" and spaces in file names
147     ignoreerrors:      Do not stop on download errors.
148     force_generic_extractor: Force downloader to use the generic extractor
149     nooverwrites:      Prevent overwriting files.
150     playliststart:     Playlist item to start at.
151     playlistend:       Playlist item to end at.
152     playlist_items:    Specific indices of playlist to download.
153     playlistreverse:   Download playlist items in reverse order.
154     matchtitle:        Download only matching titles.
155     rejecttitle:       Reject downloads for matching titles.
156     logger:            Log messages to a logging.Logger instance.
157     logtostderr:       Log messages to stderr instead of stdout.
158     writedescription:  Write the video description to a .description file
159     writeinfojson:     Write the video metadata to a .info.json file
160     writeannotations:  Write the video annotations to a .annotations.xml file
161     writethumbnail:    Write the thumbnail image to a file
162     write_all_thumbnails:  Write all thumbnail formats to files
163     writesubtitles:    Write the video subtitles to a file
164     writeautomaticsub: Write the automatically generated subtitles to a file
165     allsubtitles:      Downloads all the subtitles of the video
166                        (requires writesubtitles or writeautomaticsub)
167     listsubtitles:     Lists all available subtitles for the video
168     subtitlesformat:   The format code for subtitles
169     subtitleslangs:    List of languages of the subtitles to download
170     keepvideo:         Keep the video file after post-processing
171     daterange:         A DateRange object, download only if the upload_date is in the range.
172     skip_download:     Skip the actual download of the video file
173     cachedir:          Location of the cache files in the filesystem.
174                        False to disable filesystem cache.
175     noplaylist:        Download single video instead of a playlist if in doubt.
176     age_limit:         An integer representing the user's age in years.
177                        Unsuitable videos for the given age are skipped.
178     min_views:         An integer representing the minimum view count the video
179                        must have in order to not be skipped.
180                        Videos without view count information are always
181                        downloaded. None for no limit.
182     max_views:         An integer representing the maximum view count.
183                        Videos that are more popular than that are not
184                        downloaded.
185                        Videos without view count information are always
186                        downloaded. None for no limit.
187     download_archive:  File name of a file where all downloads are recorded.
188                        Videos already present in the file are not downloaded
189                        again.
190     cookiefile:        File name where cookies should be read from and dumped to.
191     nocheckcertificate:Do not verify SSL certificates
192     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
193                        At the moment, this is only supported by YouTube.
194     proxy:             URL of the proxy server to use
195     cn_verification_proxy:  URL of the proxy to use for IP address verification
196                        on Chinese sites. (Experimental)
197     socket_timeout:    Time to wait for unresponsive hosts, in seconds
198     bidi_workaround:   Work around buggy terminals without bidirectional text
199                        support, using fribidi
200     debug_printtraffic:Print out sent and received HTTP traffic
201     include_ads:       Download ads as well
202     default_search:    Prepend this string if an input url is not valid.
203                        'auto' for elaborate guessing
204     encoding:          Use this encoding instead of the system-specified.
205     extract_flat:      Do not resolve URLs, return the immediate result.
206                        Pass in 'in_playlist' to only show this behavior for
207                        playlist items.
208     postprocessors:    A list of dictionaries, each with an entry
209                        * key:  The name of the postprocessor. See
210                                youtube_dl/postprocessor/__init__.py for a list.
211                        as well as any further keyword arguments for the
212                        postprocessor.
213     progress_hooks:    A list of functions that get called on download
214                        progress, with a dictionary with the entries
215                        * status: One of "downloading", "error", or "finished".
216                                  Check this first and ignore unknown values.
217
218                        If status is one of "downloading", or "finished", the
219                        following properties may also be present:
220                        * filename: The final filename (always present)
221                        * tmpfilename: The filename we're currently writing to
222                        * downloaded_bytes: Bytes on disk
223                        * total_bytes: Size of the whole file, None if unknown
224                        * total_bytes_estimate: Guess of the eventual file size,
225                                                None if unavailable.
226                        * elapsed: The number of seconds since download started.
227                        * eta: The estimated time in seconds, None if unknown
228                        * speed: The download speed in bytes/second, None if
229                                 unknown
230                        * fragment_index: The counter of the currently
231                                          downloaded video fragment.
232                        * fragment_count: The number of fragments (= individual
233                                          files that will be merged)
234
235                        Progress hooks are guaranteed to be called at least once
236                        (with status "finished") if the download is successful.
237     merge_output_format: Extension to use when merging formats.
238     fixup:             Automatically correct known faults of the file.
239                        One of:
240                        - "never": do nothing
241                        - "warn": only emit a warning
242                        - "detect_or_warn": check whether we can do anything
243                                            about it, warn otherwise (default)
244     source_address:    (Experimental) Client-side IP address to bind to.
245     call_home:         Boolean, true iff we are allowed to contact the
246                        youtube-dl servers for debugging.
247     sleep_interval:    Number of seconds to sleep before each download.
248     listformats:       Print an overview of available video formats and exit.
249     list_thumbnails:   Print a table of all thumbnails and exit.
250     match_filter:      A function that gets called with the info_dict of
251                        every video.
252                        If it returns a message, the video is ignored.
253                        If it returns None, the video is downloaded.
254                        match_filter_func in utils.py is one example for this.
255     no_color:          Do not emit color codes in output.
256
257     The following options determine which downloader is picked:
258     external_downloader: Executable of the external downloader to call.
259                        None or unset for standard (built-in) downloader.
260     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
261
262     The following parameters are not used by YoutubeDL itself, they are used by
263     the downloader (see youtube_dl/downloader/common.py):
264     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
265     noresizebuffer, retries, continuedl, noprogress, consoletitle,
266     xattr_set_filesize, external_downloader_args.
267
268     The following options are used by the post processors:
269     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
270                        otherwise prefer avconv.
271     postprocessor_args: A list of additional command-line arguments for the
272                         postprocessor.
273     """
274
    # Class-level placeholders; __init__ rebinds every one of these names on
    # the instance it creates.
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None
281
282     def __init__(self, params=None, auto_init=True):
283         """Create a FileDownloader object with the given options."""
284         if params is None:
285             params = {}
286         self._ies = []
287         self._ies_instances = {}
288         self._pps = []
289         self._progress_hooks = []
290         self._download_retcode = 0
291         self._num_downloads = 0
292         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
293         self._err_file = sys.stderr
294         self.params = {
295             # Default parameters
296             'nocheckcertificate': False,
297         }
298         self.params.update(params)
299         self.cache = Cache(self)
300
301         if params.get('bidi_workaround', False):
302             try:
303                 import pty
304                 master, slave = pty.openpty()
305                 width = compat_get_terminal_size().columns
306                 if width is None:
307                     width_args = []
308                 else:
309                     width_args = ['-w', str(width)]
310                 sp_kwargs = dict(
311                     stdin=subprocess.PIPE,
312                     stdout=slave,
313                     stderr=self._err_file)
314                 try:
315                     self._output_process = subprocess.Popen(
316                         ['bidiv'] + width_args, **sp_kwargs
317                     )
318                 except OSError:
319                     self._output_process = subprocess.Popen(
320                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
321                 self._output_channel = os.fdopen(master, 'rb')
322             except OSError as ose:
323                 if ose.errno == 2:
324                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
325                 else:
326                     raise
327
328         if (sys.version_info >= (3,) and sys.platform != 'win32' and
329                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
330                 not params.get('restrictfilenames', False)):
331             # On Python 3, the Unicode filesystem API will throw errors (#1474)
332             self.report_warning(
333                 'Assuming --restrict-filenames since file system encoding '
334                 'cannot encode all characters. '
335                 'Set the LC_ALL environment variable to fix this.')
336             self.params['restrictfilenames'] = True
337
338         if isinstance(params.get('outtmpl'), bytes):
339             self.report_warning(
340                 'Parameter outtmpl is bytes, but should be a unicode string. '
341                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
342
343         self._setup_opener()
344
345         if auto_init:
346             self.print_debug_header()
347             self.add_default_info_extractors()
348
349         for pp_def_raw in self.params.get('postprocessors', []):
350             pp_class = get_postprocessor(pp_def_raw['key'])
351             pp_def = dict(pp_def_raw)
352             del pp_def['key']
353             pp = pp_class(self, **compat_kwargs(pp_def))
354             self.add_post_processor(pp)
355
356         for ph in self.params.get('progress_hooks', []):
357             self.add_progress_hook(ph)
358
359     def warn_if_short_id(self, argv):
360         # short YouTube ID starting with dash?
361         idxs = [
362             i for i, a in enumerate(argv)
363             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
364         if idxs:
365             correct_argv = (
366                 ['youtube-dl'] +
367                 [a for i, a in enumerate(argv) if i not in idxs] +
368                 ['--'] + [argv[i] for i in idxs]
369             )
370             self.report_warning(
371                 'Long argument string detected. '
372                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
373                 args_to_str(correct_argv))
374
375     def add_info_extractor(self, ie):
376         """Add an InfoExtractor object to the end of the list."""
377         self._ies.append(ie)
378         self._ies_instances[ie.ie_key()] = ie
379         ie.set_downloader(self)
380
381     def get_info_extractor(self, ie_key):
382         """
383         Get an instance of an IE with name ie_key, it will try to get one from
384         the _ies list, if there's no instance it will create a new one and add
385         it to the extractor list.
386         """
387         ie = self._ies_instances.get(ie_key)
388         if ie is None:
389             ie = get_info_extractor(ie_key)()
390             self.add_info_extractor(ie)
391         return ie
392
393     def add_default_info_extractors(self):
394         """
395         Add the InfoExtractors returned by gen_extractors to the end of the list
396         """
397         for ie in gen_extractors():
398             self.add_info_extractor(ie)
399
400     def add_post_processor(self, pp):
401         """Add a PostProcessor object to the end of the chain."""
402         self._pps.append(pp)
403         pp.set_downloader(self)
404
405     def add_progress_hook(self, ph):
406         """Add the progress hook (currently only for the file downloader)"""
407         self._progress_hooks.append(ph)
408
409     def _bidi_workaround(self, message):
410         if not hasattr(self, '_output_channel'):
411             return message
412
413         assert hasattr(self, '_output_process')
414         assert isinstance(message, compat_str)
415         line_count = message.count('\n') + 1
416         self._output_process.stdin.write((message + '\n').encode('utf-8'))
417         self._output_process.stdin.flush()
418         res = ''.join(self._output_channel.readline().decode('utf-8')
419                       for _ in range(line_count))
420         return res[:-len('\n')]
421
422     def to_screen(self, message, skip_eol=False):
423         """Print message to stdout if not in quiet mode."""
424         return self.to_stdout(message, skip_eol, check_quiet=True)
425
426     def _write_string(self, s, out=None):
427         write_string(s, out=out, encoding=self.params.get('encoding'))
428
429     def to_stdout(self, message, skip_eol=False, check_quiet=False):
430         """Print message to stdout if not in quiet mode."""
431         if self.params.get('logger'):
432             self.params['logger'].debug(message)
433         elif not check_quiet or not self.params.get('quiet', False):
434             message = self._bidi_workaround(message)
435             terminator = ['\n', ''][skip_eol]
436             output = message + terminator
437
438             self._write_string(output, self._screen_file)
439
440     def to_stderr(self, message):
441         """Print message to stderr."""
442         assert isinstance(message, compat_str)
443         if self.params.get('logger'):
444             self.params['logger'].error(message)
445         else:
446             message = self._bidi_workaround(message)
447             output = message + '\n'
448             self._write_string(output, self._err_file)
449
450     def to_console_title(self, message):
451         if not self.params.get('consoletitle', False):
452             return
453         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
454             # c_wchar_p() might not be necessary if `message` is
455             # already of type unicode()
456             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
457         elif 'TERM' in os.environ:
458             self._write_string('\033]0;%s\007' % message, self._screen_file)
459
460     def save_console_title(self):
461         if not self.params.get('consoletitle', False):
462             return
463         if 'TERM' in os.environ:
464             # Save the title on stack
465             self._write_string('\033[22;0t', self._screen_file)
466
467     def restore_console_title(self):
468         if not self.params.get('consoletitle', False):
469             return
470         if 'TERM' in os.environ:
471             # Restore the title from stack
472             self._write_string('\033[23;0t', self._screen_file)
473
474     def __enter__(self):
475         self.save_console_title()
476         return self
477
478     def __exit__(self, *args):
479         self.restore_console_title()
480
481         if self.params.get('cookiefile') is not None:
482             self.cookiejar.save()
483
484     def trouble(self, message=None, tb=None):
485         """Determine action to take when a download problem appears.
486
487         Depending on if the downloader has been configured to ignore
488         download errors or not, this method may throw an exception or
489         not when errors are found, after printing the message.
490
491         tb, if given, is additional traceback information.
492         """
493         if message is not None:
494             self.to_stderr(message)
495         if self.params.get('verbose'):
496             if tb is None:
497                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
498                     tb = ''
499                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
500                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
501                     tb += encode_compat_str(traceback.format_exc())
502                 else:
503                     tb_data = traceback.format_list(traceback.extract_stack())
504                     tb = ''.join(tb_data)
505             self.to_stderr(tb)
506         if not self.params.get('ignoreerrors', False):
507             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
508                 exc_info = sys.exc_info()[1].exc_info
509             else:
510                 exc_info = sys.exc_info()
511             raise DownloadError(message, exc_info)
512         self._download_retcode = 1
513
514     def report_warning(self, message):
515         '''
516         Print the message to stderr, it will be prefixed with 'WARNING:'
517         If stderr is a tty file the 'WARNING:' will be colored
518         '''
519         if self.params.get('logger') is not None:
520             self.params['logger'].warning(message)
521         else:
522             if self.params.get('no_warnings'):
523                 return
524             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
525                 _msg_header = '\033[0;33mWARNING:\033[0m'
526             else:
527                 _msg_header = 'WARNING:'
528             warning_message = '%s %s' % (_msg_header, message)
529             self.to_stderr(warning_message)
530
531     def report_error(self, message, tb=None):
532         '''
533         Do the same as trouble, but prefixes the message with 'ERROR:', colored
534         in red if stderr is a tty file.
535         '''
536         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
537             _msg_header = '\033[0;31mERROR:\033[0m'
538         else:
539             _msg_header = 'ERROR:'
540         error_message = '%s %s' % (_msg_header, message)
541         self.trouble(error_message, tb)
542
543     def report_file_already_downloaded(self, file_name):
544         """Report file has already been fully downloaded."""
545         try:
546             self.to_screen('[download] %s has already been downloaded' % file_name)
547         except UnicodeEncodeError:
548             self.to_screen('[download] The file has already been downloaded')
549
550     def prepare_filename(self, info_dict):
551         """Generate the output filename."""
552         try:
553             template_dict = dict(info_dict)
554
555             template_dict['epoch'] = int(time.time())
556             autonumber_size = self.params.get('autonumber_size')
557             if autonumber_size is None:
558                 autonumber_size = 5
559             autonumber_templ = '%0' + str(autonumber_size) + 'd'
560             template_dict['autonumber'] = autonumber_templ % self._num_downloads
561             if template_dict.get('playlist_index') is not None:
562                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
563             if template_dict.get('resolution') is None:
564                 if template_dict.get('width') and template_dict.get('height'):
565                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
566                 elif template_dict.get('height'):
567                     template_dict['resolution'] = '%sp' % template_dict['height']
568                 elif template_dict.get('width'):
569                     template_dict['resolution'] = '?x%d' % template_dict['width']
570
571             sanitize = lambda k, v: sanitize_filename(
572                 compat_str(v),
573                 restricted=self.params.get('restrictfilenames'),
574                 is_id=(k == 'id'))
575             template_dict = dict((k, sanitize(k, v))
576                                  for k, v in template_dict.items()
577                                  if v is not None)
578             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
579
580             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
581             tmpl = compat_expanduser(outtmpl)
582             filename = tmpl % template_dict
583             # Temporary fix for #4787
584             # 'Treat' all problem characters by passing filename through preferredencoding
585             # to workaround encoding issues with subprocess on python2 @ Windows
586             if sys.version_info < (3, 0) and sys.platform == 'win32':
587                 filename = encodeFilename(filename, True).decode(preferredencoding())
588             return sanitize_path(filename)
589         except ValueError as err:
590             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
591             return None
592
593     def _match_entry(self, info_dict, incomplete):
594         """ Returns None iff the file should be downloaded """
595
596         video_title = info_dict.get('title', info_dict.get('id', 'video'))
597         if 'title' in info_dict:
598             # This can happen when we're just evaluating the playlist
599             title = info_dict['title']
600             matchtitle = self.params.get('matchtitle', False)
601             if matchtitle:
602                 if not re.search(matchtitle, title, re.IGNORECASE):
603                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
604             rejecttitle = self.params.get('rejecttitle', False)
605             if rejecttitle:
606                 if re.search(rejecttitle, title, re.IGNORECASE):
607                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
608         date = info_dict.get('upload_date', None)
609         if date is not None:
610             dateRange = self.params.get('daterange', DateRange())
611             if date not in dateRange:
612                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
613         view_count = info_dict.get('view_count', None)
614         if view_count is not None:
615             min_views = self.params.get('min_views')
616             if min_views is not None and view_count < min_views:
617                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
618             max_views = self.params.get('max_views')
619             if max_views is not None and view_count > max_views:
620                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
621         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
622             return 'Skipping "%s" because it is age restricted' % video_title
623         if self.in_download_archive(info_dict):
624             return '%s has already been recorded in archive' % video_title
625
626         if not incomplete:
627             match_filter = self.params.get('match_filter')
628             if match_filter is not None:
629                 ret = match_filter(info_dict)
630                 if ret is not None:
631                     return ret
632
633         return None
634
635     @staticmethod
636     def add_extra_info(info_dict, extra_info):
637         '''Set the keys from extra_info in info dict if they are missing'''
638         for key, value in extra_info.items():
639             info_dict.setdefault(key, value)
640
641     def extract_info(self, url, download=True, ie_key=None, extra_info={},
642                      process=True, force_generic_extractor=False):
643         '''
644         Returns a list with a dictionary for each video we find.
645         If 'download', also downloads the videos.
646         extra_info is a dict containing the extra values to add to each result
647         '''
648
649         if not ie_key and force_generic_extractor:
650             ie_key = 'Generic'
651
652         if ie_key:
653             ies = [self.get_info_extractor(ie_key)]
654         else:
655             ies = self._ies
656
657         for ie in ies:
658             if not ie.suitable(url):
659                 continue
660
661             if not ie.working():
662                 self.report_warning('The program functionality for this site has been marked as broken, '
663                                     'and will probably not work.')
664
665             try:
666                 ie_result = ie.extract(url)
667                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
668                     break
669                 if isinstance(ie_result, list):
670                     # Backwards compatibility: old IE result format
671                     ie_result = {
672                         '_type': 'compat_list',
673                         'entries': ie_result,
674                     }
675                 self.add_default_extra_info(ie_result, ie, url)
676                 if process:
677                     return self.process_ie_result(ie_result, download, extra_info)
678                 else:
679                     return ie_result
680             except ExtractorError as e:  # An error we somewhat expected
681                 self.report_error(compat_str(e), e.format_traceback())
682                 break
683             except MaxDownloadsReached:
684                 raise
685             except Exception as e:
686                 if self.params.get('ignoreerrors', False):
687                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
688                     break
689                 else:
690                     raise
691         else:
692             self.report_error('no suitable InfoExtractor for URL %s' % url)
693
694     def add_default_extra_info(self, ie_result, ie, url):
695         self.add_extra_info(ie_result, {
696             'extractor': ie.IE_NAME,
697             'webpage_url': url,
698             'webpage_url_basename': url_basename(url),
699             'extractor_key': ie.ie_key(),
700         })
701
702     def process_ie_result(self, ie_result, download=True, extra_info={}):
703         """
704         Take the result of the ie(may be modified) and resolve all unresolved
705         references (URLs, playlist items).
706
707         It will also download the videos if 'download'.
708         Returns the resolved ie_result.
709         """
710
711         result_type = ie_result.get('_type', 'video')
712
713         if result_type in ('url', 'url_transparent'):
714             extract_flat = self.params.get('extract_flat', False)
715             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
716                     extract_flat is True):
717                 if self.params.get('forcejson', False):
718                     self.to_stdout(json.dumps(ie_result))
719                 return ie_result
720
721         if result_type == 'video':
722             self.add_extra_info(ie_result, extra_info)
723             return self.process_video_result(ie_result, download=download)
724         elif result_type == 'url':
725             # We have to add extra_info to the results because it may be
726             # contained in a playlist
727             return self.extract_info(ie_result['url'],
728                                      download,
729                                      ie_key=ie_result.get('ie_key'),
730                                      extra_info=extra_info)
731         elif result_type == 'url_transparent':
732             # Use the information from the embedding page
733             info = self.extract_info(
734                 ie_result['url'], ie_key=ie_result.get('ie_key'),
735                 extra_info=extra_info, download=False, process=False)
736
737             force_properties = dict(
738                 (k, v) for k, v in ie_result.items() if v is not None)
739             for f in ('_type', 'url'):
740                 if f in force_properties:
741                     del force_properties[f]
742             new_result = info.copy()
743             new_result.update(force_properties)
744
745             assert new_result.get('_type') != 'url_transparent'
746
747             return self.process_ie_result(
748                 new_result, download=download, extra_info=extra_info)
749         elif result_type == 'playlist' or result_type == 'multi_video':
750             # We process each entry in the playlist
751             playlist = ie_result.get('title', None) or ie_result.get('id', None)
752             self.to_screen('[download] Downloading playlist: %s' % playlist)
753
754             playlist_results = []
755
756             playliststart = self.params.get('playliststart', 1) - 1
757             playlistend = self.params.get('playlistend', None)
758             # For backwards compatibility, interpret -1 as whole list
759             if playlistend == -1:
760                 playlistend = None
761
762             playlistitems_str = self.params.get('playlist_items', None)
763             playlistitems = None
764             if playlistitems_str is not None:
765                 def iter_playlistitems(format):
766                     for string_segment in format.split(','):
767                         if '-' in string_segment:
768                             start, end = string_segment.split('-')
769                             for item in range(int(start), int(end) + 1):
770                                 yield int(item)
771                         else:
772                             yield int(string_segment)
773                 playlistitems = iter_playlistitems(playlistitems_str)
774
775             ie_entries = ie_result['entries']
776             if isinstance(ie_entries, list):
777                 n_all_entries = len(ie_entries)
778                 if playlistitems:
779                     entries = [
780                         ie_entries[i - 1] for i in playlistitems
781                         if -n_all_entries <= i - 1 < n_all_entries]
782                 else:
783                     entries = ie_entries[playliststart:playlistend]
784                 n_entries = len(entries)
785                 self.to_screen(
786                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
787                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
788             elif isinstance(ie_entries, PagedList):
789                 if playlistitems:
790                     entries = []
791                     for item in playlistitems:
792                         entries.extend(ie_entries.getslice(
793                             item - 1, item
794                         ))
795                 else:
796                     entries = ie_entries.getslice(
797                         playliststart, playlistend)
798                 n_entries = len(entries)
799                 self.to_screen(
800                     "[%s] playlist %s: Downloading %d videos" %
801                     (ie_result['extractor'], playlist, n_entries))
802             else:  # iterable
803                 if playlistitems:
804                     entry_list = list(ie_entries)
805                     entries = [entry_list[i - 1] for i in playlistitems]
806                 else:
807                     entries = list(itertools.islice(
808                         ie_entries, playliststart, playlistend))
809                 n_entries = len(entries)
810                 self.to_screen(
811                     "[%s] playlist %s: Downloading %d videos" %
812                     (ie_result['extractor'], playlist, n_entries))
813
814             if self.params.get('playlistreverse', False):
815                 entries = entries[::-1]
816
817             for i, entry in enumerate(entries, 1):
818                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
819                 extra = {
820                     'n_entries': n_entries,
821                     'playlist': playlist,
822                     'playlist_id': ie_result.get('id'),
823                     'playlist_title': ie_result.get('title'),
824                     'playlist_index': i + playliststart,
825                     'extractor': ie_result['extractor'],
826                     'webpage_url': ie_result['webpage_url'],
827                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
828                     'extractor_key': ie_result['extractor_key'],
829                 }
830
831                 reason = self._match_entry(entry, incomplete=True)
832                 if reason is not None:
833                     self.to_screen('[download] ' + reason)
834                     continue
835
836                 entry_result = self.process_ie_result(entry,
837                                                       download=download,
838                                                       extra_info=extra)
839                 playlist_results.append(entry_result)
840             ie_result['entries'] = playlist_results
841             self.to_screen('[download] Finished downloading playlist: %s' % playlist)
842             return ie_result
843         elif result_type == 'compat_list':
844             self.report_warning(
845                 'Extractor %s returned a compat_list result. '
846                 'It needs to be updated.' % ie_result.get('extractor'))
847
848             def _fixup(r):
849                 self.add_extra_info(
850                     r,
851                     {
852                         'extractor': ie_result['extractor'],
853                         'webpage_url': ie_result['webpage_url'],
854                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
855                         'extractor_key': ie_result['extractor_key'],
856                     }
857                 )
858                 return r
859             ie_result['entries'] = [
860                 self.process_ie_result(_fixup(r), download, extra_info)
861                 for r in ie_result['entries']
862             ]
863             return ie_result
864         else:
865             raise Exception('Invalid result type: %s' % result_type)
866
867     def _build_format_filter(self, filter_spec):
868         " Returns a function to filter the formats according to the filter_spec "
869
870         OPERATORS = {
871             '<': operator.lt,
872             '<=': operator.le,
873             '>': operator.gt,
874             '>=': operator.ge,
875             '=': operator.eq,
876             '!=': operator.ne,
877         }
878         operator_rex = re.compile(r'''(?x)\s*
879             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
880             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
881             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
882             $
883             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
884         m = operator_rex.search(filter_spec)
885         if m:
886             try:
887                 comparison_value = int(m.group('value'))
888             except ValueError:
889                 comparison_value = parse_filesize(m.group('value'))
890                 if comparison_value is None:
891                     comparison_value = parse_filesize(m.group('value') + 'B')
892                 if comparison_value is None:
893                     raise ValueError(
894                         'Invalid value %r in format specification %r' % (
895                             m.group('value'), filter_spec))
896             op = OPERATORS[m.group('op')]
897
898         if not m:
899             STR_OPERATORS = {
900                 '=': operator.eq,
901                 '!=': operator.ne,
902                 '^=': lambda attr, value: attr.startswith(value),
903                 '$=': lambda attr, value: attr.endswith(value),
904                 '*=': lambda attr, value: value in attr,
905             }
906             str_operator_rex = re.compile(r'''(?x)
907                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
908                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
909                 \s*(?P<value>[a-zA-Z0-9_-]+)
910                 \s*$
911                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
912             m = str_operator_rex.search(filter_spec)
913             if m:
914                 comparison_value = m.group('value')
915                 op = STR_OPERATORS[m.group('op')]
916
917         if not m:
918             raise ValueError('Invalid filter specification %r' % filter_spec)
919
920         def _filter(f):
921             actual_value = f.get(m.group('key'))
922             if actual_value is None:
923                 return m.group('none_inclusive')
924             return op(actual_value, comparison_value)
925         return _filter
926
    def build_format_selector(self, format_spec):
        """
        Compile a format specification string into a selector function.

        format_spec is tokenized, parsed into a tree of FormatSelector
        tuples (single selectors, '/' alternatives, '+' merges, '(...)'
        groups, ',' separated lists, each with optional '[...]' filters)
        and turned into a function that takes the list of available
        formats and returns an iterable of the selected formats.

        Raises SyntaxError when the specification is malformed. Filter
        texts are compiled with self._build_format_filter.
        """
        def syntax_error(note, start):
            # Builds (does not raise) a SyntaxError whose message shows the
            # specification with a caret under column start[1]
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        # Node types of the parsed selector tree
        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        def _parse_filter(tokens):
            # Consumes tokens up to and including the closing ']' and
            # returns the concatenated text in between; falls through
            # (returning None) if the tokens run out before a ']' is seen
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            # Pending joined name and the position of its first token;
            # last_line is never reassigned below, so joined names are
            # always emitted with a line value of None
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    # (this inner loop advances the same token iterator)
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    # Part of a format name: start a new pending name or
                    # extend the current one
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Recursive-descent parser: returns the list of FormatSelector
            # nodes parsed from tokens. The inside_* flags tell a nested
            # call which operators end its sub-expression; such operators
            # are pushed back so that the caller sees them again.
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        # A filter without a preceding selector applies to 'best'
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        video_selector = current_selector
                        audio_selector = _parse_format_selection(tokens, inside_merge=True)
                        if not video_selector or not audio_selector:
                            raise syntax_error('"+" must be between two format selectors', start)
                        current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _build_selector_function(selector):
            # Turns a parsed selector (a FormatSelector node or a list of
            # them) into a function mapping the available formats to an
            # iterable of the selected ones
            if isinstance(selector, list):
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(formats):
                    # Concatenates the selections of every listed selector
                    for f in fs:
                        for format in f(formats):
                            yield format
                # Lists carry no filters of their own, so return directly
                return selector_function
            elif selector.type == GROUP:
                selector_function = _build_selector_function(selector.selector)
            elif selector.type == PICKFIRST:
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(formats):
                    # First alternative that selects anything wins
                    for f in fs:
                        picked_formats = list(f(formats))
                        if picked_formats:
                            return picked_formats
                    return []
            elif selector.type == SINGLE:
                format_spec = selector.selector

                def selector_function(formats):
                    formats = list(formats)
                    if not formats:
                        return
                    if format_spec == 'all':
                        for f in formats:
                            yield f
                    elif format_spec in ['best', 'worst', None]:
                        # 'worst' takes the first entry, anything else the
                        # last one — presumably formats arrive ordered from
                        # worst to best; confirm with the caller
                        format_idx = 0 if format_spec == 'worst' else -1
                        audiovideo_formats = [
                            f for f in formats
                            if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                        if audiovideo_formats:
                            yield audiovideo_formats[format_idx]
                        # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
                        elif (all(f.get('acodec') != 'none' for f in formats) or
                              all(f.get('vcodec') != 'none' for f in formats)):
                            yield formats[format_idx]
                    elif format_spec == 'bestaudio':
                        # Audio-only formats are those whose vcodec is 'none'
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[-1]
                    elif format_spec == 'worstaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[0]
                    elif format_spec == 'bestvideo':
                        # Video-only formats are those whose acodec is 'none'
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[-1]
                    elif format_spec == 'worstvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[0]
                    else:
                        # A known extension selects by 'ext', anything else
                        # is treated as a format_id; the last match is used
                        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                        if format_spec in extensions:
                            filter_f = lambda f: f['ext'] == format_spec
                        else:
                            filter_f = lambda f: f['format_id'] == format_spec
                        matches = list(filter(filter_f, formats))
                        if matches:
                            yield matches[-1]
            elif selector.type == MERGE:
                def _merge(formats_info):
                    # Combines a (video, audio) pair into one format dict
                    # that lists both under 'requested_formats'
                    format_1, format_2 = [f['format_id'] for f in formats_info]
                    # The first format must contain the video and the
                    # second the audio
                    if formats_info[0].get('vcodec') == 'none':
                        self.report_error('The first format must '
                                          'contain the video, try using '
                                          '"-f %s+%s"' % (format_2, format_1))
                        # NOTE(review): reached only if report_error returns
                        # instead of raising; the caller below then yields None
                        return
                    # Formats must be opposite (video+audio)
                    if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
                        self.report_error(
                            'Both formats %s and %s are video-only, you must specify "-f video+audio"'
                            % (format_1, format_2))
                        return
                    # The merged file keeps the video format's extension
                    # unless 'merge_output_format' is set
                    output_ext = (
                        formats_info[0]['ext']
                        if self.params.get('merge_output_format') is None
                        else self.params['merge_output_format'])
                    return {
                        'requested_formats': formats_info,
                        'format': '%s+%s' % (formats_info[0].get('format'),
                                             formats_info[1].get('format')),
                        'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                formats_info[1].get('format_id')),
                        'width': formats_info[0].get('width'),
                        'height': formats_info[0].get('height'),
                        'resolution': formats_info[0].get('resolution'),
                        'fps': formats_info[0].get('fps'),
                        'vcodec': formats_info[0].get('vcodec'),
                        'vbr': formats_info[0].get('vbr'),
                        'stretched_ratio': formats_info[0].get('stretched_ratio'),
                        'acodec': formats_info[1].get('acodec'),
                        'abr': formats_info[1].get('abr'),
                        'ext': output_ext,
                    }
                video_selector, audio_selector = map(_build_selector_function, selector.selector)

                def selector_function(formats):
                    # One merged format for every video/audio combination
                    formats = list(formats)
                    for pair in itertools.product(video_selector(formats), audio_selector(formats)):
                        yield _merge(pair)

            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(formats):
                # Apply the node's filters first, then select among the
                # formats that are left
                for _filter in filters:
                    formats = list(filter(_filter, formats))
                return selector_function(formats)
            return final_selector

        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            # The list() call drains the generator here, so tokenizer
            # errors surface inside this try block
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator(object):
            # Iterator over the token list that can step back one position,
            # which the parser uses to hand an operator back to its caller
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            # Python 2 spelling of the iterator protocol
            next = __next__

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
1189
1190     def _calc_headers(self, info_dict):
1191         res = std_headers.copy()
1192
1193         add_headers = info_dict.get('http_headers')
1194         if add_headers:
1195             res.update(add_headers)
1196
1197         cookies = self._calc_cookies(info_dict)
1198         if cookies:
1199             res['Cookie'] = cookies
1200
1201         return res
1202
1203     def _calc_cookies(self, info_dict):
1204         pr = sanitized_Request(info_dict['url'])
1205         self.cookiejar.add_cookie_header(pr)
1206         return pr.get_header('Cookie')
1207
1208     def process_video_result(self, info_dict, download=True):
1209         assert info_dict.get('_type', 'video') == 'video'
1210
1211         if 'id' not in info_dict:
1212             raise ExtractorError('Missing "id" field in extractor result')
1213         if 'title' not in info_dict:
1214             raise ExtractorError('Missing "title" field in extractor result')
1215
1216         if 'playlist' not in info_dict:
1217             # It isn't part of a playlist
1218             info_dict['playlist'] = None
1219             info_dict['playlist_index'] = None
1220
1221         thumbnails = info_dict.get('thumbnails')
1222         if thumbnails is None:
1223             thumbnail = info_dict.get('thumbnail')
1224             if thumbnail:
1225                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1226         if thumbnails:
1227             thumbnails.sort(key=lambda t: (
1228                 t.get('preference'), t.get('width'), t.get('height'),
1229                 t.get('id'), t.get('url')))
1230             for i, t in enumerate(thumbnails):
1231                 if t.get('width') and t.get('height'):
1232                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1233                 if t.get('id') is None:
1234                     t['id'] = '%d' % i
1235
1236         if thumbnails and 'thumbnail' not in info_dict:
1237             info_dict['thumbnail'] = thumbnails[-1]['url']
1238
1239         if 'display_id' not in info_dict and 'id' in info_dict:
1240             info_dict['display_id'] = info_dict['id']
1241
1242         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1243             # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1244             # see http://bugs.python.org/issue1646728)
1245             try:
1246                 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1247                 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1248             except (ValueError, OverflowError, OSError):
1249                 pass
1250
1251         # Auto generate title fields corresponding to the *_number fields when missing
1252         # in order to always have clean titles. This is very common for TV series.
1253         for field in ('chapter', 'season', 'episode'):
1254             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1255                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
1256
1257         subtitles = info_dict.get('subtitles')
1258         if subtitles:
1259             for _, subtitle in subtitles.items():
1260                 for subtitle_format in subtitle:
1261                     if 'ext' not in subtitle_format:
1262                         subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1263
1264         if self.params.get('listsubtitles', False):
1265             if 'automatic_captions' in info_dict:
1266                 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
1267             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1268             return
1269         info_dict['requested_subtitles'] = self.process_subtitles(
1270             info_dict['id'], subtitles,
1271             info_dict.get('automatic_captions'))
1272
1273         # We now pick which formats have to be downloaded
1274         if info_dict.get('formats') is None:
1275             # There's only one format available
1276             formats = [info_dict]
1277         else:
1278             formats = info_dict['formats']
1279
1280         if not formats:
1281             raise ExtractorError('No video formats found!')
1282
1283         formats_dict = {}
1284
1285         # We check that all the formats have the format and format_id fields
1286         for i, format in enumerate(formats):
1287             if 'url' not in format:
1288                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1289
1290             if format.get('format_id') is None:
1291                 format['format_id'] = compat_str(i)
1292             format_id = format['format_id']
1293             if format_id not in formats_dict:
1294                 formats_dict[format_id] = []
1295             formats_dict[format_id].append(format)
1296
1297         # Make sure all formats have unique format_id
1298         for format_id, ambiguous_formats in formats_dict.items():
1299             if len(ambiguous_formats) > 1:
1300                 for i, format in enumerate(ambiguous_formats):
1301                     format['format_id'] = '%s-%d' % (format_id, i)
1302
1303         for i, format in enumerate(formats):
1304             if format.get('format') is None:
1305                 format['format'] = '{id} - {res}{note}'.format(
1306                     id=format['format_id'],
1307                     res=self.format_resolution(format),
1308                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1309                 )
1310             # Automatically determine file extension if missing
1311             if 'ext' not in format:
1312                 format['ext'] = determine_ext(format['url']).lower()
1313             # Automatically determine protocol if missing (useful for format
1314             # selection purposes)
1315             if 'protocol' not in format:
1316                 format['protocol'] = determine_protocol(format)
1317             # Add HTTP headers, so that external programs can use them from the
1318             # json output
1319             full_format_info = info_dict.copy()
1320             full_format_info.update(format)
1321             format['http_headers'] = self._calc_headers(full_format_info)
1322
1323         # TODO Central sorting goes here
1324
1325         if formats[0] is not info_dict:
1326             # only set the 'formats' fields if the original info_dict list them
1327             # otherwise we end up with a circular reference, the first (and unique)
1328             # element in the 'formats' field in info_dict is info_dict itself,
1329             # which can't be exported to json
1330             info_dict['formats'] = formats
1331         if self.params.get('listformats'):
1332             self.list_formats(info_dict)
1333             return
1334         if self.params.get('list_thumbnails'):
1335             self.list_thumbnails(info_dict)
1336             return
1337
1338         req_format = self.params.get('format')
1339         if req_format is None:
1340             req_format_list = []
1341             if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
1342                     info_dict['extractor'] in ['youtube', 'ted'] and
1343                     not info_dict.get('is_live')):
1344                 merger = FFmpegMergerPP(self)
1345                 if merger.available and merger.can_merge():
1346                     req_format_list.append('bestvideo+bestaudio')
1347             req_format_list.append('best')
1348             req_format = '/'.join(req_format_list)
1349         format_selector = self.build_format_selector(req_format)
1350         formats_to_download = list(format_selector(formats))
1351         if not formats_to_download:
1352             raise ExtractorError('requested format not available',
1353                                  expected=True)
1354
1355         if download:
1356             if len(formats_to_download) > 1:
1357                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1358             for format in formats_to_download:
1359                 new_info = dict(info_dict)
1360                 new_info.update(format)
1361                 self.process_info(new_info)
1362         # We update the info dict with the best quality format (backwards compatibility)
1363         info_dict.update(formats_to_download[-1])
1364         return info_dict
1365
1366     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1367         """Select the requested subtitles and their format"""
1368         available_subs = {}
1369         if normal_subtitles and self.params.get('writesubtitles'):
1370             available_subs.update(normal_subtitles)
1371         if automatic_captions and self.params.get('writeautomaticsub'):
1372             for lang, cap_info in automatic_captions.items():
1373                 if lang not in available_subs:
1374                     available_subs[lang] = cap_info
1375
1376         if (not self.params.get('writesubtitles') and not
1377                 self.params.get('writeautomaticsub') or not
1378                 available_subs):
1379             return None
1380
1381         if self.params.get('allsubtitles', False):
1382             requested_langs = available_subs.keys()
1383         else:
1384             if self.params.get('subtitleslangs', False):
1385                 requested_langs = self.params.get('subtitleslangs')
1386             elif 'en' in available_subs:
1387                 requested_langs = ['en']
1388             else:
1389                 requested_langs = [list(available_subs.keys())[0]]
1390
1391         formats_query = self.params.get('subtitlesformat', 'best')
1392         formats_preference = formats_query.split('/') if formats_query else []
1393         subs = {}
1394         for lang in requested_langs:
1395             formats = available_subs.get(lang)
1396             if formats is None:
1397                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1398                 continue
1399             for ext in formats_preference:
1400                 if ext == 'best':
1401                     f = formats[-1]
1402                     break
1403                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1404                 if matches:
1405                     f = matches[-1]
1406                     break
1407             else:
1408                 f = formats[-1]
1409                 self.report_warning(
1410                     'No subtitle format found matching "%s" for language %s, '
1411                     'using %s' % (formats_query, lang, f['ext']))
1412             subs[lang] = f
1413         return subs
1414
1415     def process_info(self, info_dict):
1416         """Process a single resolved IE result."""
1417
1418         assert info_dict.get('_type', 'video') == 'video'
1419
1420         max_downloads = self.params.get('max_downloads')
1421         if max_downloads is not None:
1422             if self._num_downloads >= int(max_downloads):
1423                 raise MaxDownloadsReached()
1424
1425         info_dict['fulltitle'] = info_dict['title']
1426         if len(info_dict['title']) > 200:
1427             info_dict['title'] = info_dict['title'][:197] + '...'
1428
1429         if 'format' not in info_dict:
1430             info_dict['format'] = info_dict['ext']
1431
1432         reason = self._match_entry(info_dict, incomplete=False)
1433         if reason is not None:
1434             self.to_screen('[download] ' + reason)
1435             return
1436
1437         self._num_downloads += 1
1438
1439         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1440
1441         # Forced printings
1442         if self.params.get('forcetitle', False):
1443             self.to_stdout(info_dict['fulltitle'])
1444         if self.params.get('forceid', False):
1445             self.to_stdout(info_dict['id'])
1446         if self.params.get('forceurl', False):
1447             if info_dict.get('requested_formats') is not None:
1448                 for f in info_dict['requested_formats']:
1449                     self.to_stdout(f['url'] + f.get('play_path', ''))
1450             else:
1451                 # For RTMP URLs, also include the playpath
1452                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1453         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1454             self.to_stdout(info_dict['thumbnail'])
1455         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1456             self.to_stdout(info_dict['description'])
1457         if self.params.get('forcefilename', False) and filename is not None:
1458             self.to_stdout(filename)
1459         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1460             self.to_stdout(formatSeconds(info_dict['duration']))
1461         if self.params.get('forceformat', False):
1462             self.to_stdout(info_dict['format'])
1463         if self.params.get('forcejson', False):
1464             self.to_stdout(json.dumps(info_dict))
1465
1466         # Do nothing else if in simulate mode
1467         if self.params.get('simulate', False):
1468             return
1469
1470         if filename is None:
1471             return
1472
1473         try:
1474             dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1475             if dn and not os.path.exists(dn):
1476                 os.makedirs(dn)
1477         except (OSError, IOError) as err:
1478             self.report_error('unable to create directory ' + error_to_compat_str(err))
1479             return
1480
1481         if self.params.get('writedescription', False):
1482             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1483             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1484                 self.to_screen('[info] Video description is already present')
1485             elif info_dict.get('description') is None:
1486                 self.report_warning('There\'s no description to write.')
1487             else:
1488                 try:
1489                     self.to_screen('[info] Writing video description to: ' + descfn)
1490                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1491                         descfile.write(info_dict['description'])
1492                 except (OSError, IOError):
1493                     self.report_error('Cannot write description file ' + descfn)
1494                     return
1495
1496         if self.params.get('writeannotations', False):
1497             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1498             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1499                 self.to_screen('[info] Video annotations are already present')
1500             else:
1501                 try:
1502                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1503                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1504                         annofile.write(info_dict['annotations'])
1505                 except (KeyError, TypeError):
1506                     self.report_warning('There are no annotations to write.')
1507                 except (OSError, IOError):
1508                     self.report_error('Cannot write annotations file: ' + annofn)
1509                     return
1510
1511         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1512                                        self.params.get('writeautomaticsub')])
1513
1514         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1515             # subtitles download errors are already managed as troubles in relevant IE
1516             # that way it will silently go on when used with unsupporting IE
1517             subtitles = info_dict['requested_subtitles']
1518             ie = self.get_info_extractor(info_dict['extractor_key'])
1519             for sub_lang, sub_info in subtitles.items():
1520                 sub_format = sub_info['ext']
1521                 if sub_info.get('data') is not None:
1522                     sub_data = sub_info['data']
1523                 else:
1524                     try:
1525                         sub_data = ie._download_webpage(
1526                             sub_info['url'], info_dict['id'], note=False)
1527                     except ExtractorError as err:
1528                         self.report_warning('Unable to download subtitle for "%s": %s' %
1529                                             (sub_lang, error_to_compat_str(err.cause)))
1530                         continue
1531                 try:
1532                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1533                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1534                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1535                     else:
1536                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1537                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1538                             subfile.write(sub_data)
1539                 except (OSError, IOError):
1540                     self.report_error('Cannot write subtitles file ' + sub_filename)
1541                     return
1542
1543         if self.params.get('writeinfojson', False):
1544             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1545             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1546                 self.to_screen('[info] Video description metadata is already present')
1547             else:
1548                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1549                 try:
1550                     write_json_file(self.filter_requested_info(info_dict), infofn)
1551                 except (OSError, IOError):
1552                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1553                     return
1554
1555         self._write_thumbnails(info_dict, filename)
1556
1557         if not self.params.get('skip_download', False):
1558             try:
1559                 def dl(name, info):
1560                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1561                     for ph in self._progress_hooks:
1562                         fd.add_progress_hook(ph)
1563                     if self.params.get('verbose'):
1564                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1565                     return fd.download(name, info)
1566
1567                 if info_dict.get('requested_formats') is not None:
1568                     downloaded = []
1569                     success = True
1570                     merger = FFmpegMergerPP(self)
1571                     if not merger.available:
1572                         postprocessors = []
1573                         self.report_warning('You have requested multiple '
1574                                             'formats but ffmpeg or avconv are not installed.'
1575                                             ' The formats won\'t be merged.')
1576                     else:
1577                         postprocessors = [merger]
1578
1579                     def compatible_formats(formats):
1580                         video, audio = formats
1581                         # Check extension
1582                         video_ext, audio_ext = audio.get('ext'), video.get('ext')
1583                         if video_ext and audio_ext:
1584                             COMPATIBLE_EXTS = (
1585                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
1586                                 ('webm')
1587                             )
1588                             for exts in COMPATIBLE_EXTS:
1589                                 if video_ext in exts and audio_ext in exts:
1590                                     return True
1591                         # TODO: Check acodec/vcodec
1592                         return False
1593
1594                     filename_real_ext = os.path.splitext(filename)[1][1:]
1595                     filename_wo_ext = (
1596                         os.path.splitext(filename)[0]
1597                         if filename_real_ext == info_dict['ext']
1598                         else filename)
1599                     requested_formats = info_dict['requested_formats']
1600                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1601                         info_dict['ext'] = 'mkv'
1602                         self.report_warning(
1603                             'Requested formats are incompatible for merge and will be merged into mkv.')
1604                     # Ensure filename always has a correct extension for successful merge
1605                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1606                     if os.path.exists(encodeFilename(filename)):
1607                         self.to_screen(
1608                             '[download] %s has already been downloaded and '
1609                             'merged' % filename)
1610                     else:
1611                         for f in requested_formats:
1612                             new_info = dict(info_dict)
1613                             new_info.update(f)
1614                             fname = self.prepare_filename(new_info)
1615                             fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
1616                             downloaded.append(fname)
1617                             partial_success = dl(fname, new_info)
1618                             success = success and partial_success
1619                         info_dict['__postprocessors'] = postprocessors
1620                         info_dict['__files_to_merge'] = downloaded
1621                 else:
1622                     # Just a single file
1623                     success = dl(filename, info_dict)
1624             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1625                 self.report_error('unable to download video data: %s' % str(err))
1626                 return
1627             except (OSError, IOError) as err:
1628                 raise UnavailableVideoError(err)
1629             except (ContentTooShortError, ) as err:
1630                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1631                 return
1632
1633             if success:
1634                 # Fixup content
1635                 fixup_policy = self.params.get('fixup')
1636                 if fixup_policy is None:
1637                     fixup_policy = 'detect_or_warn'
1638
1639                 stretched_ratio = info_dict.get('stretched_ratio')
1640                 if stretched_ratio is not None and stretched_ratio != 1:
1641                     if fixup_policy == 'warn':
1642                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1643                             info_dict['id'], stretched_ratio))
1644                     elif fixup_policy == 'detect_or_warn':
1645                         stretched_pp = FFmpegFixupStretchedPP(self)
1646                         if stretched_pp.available:
1647                             info_dict.setdefault('__postprocessors', [])
1648                             info_dict['__postprocessors'].append(stretched_pp)
1649                         else:
1650                             self.report_warning(
1651                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1652                                     info_dict['id'], stretched_ratio))
1653                     else:
1654                         assert fixup_policy in ('ignore', 'never')
1655
1656                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1657                     if fixup_policy == 'warn':
1658                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1659                             info_dict['id']))
1660                     elif fixup_policy == 'detect_or_warn':
1661                         fixup_pp = FFmpegFixupM4aPP(self)
1662                         if fixup_pp.available:
1663                             info_dict.setdefault('__postprocessors', [])
1664                             info_dict['__postprocessors'].append(fixup_pp)
1665                         else:
1666                             self.report_warning(
1667                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1668                                     info_dict['id']))
1669                     else:
1670                         assert fixup_policy in ('ignore', 'never')
1671
1672                 try:
1673                     self.post_process(filename, info_dict)
1674                 except (PostProcessingError) as err:
1675                     self.report_error('postprocessing: %s' % str(err))
1676                     return
1677                 self.record_download_archive(info_dict)
1678
1679     def download(self, url_list):
1680         """Download a given list of URLs."""
1681         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1682         if (len(url_list) > 1 and
1683                 '%' not in outtmpl and
1684                 self.params.get('max_downloads') != 1):
1685             raise SameFileError(outtmpl)
1686
1687         for url in url_list:
1688             try:
1689                 # It also downloads the videos
1690                 res = self.extract_info(
1691                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
1692             except UnavailableVideoError:
1693                 self.report_error('unable to download video')
1694             except MaxDownloadsReached:
1695                 self.to_screen('[info] Maximum number of downloaded files reached.')
1696                 raise
1697             else:
1698                 if self.params.get('dump_single_json', False):
1699                     self.to_stdout(json.dumps(res))
1700
1701         return self._download_retcode
1702
1703     def download_with_info_file(self, info_filename):
1704         with contextlib.closing(fileinput.FileInput(
1705                 [info_filename], mode='r',
1706                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1707             # FileInput doesn't have a read method, we can't call json.load
1708             info = self.filter_requested_info(json.loads('\n'.join(f)))
1709         try:
1710             self.process_ie_result(info, download=True)
1711         except DownloadError:
1712             webpage_url = info.get('webpage_url')
1713             if webpage_url is not None:
1714                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1715                 return self.download([webpage_url])
1716             else:
1717                 raise
1718         return self._download_retcode
1719
1720     @staticmethod
1721     def filter_requested_info(info_dict):
1722         return dict(
1723             (k, v) for k, v in info_dict.items()
1724             if k not in ['requested_formats', 'requested_subtitles'])
1725
1726     def post_process(self, filename, ie_info):
1727         """Run all the postprocessors on the given file."""
1728         info = dict(ie_info)
1729         info['filepath'] = filename
1730         pps_chain = []
1731         if ie_info.get('__postprocessors') is not None:
1732             pps_chain.extend(ie_info['__postprocessors'])
1733         pps_chain.extend(self._pps)
1734         for pp in pps_chain:
1735             files_to_delete = []
1736             try:
1737                 files_to_delete, info = pp.run(info)
1738             except PostProcessingError as e:
1739                 self.report_error(e.msg)
1740             if files_to_delete and not self.params.get('keepvideo', False):
1741                 for old_filename in files_to_delete:
1742                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1743                     try:
1744                         os.remove(encodeFilename(old_filename))
1745                     except (IOError, OSError):
1746                         self.report_warning('Unable to remove downloaded original file')
1747
1748     def _make_archive_id(self, info_dict):
1749         # Future-proof against any change in case
1750         # and backwards compatibility with prior versions
1751         extractor = info_dict.get('extractor_key')
1752         if extractor is None:
1753             if 'id' in info_dict:
1754                 extractor = info_dict.get('ie_key')  # key in a playlist
1755         if extractor is None:
1756             return None  # Incomplete video information
1757         return extractor.lower() + ' ' + info_dict['id']
1758
1759     def in_download_archive(self, info_dict):
1760         fn = self.params.get('download_archive')
1761         if fn is None:
1762             return False
1763
1764         vid_id = self._make_archive_id(info_dict)
1765         if vid_id is None:
1766             return False  # Incomplete video information
1767
1768         try:
1769             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1770                 for line in archive_file:
1771                     if line.strip() == vid_id:
1772                         return True
1773         except IOError as ioe:
1774             if ioe.errno != errno.ENOENT:
1775                 raise
1776         return False
1777
1778     def record_download_archive(self, info_dict):
1779         fn = self.params.get('download_archive')
1780         if fn is None:
1781             return
1782         vid_id = self._make_archive_id(info_dict)
1783         assert vid_id
1784         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1785             archive_file.write(vid_id + '\n')
1786
1787     @staticmethod
1788     def format_resolution(format, default='unknown'):
1789         if format.get('vcodec') == 'none':
1790             return 'audio only'
1791         if format.get('resolution') is not None:
1792             return format['resolution']
1793         if format.get('height') is not None:
1794             if format.get('width') is not None:
1795                 res = '%sx%s' % (format['width'], format['height'])
1796             else:
1797                 res = '%sp' % format['height']
1798         elif format.get('width') is not None:
1799             res = '?x%d' % format['width']
1800         else:
1801             res = default
1802         return res
1803
1804     def _format_note(self, fdict):
1805         res = ''
1806         if fdict.get('ext') in ['f4f', 'f4m']:
1807             res += '(unsupported) '
1808         if fdict.get('language'):
1809             if res:
1810                 res += ' '
1811             res += '[%s]' % fdict['language']
1812         if fdict.get('format_note') is not None:
1813             res += fdict['format_note'] + ' '
1814         if fdict.get('tbr') is not None:
1815             res += '%4dk ' % fdict['tbr']
1816         if fdict.get('container') is not None:
1817             if res:
1818                 res += ', '
1819             res += '%s container' % fdict['container']
1820         if (fdict.get('vcodec') is not None and
1821                 fdict.get('vcodec') != 'none'):
1822             if res:
1823                 res += ', '
1824             res += fdict['vcodec']
1825             if fdict.get('vbr') is not None:
1826                 res += '@'
1827         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1828             res += 'video@'
1829         if fdict.get('vbr') is not None:
1830             res += '%4dk' % fdict['vbr']
1831         if fdict.get('fps') is not None:
1832             res += ', %sfps' % fdict['fps']
1833         if fdict.get('acodec') is not None:
1834             if res:
1835                 res += ', '
1836             if fdict['acodec'] == 'none':
1837                 res += 'video only'
1838             else:
1839                 res += '%-5s' % fdict['acodec']
1840         elif fdict.get('abr') is not None:
1841             if res:
1842                 res += ', '
1843             res += 'audio'
1844         if fdict.get('abr') is not None:
1845             res += '@%3dk' % fdict['abr']
1846         if fdict.get('asr') is not None:
1847             res += ' (%5dHz)' % fdict['asr']
1848         if fdict.get('filesize') is not None:
1849             if res:
1850                 res += ', '
1851             res += format_bytes(fdict['filesize'])
1852         elif fdict.get('filesize_approx') is not None:
1853             if res:
1854                 res += ', '
1855             res += '~' + format_bytes(fdict['filesize_approx'])
1856         return res
1857
1858     def list_formats(self, info_dict):
1859         formats = info_dict.get('formats', [info_dict])
1860         table = [
1861             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1862             for f in formats
1863             if f.get('preference') is None or f['preference'] >= -1000]
1864         if len(formats) > 1:
1865             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1866
1867         header_line = ['format code', 'extension', 'resolution', 'note']
1868         self.to_screen(
1869             '[info] Available formats for %s:\n%s' %
1870             (info_dict['id'], render_table(header_line, table)))
1871
1872     def list_thumbnails(self, info_dict):
1873         thumbnails = info_dict.get('thumbnails')
1874         if not thumbnails:
1875             tn_url = info_dict.get('thumbnail')
1876             if tn_url:
1877                 thumbnails = [{'id': '0', 'url': tn_url}]
1878             else:
1879                 self.to_screen(
1880                     '[info] No thumbnails present for %s' % info_dict['id'])
1881                 return
1882
1883         self.to_screen(
1884             '[info] Thumbnails for %s:' % info_dict['id'])
1885         self.to_screen(render_table(
1886             ['ID', 'width', 'height', 'URL'],
1887             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1888
1889     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1890         if not subtitles:
1891             self.to_screen('%s has no %s' % (video_id, name))
1892             return
1893         self.to_screen(
1894             'Available %s for %s:' % (name, video_id))
1895         self.to_screen(render_table(
1896             ['Language', 'formats'],
1897             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1898                 for lang, formats in subtitles.items()]))
1899
1900     def urlopen(self, req):
1901         """ Start an HTTP download """
1902         if isinstance(req, compat_basestring):
1903             req = sanitized_Request(req)
1904         return self._opener.open(req, timeout=self._socket_timeout)
1905
1906     def print_debug_header(self):
1907         if not self.params.get('verbose'):
1908             return
1909
1910         if type('') is not compat_str:
1911             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1912             self.report_warning(
1913                 'Your Python is broken! Update to a newer and supported version')
1914
1915         stdout_encoding = getattr(
1916             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1917         encoding_str = (
1918             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1919                 locale.getpreferredencoding(),
1920                 sys.getfilesystemencoding(),
1921                 stdout_encoding,
1922                 self.get_encoding()))
1923         write_string(encoding_str, encoding=None)
1924
1925         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1926         try:
1927             sp = subprocess.Popen(
1928                 ['git', 'rev-parse', '--short', 'HEAD'],
1929                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1930                 cwd=os.path.dirname(os.path.abspath(__file__)))
1931             out, err = sp.communicate()
1932             out = out.decode().strip()
1933             if re.match('[0-9a-f]+', out):
1934                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1935         except Exception:
1936             try:
1937                 sys.exc_clear()
1938             except Exception:
1939                 pass
1940         self._write_string('[debug] Python version %s - %s\n' % (
1941             platform.python_version(), platform_name()))
1942
1943         exe_versions = FFmpegPostProcessor.get_versions(self)
1944         exe_versions['rtmpdump'] = rtmpdump_version()
1945         exe_str = ', '.join(
1946             '%s %s' % (exe, v)
1947             for exe, v in sorted(exe_versions.items())
1948             if v
1949         )
1950         if not exe_str:
1951             exe_str = 'none'
1952         self._write_string('[debug] exe versions: %s\n' % exe_str)
1953
1954         proxy_map = {}
1955         for handler in self._opener.handlers:
1956             if hasattr(handler, 'proxies'):
1957                 proxy_map.update(handler.proxies)
1958         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1959
1960         if self.params.get('call_home', False):
1961             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1962             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1963             latest_version = self.urlopen(
1964                 'https://yt-dl.org/latest/version').read().decode('utf-8')
1965             if version_tuple(latest_version) > version_tuple(__version__):
1966                 self.report_warning(
1967                     'You are using an outdated version (newest version: %s)! '
1968                     'See https://yt-dl.org/update if you need help updating.' %
1969                     latest_version)
1970
1971     def _setup_opener(self):
1972         timeout_val = self.params.get('socket_timeout')
1973         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1974
1975         opts_cookiefile = self.params.get('cookiefile')
1976         opts_proxy = self.params.get('proxy')
1977
1978         if opts_cookiefile is None:
1979             self.cookiejar = compat_cookiejar.CookieJar()
1980         else:
1981             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1982                 opts_cookiefile)
1983             if os.access(opts_cookiefile, os.R_OK):
1984                 self.cookiejar.load()
1985
1986         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
1987         if opts_proxy is not None:
1988             if opts_proxy == '':
1989                 proxies = {}
1990             else:
1991                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1992         else:
1993             proxies = compat_urllib_request.getproxies()
1994             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1995             if 'http' in proxies and 'https' not in proxies:
1996                 proxies['https'] = proxies['http']
1997         proxy_handler = PerRequestProxyHandler(proxies)
1998
1999         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
2000         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
2001         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
2002         data_handler = compat_urllib_request_DataHandler()
2003
2004         # When passing our own FileHandler instance, build_opener won't add the
2005         # default FileHandler and allows us to disable the file protocol, which
2006         # can be used for malicious purposes (see
2007         # https://github.com/rg3/youtube-dl/issues/8227)
2008         file_handler = compat_urllib_request.FileHandler()
2009
2010         def file_open(*args, **kwargs):
2011             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
2012         file_handler.file_open = file_open
2013
2014         opener = compat_urllib_request.build_opener(
2015             proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)
2016
2017         # Delete the default user-agent header, which would otherwise apply in
2018         # cases where our custom HTTP handler doesn't come into play
2019         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
2020         opener.addheaders = []
2021         self._opener = opener
2022
2023     def encode(self, s):
2024         if isinstance(s, bytes):
2025             return s  # Already encoded
2026
2027         try:
2028             return s.encode(self.get_encoding())
2029         except UnicodeEncodeError as err:
2030             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2031             raise
2032
2033     def get_encoding(self):
2034         encoding = self.params.get('encoding')
2035         if encoding is None:
2036             encoding = preferredencoding()
2037         return encoding
2038
2039     def _write_thumbnails(self, info_dict, filename):
2040         if self.params.get('writethumbnail', False):
2041             thumbnails = info_dict.get('thumbnails')
2042             if thumbnails:
2043                 thumbnails = [thumbnails[-1]]
2044         elif self.params.get('write_all_thumbnails', False):
2045             thumbnails = info_dict.get('thumbnails')
2046         else:
2047             return
2048
2049         if not thumbnails:
2050             # No thumbnails present, so return immediately
2051             return
2052
2053         for t in thumbnails:
2054             thumb_ext = determine_ext(t['url'], 'jpg')
2055             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2056             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2057             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
2058
2059             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2060                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2061                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2062             else:
2063                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2064                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2065                 try:
2066                     uf = self.urlopen(t['url'])
2067                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2068                         shutil.copyfileobj(uf, thumbf)
2069                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2070                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2071                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2072                     self.report_warning('Unable to download thumbnail "%s": %s' %
2073                                         (t['url'], error_to_compat_str(err)))