# Merge branch 'shahid' of https://github.com/remitamine/youtube-dl into remitamine
# youtube_dl/YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import datetime
9 import errno
10 import fileinput
11 import io
12 import itertools
13 import json
14 import locale
15 import operator
16 import os
17 import platform
18 import re
19 import shutil
20 import subprocess
21 import socket
22 import sys
23 import time
24 import tokenize
25 import traceback
26
27 if os.name == 'nt':
28     import ctypes
29
30 from .compat import (
31     compat_cookiejar,
32     compat_expanduser,
33     compat_get_terminal_size,
34     compat_http_client,
35     compat_kwargs,
36     compat_str,
37     compat_tokenize_tokenize,
38     compat_urllib_error,
39     compat_urllib_request,
40 )
41 from .utils import (
42     ContentTooShortError,
43     date_from_str,
44     DateRange,
45     DEFAULT_OUTTMPL,
46     determine_ext,
47     DownloadError,
48     encodeFilename,
49     ExtractorError,
50     format_bytes,
51     formatSeconds,
52     locked_file,
53     make_HTTPS_handler,
54     MaxDownloadsReached,
55     PagedList,
56     parse_filesize,
57     PerRequestProxyHandler,
58     PostProcessingError,
59     platform_name,
60     preferredencoding,
61     render_table,
62     SameFileError,
63     sanitize_filename,
64     sanitize_path,
65     std_headers,
66     subtitles_filename,
67     UnavailableVideoError,
68     url_basename,
69     version_tuple,
70     write_json_file,
71     write_string,
72     YoutubeDLHandler,
73     prepend_extension,
74     replace_extension,
75     args_to_str,
76     age_restricted,
77 )
78 from .cache import Cache
79 from .extractor import get_info_extractor, gen_extractors
80 from .downloader import get_suitable_downloader
81 from .downloader.rtmp import rtmpdump_version
82 from .postprocessor import (
83     FFmpegFixupM4aPP,
84     FFmpegFixupStretchedPP,
85     FFmpegMergerPP,
86     FFmpegPostProcessor,
87     get_postprocessor,
88 )
89 from .version import __version__
90
91
class YoutubeDL(object):
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information, a task that InfoExtractors do, it
    has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forceid:           Force printing ID.
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    forcefilename:     Force printing final filename.
    forceduration:     Force printing duration.
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    simulate:          Do not download the video files.
    format:            Video format code. See options.py for more information.
    outtmpl:           Template for output names.
    restrictfilenames: Do not allow "&" and spaces in file names
    ignoreerrors:      Do not stop on download errors.
    force_generic_extractor: Force downloader to use the generic extractor
    nooverwrites:      Prevent overwriting files.
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    playlist_items:    Specific indices of playlist to download.
    playlistreverse:   Download playlist items in reverse order.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video description to a .info.json file
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    write_all_thumbnails:  Write all thumbnail formats to files
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatic subtitles to a file
    allsubtitles:      Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   The format code for subtitles
    subtitleslangs:    List of languages of the subtitles to download
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
    cookiefile:        File name where cookies should be read from and dumped to.
    nocheckcertificate:Do not verify SSL certificates
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       At the moment, this is only supported by YouTube.
    proxy:             URL of the proxy server to use
    cn_verification_proxy:  URL of the proxy to use for IP address verification
                       on Chinese sites. (Experimental)
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic:Print out sent and received HTTP traffic
    include_ads:       Download ads as well
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Do not resolve URLs, return the immediate result.
                       Pass in 'in_playlist' to only show this behavior for
                       playlist items.
    postprocessors:    A list of dictionaries, each with an entry
                       * key:  The name of the postprocessor. See
                               youtube_dl/postprocessor/__init__.py for a list.
                       as well as any further keyword arguments for the
                       postprocessor.
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                                 Check this first and ignore unknown values.

                       If status is one of "downloading", or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                                               None if unavailable.
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                                unknown
                       * fragment_index: The counter of the currently
                                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
    merge_output_format: Extension to use when merging formats.
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                                           about it, warn otherwise (default)
    source_address:    (Experimental) Client-side IP address to bind to.
    call_home:         Boolean, true iff we are allowed to contact the
                       youtube-dl servers for debugging.
    sleep_interval:    Number of seconds to sleep before each download.
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called with the info_dict of
                       every video.
                       If it returns a message, the video is ignored.
                       If it returns None, the video is downloaded.
                       match_filter_func in utils.py is one example for this.
    no_color:          Do not emit color codes in output.

    The following options determine which downloader is picked:
    external_downloader: Executable of the external downloader to call.
                       None or unset for standard (built-in) downloader.
    hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.

    The following parameters are not used by YoutubeDL itself, they are used by
    the downloader (see youtube_dl/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
    noresizebuffer, retries, continuedl, noprogress, consoletitle,
    xattr_set_filesize, external_downloader_args.

    The following options are used by the post processors:
    prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
                       otherwise prefer avconv.
    postprocessor_args: A list of additional command-line arguments for the
                        postprocessor.
    """

    # NOTE: all of the following are reassigned per instance in __init__();
    # the class-level values only document the expected attributes.
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None
274
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.

        params:    option dictionary (see the class docstring for the keys);
                   an empty dict is used when None is given.
        auto_init: when True, print the debug header and register all the
                   default info extractors immediately.
        """
        if params is None:
            params = {}
        self._ies = []
        self._ies_instances = {}
        self._pps = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # 'logtostderr' (False/True) indexes into [stdout, stderr]
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = params
        self.cache = Cache(self)

        if params.get('bidi_workaround', False):
            try:
                import pty
                # Connect a bidi conversion tool to a pseudo-terminal; the
                # converted text is read back from the master end inside
                # _bidi_workaround().
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    # bidiv is not installed; fall back to fribidi
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == 2:  # ENOENT: neither helper executable was found
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.version_info >= (3,) and sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
                not params.get('restrictfilenames', False)):
            # On Python 3, the Unicode filesystem API will throw errors (#1474)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if isinstance(params.get('outtmpl'), bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')

        self._setup_opener()

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        # Instantiate the configured post-processors: each dict carries the
        # PP name under 'key'; all remaining items are constructor kwargs.
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            del pp_def['key']
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)
347
348     def warn_if_short_id(self, argv):
349         # short YouTube ID starting with dash?
350         idxs = [
351             i for i, a in enumerate(argv)
352             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
353         if idxs:
354             correct_argv = (
355                 ['youtube-dl'] +
356                 [a for i, a in enumerate(argv) if i not in idxs] +
357                 ['--'] + [argv[i] for i in idxs]
358             )
359             self.report_warning(
360                 'Long argument string detected. '
361                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
362                 args_to_str(correct_argv))
363
364     def add_info_extractor(self, ie):
365         """Add an InfoExtractor object to the end of the list."""
366         self._ies.append(ie)
367         self._ies_instances[ie.ie_key()] = ie
368         ie.set_downloader(self)
369
370     def get_info_extractor(self, ie_key):
371         """
372         Get an instance of an IE with name ie_key, it will try to get one from
373         the _ies list, if there's no instance it will create a new one and add
374         it to the extractor list.
375         """
376         ie = self._ies_instances.get(ie_key)
377         if ie is None:
378             ie = get_info_extractor(ie_key)()
379             self.add_info_extractor(ie)
380         return ie
381
382     def add_default_info_extractors(self):
383         """
384         Add the InfoExtractors returned by gen_extractors to the end of the list
385         """
386         for ie in gen_extractors():
387             self.add_info_extractor(ie)
388
389     def add_post_processor(self, pp):
390         """Add a PostProcessor object to the end of the chain."""
391         self._pps.append(pp)
392         pp.set_downloader(self)
393
394     def add_progress_hook(self, ph):
395         """Add the progress hook (currently only for the file downloader)"""
396         self._progress_hooks.append(ph)
397
398     def _bidi_workaround(self, message):
399         if not hasattr(self, '_output_channel'):
400             return message
401
402         assert hasattr(self, '_output_process')
403         assert isinstance(message, compat_str)
404         line_count = message.count('\n') + 1
405         self._output_process.stdin.write((message + '\n').encode('utf-8'))
406         self._output_process.stdin.flush()
407         res = ''.join(self._output_channel.readline().decode('utf-8')
408                       for _ in range(line_count))
409         return res[:-len('\n')]
410
411     def to_screen(self, message, skip_eol=False):
412         """Print message to stdout if not in quiet mode."""
413         return self.to_stdout(message, skip_eol, check_quiet=True)
414
415     def _write_string(self, s, out=None):
416         write_string(s, out=out, encoding=self.params.get('encoding'))
417
418     def to_stdout(self, message, skip_eol=False, check_quiet=False):
419         """Print message to stdout if not in quiet mode."""
420         if self.params.get('logger'):
421             self.params['logger'].debug(message)
422         elif not check_quiet or not self.params.get('quiet', False):
423             message = self._bidi_workaround(message)
424             terminator = ['\n', ''][skip_eol]
425             output = message + terminator
426
427             self._write_string(output, self._screen_file)
428
429     def to_stderr(self, message):
430         """Print message to stderr."""
431         assert isinstance(message, compat_str)
432         if self.params.get('logger'):
433             self.params['logger'].error(message)
434         else:
435             message = self._bidi_workaround(message)
436             output = message + '\n'
437             self._write_string(output, self._err_file)
438
439     def to_console_title(self, message):
440         if not self.params.get('consoletitle', False):
441             return
442         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
443             # c_wchar_p() might not be necessary if `message` is
444             # already of type unicode()
445             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
446         elif 'TERM' in os.environ:
447             self._write_string('\033]0;%s\007' % message, self._screen_file)
448
449     def save_console_title(self):
450         if not self.params.get('consoletitle', False):
451             return
452         if 'TERM' in os.environ:
453             # Save the title on stack
454             self._write_string('\033[22;0t', self._screen_file)
455
456     def restore_console_title(self):
457         if not self.params.get('consoletitle', False):
458             return
459         if 'TERM' in os.environ:
460             # Restore the title from stack
461             self._write_string('\033[23;0t', self._screen_file)
462
463     def __enter__(self):
464         self.save_console_title()
465         return self
466
467     def __exit__(self, *args):
468         self.restore_console_title()
469
470         if self.params.get('cookiefile') is not None:
471             self.cookiejar.save()
472
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.

        Raises DownloadError unless 'ignoreerrors' is set, in which case
        only the return code is set to 1.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # NOTE(review): the .exc_info attribute presumably carries
                    # the original exception wrapped by an ExtractorError —
                    # confirm against youtube_dl/utils.py.
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: dump the current call stack
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Prefer the wrapped exception's exc_info over the current one
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1
502
503     def report_warning(self, message):
504         '''
505         Print the message to stderr, it will be prefixed with 'WARNING:'
506         If stderr is a tty file the 'WARNING:' will be colored
507         '''
508         if self.params.get('logger') is not None:
509             self.params['logger'].warning(message)
510         else:
511             if self.params.get('no_warnings'):
512                 return
513             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
514                 _msg_header = '\033[0;33mWARNING:\033[0m'
515             else:
516                 _msg_header = 'WARNING:'
517             warning_message = '%s %s' % (_msg_header, message)
518             self.to_stderr(warning_message)
519
520     def report_error(self, message, tb=None):
521         '''
522         Do the same as trouble, but prefixes the message with 'ERROR:', colored
523         in red if stderr is a tty file.
524         '''
525         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
526             _msg_header = '\033[0;31mERROR:\033[0m'
527         else:
528             _msg_header = 'ERROR:'
529         error_message = '%s %s' % (_msg_header, message)
530         self.trouble(error_message, tb)
531
532     def report_file_already_downloaded(self, file_name):
533         """Report file has already been fully downloaded."""
534         try:
535             self.to_screen('[download] %s has already been downloaded' % file_name)
536         except UnicodeEncodeError:
537             self.to_screen('[download] The file has already been downloaded')
538
539     def prepare_filename(self, info_dict):
540         """Generate the output filename."""
541         try:
542             template_dict = dict(info_dict)
543
544             template_dict['epoch'] = int(time.time())
545             autonumber_size = self.params.get('autonumber_size')
546             if autonumber_size is None:
547                 autonumber_size = 5
548             autonumber_templ = '%0' + str(autonumber_size) + 'd'
549             template_dict['autonumber'] = autonumber_templ % self._num_downloads
550             if template_dict.get('playlist_index') is not None:
551                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
552             if template_dict.get('resolution') is None:
553                 if template_dict.get('width') and template_dict.get('height'):
554                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
555                 elif template_dict.get('height'):
556                     template_dict['resolution'] = '%sp' % template_dict['height']
557                 elif template_dict.get('width'):
558                     template_dict['resolution'] = '?x%d' % template_dict['width']
559
560             sanitize = lambda k, v: sanitize_filename(
561                 compat_str(v),
562                 restricted=self.params.get('restrictfilenames'),
563                 is_id=(k == 'id'))
564             template_dict = dict((k, sanitize(k, v))
565                                  for k, v in template_dict.items()
566                                  if v is not None)
567             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
568
569             outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
570             tmpl = compat_expanduser(outtmpl)
571             filename = tmpl % template_dict
572             # Temporary fix for #4787
573             # 'Treat' all problem characters by passing filename through preferredencoding
574             # to workaround encoding issues with subprocess on python2 @ Windows
575             if sys.version_info < (3, 0) and sys.platform == 'win32':
576                 filename = encodeFilename(filename, True).decode(preferredencoding())
577             return filename
578         except ValueError as err:
579             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
580             return None
581
582     def _match_entry(self, info_dict, incomplete):
583         """ Returns None iff the file should be downloaded """
584
585         video_title = info_dict.get('title', info_dict.get('id', 'video'))
586         if 'title' in info_dict:
587             # This can happen when we're just evaluating the playlist
588             title = info_dict['title']
589             matchtitle = self.params.get('matchtitle', False)
590             if matchtitle:
591                 if not re.search(matchtitle, title, re.IGNORECASE):
592                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
593             rejecttitle = self.params.get('rejecttitle', False)
594             if rejecttitle:
595                 if re.search(rejecttitle, title, re.IGNORECASE):
596                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
597         date = info_dict.get('upload_date', None)
598         if date is not None:
599             dateRange = self.params.get('daterange', DateRange())
600             if date not in dateRange:
601                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
602         view_count = info_dict.get('view_count', None)
603         if view_count is not None:
604             min_views = self.params.get('min_views')
605             if min_views is not None and view_count < min_views:
606                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
607             max_views = self.params.get('max_views')
608             if max_views is not None and view_count > max_views:
609                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
610         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
611             return 'Skipping "%s" because it is age restricted' % video_title
612         if self.in_download_archive(info_dict):
613             return '%s has already been recorded in archive' % video_title
614
615         if not incomplete:
616             match_filter = self.params.get('match_filter')
617             if match_filter is not None:
618                 ret = match_filter(info_dict)
619                 if ret is not None:
620                     return ret
621
622         return None
623
624     @staticmethod
625     def add_extra_info(info_dict, extra_info):
626         '''Set the keys from extra_info in info dict if they are missing'''
627         for key, value in extra_info.items():
628             info_dict.setdefault(key, value)
629
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
                     process=True, force_generic_extractor=False):
        '''
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result

        ie_key limits extraction to the extractor registered under that key;
        force_generic_extractor forces use of the 'Generic' extractor.
        When process is False the raw ie result is returned without resolving
        nested URLs/playlists.

        NOTE(review): extra_info={} is a mutable default argument; it appears
        to be only read here, but confirm before mutating it downstream.
        '''

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        if ie_key:
            ies = [self.get_info_extractor(ie_key)]
        else:
            ies = self._ies

        # Hand the URL to the first suitable extractor; the for/else below
        # reports an error only when no extractor claimed the URL.
        for ie in ies:
            if not ie.suitable(url):
                continue

            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            try:
                ie_result = ie.extract(url)
                if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                    break
                if isinstance(ie_result, list):
                    # Backwards compatibility: old IE result format
                    ie_result = {
                        '_type': 'compat_list',
                        'entries': ie_result,
                    }
                self.add_default_extra_info(ie_result, ie, url)
                if process:
                    return self.process_ie_result(ie_result, download, extra_info)
                else:
                    return ie_result
            except ExtractorError as de:  # An error we somewhat expected
                self.report_error(compat_str(de), de.format_traceback())
                break
            except MaxDownloadsReached:
                # Propagate: the download limit aborts the whole run
                raise
            except Exception as e:
                if self.params.get('ignoreerrors', False):
                    self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                    break
                else:
                    raise
        else:
            self.report_error('no suitable InfoExtractor for URL %s' % url)
682
683     def add_default_extra_info(self, ie_result, ie, url):
684         self.add_extra_info(ie_result, {
685             'extractor': ie.IE_NAME,
686             'webpage_url': url,
687             'webpage_url_basename': url_basename(url),
688             'extractor_key': ie.ie_key(),
689         })
690
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """
        # NOTE(review): the shared {} default for extra_info is safe here only
        # because extra_info is read, never mutated, in this method.

        result_type = ie_result.get('_type', 'video')

        # With --flat-playlist ('extract_flat'), url results inside a playlist
        # (or all url results, if extract_flat is True) are returned unresolved.
        if result_type in ('url', 'url_transparent'):
            extract_flat = self.params.get('extract_flat', False)
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                    extract_flat is True):
                if self.params.get('forcejson', False):
                    self.to_stdout(json.dumps(ie_result))
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # Non-None fields of the embedding result override the target's,
            # except '_type' and 'url' which must come from the target.
            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            assert new_result.get('_type') != 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist' or result_type == 'multi_video':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # playliststart is converted to a 0-based slice start here.
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

            # --playlist-items: comma-separated 1-based indices and
            # ranges, e.g. '1-3,7'; yielded lazily as a generator.
            playlistitems_str = self.params.get('playlist_items', None)
            playlistitems = None
            if playlistitems_str is not None:
                def iter_playlistitems(format):
                    for string_segment in format.split(','):
                        if '-' in string_segment:
                            start, end = string_segment.split('-')
                            for item in range(int(start), int(end) + 1):
                                yield int(item)
                        else:
                            yield int(string_segment)
                playlistitems = iter_playlistitems(playlistitems_str)

            # The entries can be a plain list, a PagedList, or any other
            # iterable (e.g. a generator); each needs its own slicing logic.
            ie_entries = ie_result['entries']
            if isinstance(ie_entries, list):
                n_all_entries = len(ie_entries)
                if playlistitems:
                    # Out-of-range requested items are silently dropped.
                    entries = [
                        ie_entries[i - 1] for i in playlistitems
                        if -n_all_entries <= i - 1 < n_all_entries]
                else:
                    entries = ie_entries[playliststart:playlistend]
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            elif isinstance(ie_entries, PagedList):
                if playlistitems:
                    entries = []
                    for item in playlistitems:
                        entries.extend(ie_entries.getslice(
                            item - 1, item
                        ))
                else:
                    entries = ie_entries.getslice(
                        playliststart, playlistend)
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))
            else:  # iterable
                if playlistitems:
                    # NOTE(review): unlike the list branch above, this path
                    # does no bounds checking on the requested indices.
                    entry_list = list(ie_entries)
                    entries = [entry_list[i - 1] for i in playlistitems]
                else:
                    entries = list(itertools.islice(
                        ie_entries, playliststart, playlistend))
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))

            if self.params.get('playlistreverse', False):
                entries = entries[::-1]

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                # Per-entry metadata propagated into each resolved result.
                extra = {
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_id': ie_result.get('id'),
                    'playlist_title': ie_result.get('title'),
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

                # Skip entries rejected by --match-filter & friends (only
                # filters usable on incomplete metadata are applied here).
                reason = self._match_entry(entry, incomplete=True)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                    continue

                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            return ie_result
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            def _fixup(r):
                # Propagate the playlist-level bookkeeping fields into each
                # legacy entry before resolving it.
                self.add_extra_info(
                    r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    }
                )
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
854
855     def _build_format_filter(self, filter_spec):
856         " Returns a function to filter the formats according to the filter_spec "
857
858         OPERATORS = {
859             '<': operator.lt,
860             '<=': operator.le,
861             '>': operator.gt,
862             '>=': operator.ge,
863             '=': operator.eq,
864             '!=': operator.ne,
865         }
866         operator_rex = re.compile(r'''(?x)\s*
867             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
868             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
869             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
870             $
871             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
872         m = operator_rex.search(filter_spec)
873         if m:
874             try:
875                 comparison_value = int(m.group('value'))
876             except ValueError:
877                 comparison_value = parse_filesize(m.group('value'))
878                 if comparison_value is None:
879                     comparison_value = parse_filesize(m.group('value') + 'B')
880                 if comparison_value is None:
881                     raise ValueError(
882                         'Invalid value %r in format specification %r' % (
883                             m.group('value'), filter_spec))
884             op = OPERATORS[m.group('op')]
885
886         if not m:
887             STR_OPERATORS = {
888                 '=': operator.eq,
889                 '!=': operator.ne,
890             }
891             str_operator_rex = re.compile(r'''(?x)
892                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
893                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
894                 \s*(?P<value>[a-zA-Z0-9_-]+)
895                 \s*$
896                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
897             m = str_operator_rex.search(filter_spec)
898             if m:
899                 comparison_value = m.group('value')
900                 op = STR_OPERATORS[m.group('op')]
901
902         if not m:
903             raise ValueError('Invalid filter specification %r' % filter_spec)
904
905         def _filter(f):
906             actual_value = f.get(m.group('key'))
907             if actual_value is None:
908                 return m.group('none_inclusive')
909             return op(actual_value, comparison_value)
910         return _filter
911
    def build_format_selector(self, format_spec):
        """Compile a --format specification (e.g. 'best',
        'bestvideo+bestaudio/best', 'mp4[height<=720]') into a selector
        function mapping a list of format dicts to the chosen format(s).

        The spec is run through the Python tokenizer, cleaned up by
        _remove_unused_ops, parsed into a tree of FormatSelector tuples by
        _parse_format_selection, and finally turned into nested generator
        functions by _build_selector_function.
        """
        def syntax_error(note, start):
            # Build (not raise) a SyntaxError with a caret at column start[1].
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        # Node types of the parsed selector tree:
        #   PICKFIRST: 'a/b'  - first alternative that yields any format
        #   MERGE:     'a+b'  - video format merged with audio format
        #   SINGLE:    a bare name ('best', 'mp4', a format_id, ...)
        #   GROUP:     '(...)' parenthesized sub-expression
        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        def _parse_filter(tokens):
            # Consume tokens up to the matching ']' and return the raw filter
            # string; it is compiled later by self._build_format_filter.
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the sourrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    # Accumulate adjacent tokens into one NAME token.
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Recursive-descent parser; the inside_* flags tell a recursive
            # call which delimiters terminate its sub-expression (the
            # terminator is pushed back for the caller to consume).
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        # A bare '[...]' filter implicitly filters 'best'.
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        video_selector = current_selector
                        audio_selector = _parse_format_selection(tokens, inside_merge=True)
                        if not video_selector or not audio_selector:
                            raise syntax_error('"+" must be between two format selectors', start)
                        current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _build_selector_function(selector):
            # Turn a FormatSelector tree (or a list of them, meaning
            # "yield all of them in order") into a function
            # formats -> iterable of chosen format dicts.
            if isinstance(selector, list):
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(formats):
                    for f in fs:
                        for format in f(formats):
                            yield format
                return selector_function
            elif selector.type == GROUP:
                selector_function = _build_selector_function(selector.selector)
            elif selector.type == PICKFIRST:
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(formats):
                    for f in fs:
                        picked_formats = list(f(formats))
                        if picked_formats:
                            return picked_formats
                    return []
            elif selector.type == SINGLE:
                format_spec = selector.selector

                def selector_function(formats):
                    formats = list(formats)
                    if not formats:
                        return
                    if format_spec == 'all':
                        for f in formats:
                            yield f
                    elif format_spec in ['best', 'worst', None]:
                        # The formats list is ordered worst-to-best, hence
                        # index -1 for 'best' and 0 for 'worst'.
                        format_idx = 0 if format_spec == 'worst' else -1
                        audiovideo_formats = [
                            f for f in formats
                            if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                        if audiovideo_formats:
                            yield audiovideo_formats[format_idx]
                        # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
                        elif (all(f.get('acodec') != 'none' for f in formats) or
                              all(f.get('vcodec') != 'none' for f in formats)):
                            yield formats[format_idx]
                    elif format_spec == 'bestaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[-1]
                    elif format_spec == 'worstaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[0]
                    elif format_spec == 'bestvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[-1]
                    elif format_spec == 'worstvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[0]
                    else:
                        # Otherwise the spec names an extension or a
                        # format_id; the last match wins.
                        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                        if format_spec in extensions:
                            filter_f = lambda f: f['ext'] == format_spec
                        else:
                            filter_f = lambda f: f['format_id'] == format_spec
                        matches = list(filter(filter_f, formats))
                        if matches:
                            yield matches[-1]
            elif selector.type == MERGE:
                def _merge(formats_info):
                    format_1, format_2 = [f['format_id'] for f in formats_info]
                    # The first format must contain the video and the
                    # second the audio
                    if formats_info[0].get('vcodec') == 'none':
                        self.report_error('The first format must '
                                          'contain the video, try using '
                                          '"-f %s+%s"' % (format_2, format_1))
                        return
                    output_ext = (
                        formats_info[0]['ext']
                        if self.params.get('merge_output_format') is None
                        else self.params['merge_output_format'])
                    # Merged entry: video properties from the first format,
                    # audio properties from the second.
                    return {
                        'requested_formats': formats_info,
                        'format': '%s+%s' % (formats_info[0].get('format'),
                                             formats_info[1].get('format')),
                        'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                formats_info[1].get('format_id')),
                        'width': formats_info[0].get('width'),
                        'height': formats_info[0].get('height'),
                        'resolution': formats_info[0].get('resolution'),
                        'fps': formats_info[0].get('fps'),
                        'vcodec': formats_info[0].get('vcodec'),
                        'vbr': formats_info[0].get('vbr'),
                        'stretched_ratio': formats_info[0].get('stretched_ratio'),
                        'acodec': formats_info[1].get('acodec'),
                        'abr': formats_info[1].get('abr'),
                        'ext': output_ext,
                    }
                video_selector, audio_selector = map(_build_selector_function, selector.selector)

                def selector_function(formats):
                    formats = list(formats)
                    for pair in itertools.product(video_selector(formats), audio_selector(formats)):
                        yield _merge(pair)

            # Apply the node's '[...]' filters on top of the base selector.
            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(formats):
                for _filter in filters:
                    formats = list(filter(_filter, formats))
                return selector_function(formats)
            return final_selector

        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator(object):
            # Iterator over the token list with one-token pushback
            # (restore_last_token), needed by the recursive parser.
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            # Python 2 spelling of the iterator protocol.
            next = __next__

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
1168
1169     def _calc_headers(self, info_dict):
1170         res = std_headers.copy()
1171
1172         add_headers = info_dict.get('http_headers')
1173         if add_headers:
1174             res.update(add_headers)
1175
1176         cookies = self._calc_cookies(info_dict)
1177         if cookies:
1178             res['Cookie'] = cookies
1179
1180         return res
1181
1182     def _calc_cookies(self, info_dict):
1183         pr = compat_urllib_request.Request(info_dict['url'])
1184         self.cookiejar.add_cookie_header(pr)
1185         return pr.get_header('Cookie')
1186
    def process_video_result(self, info_dict, download=True):
        """Normalize a single-video info dict (thumbnails, upload_date,
        subtitles, per-format defaults), select the requested format(s)
        and, if download is true, hand each selected format to
        process_info.  Returns info_dict, mutated in place and updated
        with the last selected format for backwards compatibility.
        Raises ExtractorError on missing mandatory fields or when no
        format matches the request.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        # Promote a lone 'thumbnail' into a one-element 'thumbnails' list,
        # then sort ascending by preference/size and fill in missing
        # 'resolution' and 'id' fields.
        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if thumbnails:
            thumbnails.sort(key=lambda t: (
                t.get('preference'), t.get('width'), t.get('height'),
                t.get('id'), t.get('url')))
            for i, t in enumerate(thumbnails):
                if t.get('width') and t.get('height'):
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                if t.get('id') is None:
                    t['id'] = '%d' % i

        if thumbnails and 'thumbnail' not in info_dict:
            # Default thumbnail: the last entry after the sort above.
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Working around out-of-range timestamp values (e.g. negative ones on Windows,
            # see http://bugs.python.org/issue1646728)
            try:
                upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
                info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
            except (ValueError, OverflowError, OSError):
                pass

        # --list-subs short-circuits: print and return without downloading.
        if self.params.get('listsubtitles', False):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
            self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
            return
        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], info_dict.get('subtitles'),
            info_dict.get('automatic_captions'))

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            raise ExtractorError('No video formats found!')

        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)

            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        for format_id, ambiguous_formats in formats_dict.items():
            if len(ambiguous_formats) > 1:
                for i, format in enumerate(ambiguous_formats):
                    format['format_id'] = '%s-%d' % (format_id, i)

        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url']).lower()
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # wich can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats'):
            self.list_formats(info_dict)
            return
        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            # Default format: prefer 'bestvideo+bestaudio' when not writing
            # to stdout, the extractor is youtube/ted, the video is not
            # live, and ffmpeg can merge; always fall back to 'best'.
            req_format_list = []
            if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
                    info_dict['extractor'] in ['youtube', 'ted'] and
                    not info_dict.get('is_live')):
                merger = FFmpegMergerPP(self)
                if merger.available and merger.can_merge():
                    req_format_list.append('bestvideo+bestaudio')
            req_format_list.append('best')
            req_format = '/'.join(req_format_list)
        format_selector = self.build_format_selector(req_format)
        formats_to_download = list(format_selector(formats))
        if not formats_to_download:
            raise ExtractorError('requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
1327
1328     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1329         """Select the requested subtitles and their format"""
1330         available_subs = {}
1331         if normal_subtitles and self.params.get('writesubtitles'):
1332             available_subs.update(normal_subtitles)
1333         if automatic_captions and self.params.get('writeautomaticsub'):
1334             for lang, cap_info in automatic_captions.items():
1335                 if lang not in available_subs:
1336                     available_subs[lang] = cap_info
1337
1338         if (not self.params.get('writesubtitles') and not
1339                 self.params.get('writeautomaticsub') or not
1340                 available_subs):
1341             return None
1342
1343         if self.params.get('allsubtitles', False):
1344             requested_langs = available_subs.keys()
1345         else:
1346             if self.params.get('subtitleslangs', False):
1347                 requested_langs = self.params.get('subtitleslangs')
1348             elif 'en' in available_subs:
1349                 requested_langs = ['en']
1350             else:
1351                 requested_langs = [list(available_subs.keys())[0]]
1352
1353         formats_query = self.params.get('subtitlesformat', 'best')
1354         formats_preference = formats_query.split('/') if formats_query else []
1355         subs = {}
1356         for lang in requested_langs:
1357             formats = available_subs.get(lang)
1358             if formats is None:
1359                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1360                 continue
1361             for ext in formats_preference:
1362                 if ext == 'best':
1363                     f = formats[-1]
1364                     break
1365                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1366                 if matches:
1367                     f = matches[-1]
1368                     break
1369             else:
1370                 f = formats[-1]
1371                 self.report_warning(
1372                     'No subtitle format found matching "%s" for language %s, '
1373                     'using %s' % (formats_query, lang, f['ext']))
1374             subs[lang] = f
1375         return subs
1376
    def process_info(self, info_dict):
        """Process a single resolved IE result.

        Applies match filters and forced printings, writes side files
        (description, annotations, subtitles, info JSON, thumbnails),
        performs the actual download (including multi-format download and
        merge), and schedules fixup postprocessors.  Expects a fully
        resolved video result (``_type == 'video'``).
        """

        assert info_dict.get('_type', 'video') == 'video'

        # Abort the whole run once --max-downloads is reached.
        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads >= int(max_downloads):
                raise MaxDownloadsReached()

        # Keep the untruncated title available (e.g. for --get-title);
        # cap the working title at 200 characters for filename safety.
        info_dict['fulltitle'] = info_dict['title']
        if len(info_dict['title']) > 200:
            info_dict['title'] = info_dict['title'][:197] + '...'

        if 'format' not in info_dict:
            info_dict['format'] = info_dict['ext']

        # Skip the video when a match filter (title/duration/archive...)
        # rejects it; _match_entry returns the human-readable reason.
        reason = self._match_entry(info_dict, incomplete=False)
        if reason is not None:
            self.to_screen('[download] ' + reason)
            return

        self._num_downloads += 1

        info_dict['_filename'] = filename = self.prepare_filename(info_dict)

        # Forced printings
        if self.params.get('forcetitle', False):
            self.to_stdout(info_dict['fulltitle'])
        if self.params.get('forceid', False):
            self.to_stdout(info_dict['id'])
        if self.params.get('forceurl', False):
            if info_dict.get('requested_formats') is not None:
                for f in info_dict['requested_formats']:
                    self.to_stdout(f['url'] + f.get('play_path', ''))
            else:
                # For RTMP URLs, also include the playpath
                self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
        if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
            self.to_stdout(info_dict['thumbnail'])
        if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
            self.to_stdout(info_dict['description'])
        if self.params.get('forcefilename', False) and filename is not None:
            self.to_stdout(filename)
        if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
            self.to_stdout(formatSeconds(info_dict['duration']))
        if self.params.get('forceformat', False):
            self.to_stdout(info_dict['format'])
        if self.params.get('forcejson', False):
            self.to_stdout(json.dumps(info_dict))

        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):
            return

        if filename is None:
            return

        # Create the destination directory if needed.
        try:
            dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
            if dn and not os.path.exists(dn):
                os.makedirs(dn)
        except (OSError, IOError) as err:
            self.report_error('unable to create directory ' + compat_str(err))
            return

        # --write-description: save the description next to the video file.
        if self.params.get('writedescription', False):
            descfn = replace_extension(filename, 'description', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Video description is already present')
            elif info_dict.get('description') is None:
                self.report_warning('There\'s no description to write.')
            else:
                try:
                    self.to_screen('[info] Writing video description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(info_dict['description'])
                except (OSError, IOError):
                    self.report_error('Cannot write description file ' + descfn)
                    return

        # --write-annotations: save annotation XML, if the extractor set it.
        if self.params.get('writeannotations', False):
            annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
                self.to_screen('[info] Video annotations are already present')
            else:
                try:
                    self.to_screen('[info] Writing video annotations to: ' + annofn)
                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                        annofile.write(info_dict['annotations'])
                except (KeyError, TypeError):
                    # 'annotations' missing or not a string
                    self.report_warning('There are no annotations to write.')
                except (OSError, IOError):
                    self.report_error('Cannot write annotations file: ' + annofn)
                    return

        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub')])

        if subtitles_are_requested and info_dict.get('requested_subtitles'):
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            subtitles = info_dict['requested_subtitles']
            ie = self.get_info_extractor(info_dict['extractor_key'])
            for sub_lang, sub_info in subtitles.items():
                sub_format = sub_info['ext']
                # Inline subtitle data takes precedence over a URL to fetch.
                if sub_info.get('data') is not None:
                    sub_data = sub_info['data']
                else:
                    try:
                        sub_data = ie._download_webpage(
                            sub_info['url'], info_dict['id'], note=False)
                    except ExtractorError as err:
                        self.report_warning('Unable to download subtitle for "%s": %s' %
                                            (sub_lang, compat_str(err.cause)))
                        continue
                try:
                    sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                    if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                        self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                    else:
                        self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                            subfile.write(sub_data)
                except (OSError, IOError):
                    self.report_error('Cannot write subtitles file ' + sub_filename)
                    return

        # --write-info-json: dump the (filtered) info dict alongside the video.
        if self.params.get('writeinfojson', False):
            infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Video description metadata is already present')
            else:
                self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
                try:
                    write_json_file(self.filter_requested_info(info_dict), infofn)
                except (OSError, IOError):
                    self.report_error('Cannot write metadata to JSON file ' + infofn)
                    return

        self._write_thumbnails(info_dict, filename)

        if not self.params.get('skip_download', False):
            try:
                def dl(name, info):
                    # Pick and run the appropriate FileDownloader for this
                    # format, wiring through the registered progress hooks.
                    fd = get_suitable_downloader(info, self.params)(self, self.params)
                    for ph in self._progress_hooks:
                        fd.add_progress_hook(ph)
                    if self.params.get('verbose'):
                        self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                    return fd.download(name, info)

                if info_dict.get('requested_formats') is not None:
                    # Multiple formats were requested (e.g. bestvideo+bestaudio):
                    # download each separately, then merge with ffmpeg/avconv.
                    downloaded = []
                    success = True
                    merger = FFmpegMergerPP(self)
                    if not merger.available:
                        postprocessors = []
                        self.report_warning('You have requested multiple '
                                            'formats but ffmpeg or avconv are not installed.'
                                            ' The formats won\'t be merged.')
                    else:
                        postprocessors = [merger]

                    def compatible_formats(formats):
                        # True if the (video, audio) pair shares a container
                        # family, so merging need not fall back to mkv.
                        video, audio = formats
                        # Check extension
                        # NOTE(review): video_ext/audio_ext look swapped here
                        # (video_ext gets audio's ext); harmless because the
                        # membership check below is symmetric — confirm.
                        video_ext, audio_ext = audio.get('ext'), video.get('ext')
                        if video_ext and audio_ext:
                            # NOTE(review): ('webm') is a plain string, not a
                            # 1-tuple, so `in` performs substring matching
                            # (e.g. 'web' would match); likely meant ('webm',).
                            COMPATIBLE_EXTS = (
                                ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
                                ('webm')
                            )
                            for exts in COMPATIBLE_EXTS:
                                if video_ext in exts and audio_ext in exts:
                                    return True
                        # TODO: Check acodec/vcodec
                        return False

                    # Strip the extension from the template result only when
                    # it matches the expected one, to avoid double extensions.
                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext == info_dict['ext']
                        else filename)
                    requested_formats = info_dict['requested_formats']
                    if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'Requested formats are incompatible for merge and will be merged into mkv.')
                    # Ensure filename always has a correct extension for successful merge
                    filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
                    if os.path.exists(encodeFilename(filename)):
                        self.to_screen(
                            '[download] %s has already been downloaded and '
                            'merged' % filename)
                    else:
                        # Download each part to '<name>.f<format_id>.<ext>';
                        # the merger later combines them into `filename`.
                        for f in requested_formats:
                            new_info = dict(info_dict)
                            new_info.update(f)
                            fname = self.prepare_filename(new_info)
                            fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
                            downloaded.append(fname)
                            partial_success = dl(fname, new_info)
                            success = success and partial_success
                        info_dict['__postprocessors'] = postprocessors
                        info_dict['__files_to_merge'] = downloaded
                else:
                    # Just a single file
                    success = dl(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error('unable to download video data: %s' % str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

            if success:
                # Fixup content
                fixup_policy = self.params.get('fixup')
                if fixup_policy is None:
                    fixup_policy = 'detect_or_warn'

                # Non-uniform pixel aspect ratio: warn or queue the
                # FFmpeg stretch-fixup postprocessor per --fixup policy.
                stretched_ratio = info_dict.get('stretched_ratio')
                if stretched_ratio is not None and stretched_ratio != 1:
                    if fixup_policy == 'warn':
                        self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                            info_dict['id'], stretched_ratio))
                    elif fixup_policy == 'detect_or_warn':
                        stretched_pp = FFmpegFixupStretchedPP(self)
                        if stretched_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(stretched_pp)
                        else:
                            self.report_warning(
                                '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
                                    info_dict['id'], stretched_ratio))
                    else:
                        assert fixup_policy in ('ignore', 'never')

                # Single-file DASH m4a container: same warn/fixup treatment.
                if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
                    if fixup_policy == 'warn':
                        self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
                            info_dict['id']))
                    elif fixup_policy == 'detect_or_warn':
                        fixup_pp = FFmpegFixupM4aPP(self)
                        if fixup_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(fixup_pp)
                        else:
                            self.report_warning(
                                '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
                                    info_dict['id']))
                    else:
                        assert fixup_policy in ('ignore', 'never')

                try:
                    self.post_process(filename, info_dict)
                except (PostProcessingError) as err:
                    self.report_error('postprocessing: %s' % str(err))
                    return
                self.record_download_archive(info_dict)
1640
1641     def download(self, url_list):
1642         """Download a given list of URLs."""
1643         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1644         if (len(url_list) > 1 and
1645                 '%' not in outtmpl and
1646                 self.params.get('max_downloads') != 1):
1647             raise SameFileError(outtmpl)
1648
1649         for url in url_list:
1650             try:
1651                 # It also downloads the videos
1652                 res = self.extract_info(
1653                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
1654             except UnavailableVideoError:
1655                 self.report_error('unable to download video')
1656             except MaxDownloadsReached:
1657                 self.to_screen('[info] Maximum number of downloaded files reached.')
1658                 raise
1659             else:
1660                 if self.params.get('dump_single_json', False):
1661                     self.to_stdout(json.dumps(res))
1662
1663         return self._download_retcode
1664
1665     def download_with_info_file(self, info_filename):
1666         with contextlib.closing(fileinput.FileInput(
1667                 [info_filename], mode='r',
1668                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1669             # FileInput doesn't have a read method, we can't call json.load
1670             info = self.filter_requested_info(json.loads('\n'.join(f)))
1671         try:
1672             self.process_ie_result(info, download=True)
1673         except DownloadError:
1674             webpage_url = info.get('webpage_url')
1675             if webpage_url is not None:
1676                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1677                 return self.download([webpage_url])
1678             else:
1679                 raise
1680         return self._download_retcode
1681
1682     @staticmethod
1683     def filter_requested_info(info_dict):
1684         return dict(
1685             (k, v) for k, v in info_dict.items()
1686             if k not in ['requested_formats', 'requested_subtitles'])
1687
1688     def post_process(self, filename, ie_info):
1689         """Run all the postprocessors on the given file."""
1690         info = dict(ie_info)
1691         info['filepath'] = filename
1692         pps_chain = []
1693         if ie_info.get('__postprocessors') is not None:
1694             pps_chain.extend(ie_info['__postprocessors'])
1695         pps_chain.extend(self._pps)
1696         for pp in pps_chain:
1697             files_to_delete = []
1698             try:
1699                 files_to_delete, info = pp.run(info)
1700             except PostProcessingError as e:
1701                 self.report_error(e.msg)
1702             if files_to_delete and not self.params.get('keepvideo', False):
1703                 for old_filename in files_to_delete:
1704                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1705                     try:
1706                         os.remove(encodeFilename(old_filename))
1707                     except (IOError, OSError):
1708                         self.report_warning('Unable to remove downloaded original file')
1709
1710     def _make_archive_id(self, info_dict):
1711         # Future-proof against any change in case
1712         # and backwards compatibility with prior versions
1713         extractor = info_dict.get('extractor_key')
1714         if extractor is None:
1715             if 'id' in info_dict:
1716                 extractor = info_dict.get('ie_key')  # key in a playlist
1717         if extractor is None:
1718             return None  # Incomplete video information
1719         return extractor.lower() + ' ' + info_dict['id']
1720
1721     def in_download_archive(self, info_dict):
1722         fn = self.params.get('download_archive')
1723         if fn is None:
1724             return False
1725
1726         vid_id = self._make_archive_id(info_dict)
1727         if vid_id is None:
1728             return False  # Incomplete video information
1729
1730         try:
1731             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1732                 for line in archive_file:
1733                     if line.strip() == vid_id:
1734                         return True
1735         except IOError as ioe:
1736             if ioe.errno != errno.ENOENT:
1737                 raise
1738         return False
1739
1740     def record_download_archive(self, info_dict):
1741         fn = self.params.get('download_archive')
1742         if fn is None:
1743             return
1744         vid_id = self._make_archive_id(info_dict)
1745         assert vid_id
1746         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1747             archive_file.write(vid_id + '\n')
1748
1749     @staticmethod
1750     def format_resolution(format, default='unknown'):
1751         if format.get('vcodec') == 'none':
1752             return 'audio only'
1753         if format.get('resolution') is not None:
1754             return format['resolution']
1755         if format.get('height') is not None:
1756             if format.get('width') is not None:
1757                 res = '%sx%s' % (format['width'], format['height'])
1758             else:
1759                 res = '%sp' % format['height']
1760         elif format.get('width') is not None:
1761             res = '?x%d' % format['width']
1762         else:
1763             res = default
1764         return res
1765
    def _format_note(self, fdict):
        """Build the human-readable note string for one format dict.

        Concatenates extension support, bitrates, codecs, fps, audio sample
        rate and file size into the single 'note' column shown by
        --list-formats.  Separators (', ') are only inserted when something
        has already been written, so the order of the checks matters.
        """
        res = ''
        # f4f/f4m formats are flagged as unsupported by the downloaders
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            # total bitrate, right-aligned to 4 digits
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            if res:
                res += ', '
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            if res:
                res += ', '
            res += fdict['vcodec']
            # '@' joins the codec name with the video bitrate that follows
            if fdict.get('vbr') is not None:
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            # No codec name known but both bitrates are: label the vbr part
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            res += ', %sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            if fdict['acodec'] == 'none':
                res += 'video only'
            else:
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            # audio sample rate
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            # '~' marks an estimated size
            if res:
                res += ', '
            res += '~' + format_bytes(fdict['filesize_approx'])
        return res
1815
1816     def list_formats(self, info_dict):
1817         formats = info_dict.get('formats', [info_dict])
1818         table = [
1819             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1820             for f in formats
1821             if f.get('preference') is None or f['preference'] >= -1000]
1822         if len(formats) > 1:
1823             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1824
1825         header_line = ['format code', 'extension', 'resolution', 'note']
1826         self.to_screen(
1827             '[info] Available formats for %s:\n%s' %
1828             (info_dict['id'], render_table(header_line, table)))
1829
1830     def list_thumbnails(self, info_dict):
1831         thumbnails = info_dict.get('thumbnails')
1832         if not thumbnails:
1833             tn_url = info_dict.get('thumbnail')
1834             if tn_url:
1835                 thumbnails = [{'id': '0', 'url': tn_url}]
1836             else:
1837                 self.to_screen(
1838                     '[info] No thumbnails present for %s' % info_dict['id'])
1839                 return
1840
1841         self.to_screen(
1842             '[info] Thumbnails for %s:' % info_dict['id'])
1843         self.to_screen(render_table(
1844             ['ID', 'width', 'height', 'URL'],
1845             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1846
1847     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1848         if not subtitles:
1849             self.to_screen('%s has no %s' % (video_id, name))
1850             return
1851         self.to_screen(
1852             'Available %s for %s:' % (name, video_id))
1853         self.to_screen(render_table(
1854             ['Language', 'formats'],
1855             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1856                 for lang, formats in subtitles.items()]))
1857
1858     def urlopen(self, req):
1859         """ Start an HTTP download """
1860         return self._opener.open(req, timeout=self._socket_timeout)
1861
    def print_debug_header(self):
        """Write debug information (encodings, versions, proxies) to the
        debug output; no-op unless --verbose is enabled."""
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        # sys.stdout may lack an 'encoding' attribute (e.g. when replaced)
        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
        try:
            # Best-effort: report the git revision when run from a checkout
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        except Exception:
            try:
                # Python 2 only; absent on Python 3, hence the nested guard
                sys.exc_clear()
            except Exception:
                pass
        self._write_string('[debug] Python version %s - %s\n' % (
            platform.python_version(), platform_name()))

        # Versions of the external programs (ffmpeg/avconv, rtmpdump, ...)
        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Collect the effective proxy configuration from the opener's handlers
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            # Opt-in check against yt-dl.org: public IP and latest version
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
1926
    def _setup_opener(self):
        """Build the urllib opener (cookies, proxies, custom HTTPS handler)
        used by urlopen(); stores it on self._opener and sets
        self._socket_timeout and self.cookiejar."""
        timeout_val = self.params.get('socket_timeout')
        # Default socket timeout is 600 seconds when not configured
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # In-memory cookies only
            self.cookiejar = compat_cookiejar.CookieJar()
        else:
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
                opts_cookiefile)
            # Only load existing cookies if the file is readable;
            # it may not exist yet on first run
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load()

        cookie_processor = compat_urllib_request.HTTPCookieProcessor(
            self.cookiejar)
        if opts_proxy is not None:
            if opts_proxy == '':
                # --proxy "" explicitly disables all proxies
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            # Fall back to the environment's proxy settings
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)

        # debuglevel=1 makes the HTTP handlers dump traffic (--print-traffic)
        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        opener = compat_urllib_request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh)

        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
1967
1968     def encode(self, s):
1969         if isinstance(s, bytes):
1970             return s  # Already encoded
1971
1972         try:
1973             return s.encode(self.get_encoding())
1974         except UnicodeEncodeError as err:
1975             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1976             raise
1977
1978     def get_encoding(self):
1979         encoding = self.params.get('encoding')
1980         if encoding is None:
1981             encoding = preferredencoding()
1982         return encoding
1983
1984     def _write_thumbnails(self, info_dict, filename):
1985         if self.params.get('writethumbnail', False):
1986             thumbnails = info_dict.get('thumbnails')
1987             if thumbnails:
1988                 thumbnails = [thumbnails[-1]]
1989         elif self.params.get('write_all_thumbnails', False):
1990             thumbnails = info_dict.get('thumbnails')
1991         else:
1992             return
1993
1994         if not thumbnails:
1995             # No thumbnails present, so return immediately
1996             return
1997
1998         for t in thumbnails:
1999             thumb_ext = determine_ext(t['url'], 'jpg')
2000             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2001             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2002             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
2003
2004             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2005                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2006                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2007             else:
2008                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2009                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2010                 try:
2011                     uf = self.urlopen(t['url'])
2012                     with open(thumb_filename, 'wb') as thumbf:
2013                         shutil.copyfileobj(uf, thumbf)
2014                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2015                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2016                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2017                     self.report_warning('Unable to download thumbnail "%s": %s' %
2018                                         (t['url'], compat_str(err)))