[YoutubeDL] format spec: add additional checks for invalid syntax
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import datetime
9 import errno
10 import fileinput
11 import io
12 import itertools
13 import json
14 import locale
15 import operator
16 import os
17 import platform
18 import re
19 import shutil
20 import subprocess
21 import socket
22 import sys
23 import time
24 import tokenize
25 import traceback
26
27 if os.name == 'nt':
28     import ctypes
29
30 from .compat import (
31     compat_basestring,
32     compat_cookiejar,
33     compat_expanduser,
34     compat_get_terminal_size,
35     compat_http_client,
36     compat_kwargs,
37     compat_str,
38     compat_tokenize_tokenize,
39     compat_urllib_error,
40     compat_urllib_request,
41 )
42 from .utils import (
43     escape_url,
44     ContentTooShortError,
45     date_from_str,
46     DateRange,
47     DEFAULT_OUTTMPL,
48     determine_ext,
49     DownloadError,
50     encodeFilename,
51     ExtractorError,
52     format_bytes,
53     formatSeconds,
54     HEADRequest,
55     locked_file,
56     make_HTTPS_handler,
57     MaxDownloadsReached,
58     PagedList,
59     parse_filesize,
60     PerRequestProxyHandler,
61     PostProcessingError,
62     platform_name,
63     preferredencoding,
64     render_table,
65     SameFileError,
66     sanitize_filename,
67     sanitize_path,
68     std_headers,
69     subtitles_filename,
70     UnavailableVideoError,
71     url_basename,
72     version_tuple,
73     write_json_file,
74     write_string,
75     YoutubeDLHandler,
76     prepend_extension,
77     replace_extension,
78     args_to_str,
79     age_restricted,
80 )
81 from .cache import Cache
82 from .extractor import get_info_extractor, gen_extractors
83 from .downloader import get_suitable_downloader
84 from .downloader.rtmp import rtmpdump_version
85 from .postprocessor import (
86     FFmpegFixupM4aPP,
87     FFmpegFixupStretchedPP,
88     FFmpegMergerPP,
89     FFmpegPostProcessor,
90     get_postprocessor,
91 )
92 from .version import __version__
93
94
95 class YoutubeDL(object):
96     """YoutubeDL class.
97
98     YoutubeDL objects are the ones responsible for downloading the
99     actual video file and writing it to disk if the user has requested
100     it, among some other tasks. In most cases there should be one per
101     program. As, given a video URL, the downloader doesn't know how to
102     extract all the needed information (a task that InfoExtractors do), it
103     has to pass the URL to one of them.
104
105     For this, YoutubeDL objects have a method that allows
106     InfoExtractors to be registered in a given order. When it is passed
107     a URL, the YoutubeDL object hands it to the first InfoExtractor it
108     finds that reports being able to handle it. The InfoExtractor extracts
109     all the information about the video or videos the URL refers to, and
110     YoutubeDL process the extracted information, possibly using a File
111     Downloader to download the video.
112
113     YoutubeDL objects accept a lot of parameters. In order not to saturate
114     the object constructor with arguments, it receives a dictionary of
115     options instead. These options are available through the params
116     attribute for the InfoExtractors to use. The YoutubeDL also
117     registers itself as the downloader in charge for the InfoExtractors
118     that are added to it, so this is a "mutual registration".
119
120     Available options:
121
122     username:          Username for authentication purposes.
123     password:          Password for authentication purposes.
124     videopassword:     Password for accessing a video.
125     usenetrc:          Use netrc for authentication instead.
126     verbose:           Print additional info to stdout.
127     quiet:             Do not print messages to stdout.
128     no_warnings:       Do not print out anything for warnings.
129     forceurl:          Force printing final URL.
130     forcetitle:        Force printing title.
131     forceid:           Force printing ID.
132     forcethumbnail:    Force printing thumbnail URL.
133     forcedescription:  Force printing description.
134     forcefilename:     Force printing final filename.
135     forceduration:     Force printing duration.
136     forcejson:         Force printing info_dict as JSON.
137     dump_single_json:  Force printing the info_dict of the whole playlist
138                        (or video) as a single JSON line.
139     simulate:          Do not download the video files.
140     format:            Video format code. See options.py for more information.
141     outtmpl:           Template for output names.
142     restrictfilenames: Do not allow "&" and spaces in file names
143     ignoreerrors:      Do not stop on download errors.
144     force_generic_extractor: Force downloader to use the generic extractor
145     nooverwrites:      Prevent overwriting files.
146     playliststart:     Playlist item to start at.
147     playlistend:       Playlist item to end at.
148     playlist_items:    Specific indices of playlist to download.
149     playlistreverse:   Download playlist items in reverse order.
150     matchtitle:        Download only matching titles.
151     rejecttitle:       Reject downloads for matching titles.
152     logger:            Log messages to a logging.Logger instance.
153     logtostderr:       Log messages to stderr instead of stdout.
154     writedescription:  Write the video description to a .description file
155     writeinfojson:     Write the video description to a .info.json file
156     writeannotations:  Write the video annotations to a .annotations.xml file
157     writethumbnail:    Write the thumbnail image to a file
158     write_all_thumbnails:  Write all thumbnail formats to files
159     writesubtitles:    Write the video subtitles to a file
160     writeautomaticsub: Write the automatic subtitles to a file
161     allsubtitles:      Downloads all the subtitles of the video
162                        (requires writesubtitles or writeautomaticsub)
163     listsubtitles:     Lists all available subtitles for the video
164     subtitlesformat:   The format code for subtitles
165     subtitleslangs:    List of languages of the subtitles to download
166     keepvideo:         Keep the video file after post-processing
167     daterange:         A DateRange object, download only if the upload_date is in the range.
168     skip_download:     Skip the actual download of the video file
169     cachedir:          Location of the cache files in the filesystem.
170                        False to disable filesystem cache.
171     noplaylist:        Download single video instead of a playlist if in doubt.
172     age_limit:         An integer representing the user's age in years.
173                        Unsuitable videos for the given age are skipped.
174     min_views:         An integer representing the minimum view count the video
175                        must have in order to not be skipped.
176                        Videos without view count information are always
177                        downloaded. None for no limit.
178     max_views:         An integer representing the maximum view count.
179                        Videos that are more popular than that are not
180                        downloaded.
181                        Videos without view count information are always
182                        downloaded. None for no limit.
183     download_archive:  File name of a file where all downloads are recorded.
184                        Videos already present in the file are not downloaded
185                        again.
186     cookiefile:        File name where cookies should be read from and dumped to.
187     nocheckcertificate:Do not verify SSL certificates
188     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
189                        At the moment, this is only supported by YouTube.
190     proxy:             URL of the proxy server to use
191     cn_verification_proxy:  URL of the proxy to use for IP address verification
192                        on Chinese sites. (Experimental)
193     socket_timeout:    Time to wait for unresponsive hosts, in seconds
194     bidi_workaround:   Work around buggy terminals without bidirectional text
195                        support, using fribidi
196     debug_printtraffic:Print out sent and received HTTP traffic
197     include_ads:       Download ads as well
198     default_search:    Prepend this string if an input url is not valid.
199                        'auto' for elaborate guessing
200     encoding:          Use this encoding instead of the system-specified.
201     extract_flat:      Do not resolve URLs, return the immediate result.
202                        Pass in 'in_playlist' to only show this behavior for
203                        playlist items.
204     postprocessors:    A list of dictionaries, each with an entry
205                        * key:  The name of the postprocessor. See
206                                youtube_dl/postprocessor/__init__.py for a list.
207                        as well as any further keyword arguments for the
208                        postprocessor.
209     progress_hooks:    A list of functions that get called on download
210                        progress, with a dictionary with the entries
211                        * status: One of "downloading", "error", or "finished".
212                                  Check this first and ignore unknown values.
213
214                        If status is one of "downloading", or "finished", the
215                        following properties may also be present:
216                        * filename: The final filename (always present)
217                        * tmpfilename: The filename we're currently writing to
218                        * downloaded_bytes: Bytes on disk
219                        * total_bytes: Size of the whole file, None if unknown
220                        * total_bytes_estimate: Guess of the eventual file size,
221                                                None if unavailable.
222                        * elapsed: The number of seconds since download started.
223                        * eta: The estimated time in seconds, None if unknown
224                        * speed: The download speed in bytes/second, None if
225                                 unknown
226                        * fragment_index: The counter of the currently
227                                          downloaded video fragment.
228                        * fragment_count: The number of fragments (= individual
229                                          files that will be merged)
230
231                        Progress hooks are guaranteed to be called at least once
232                        (with status "finished") if the download is successful.
233     merge_output_format: Extension to use when merging formats.
234     fixup:             Automatically correct known faults of the file.
235                        One of:
236                        - "never": do nothing
237                        - "warn": only emit a warning
238                        - "detect_or_warn": check whether we can do anything
239                                            about it, warn otherwise (default)
240     source_address:    (Experimental) Client-side IP address to bind to.
241     call_home:         Boolean, true iff we are allowed to contact the
242                        youtube-dl servers for debugging.
243     sleep_interval:    Number of seconds to sleep before each download.
244     listformats:       Print an overview of available video formats and exit.
245     list_thumbnails:   Print a table of all thumbnails and exit.
246     match_filter:      A function that gets called with the info_dict of
247                        every video.
248                        If it returns a message, the video is ignored.
249                        If it returns None, the video is downloaded.
250                        match_filter_func in utils.py is one example for this.
251     no_color:          Do not emit color codes in output.
252
253     The following options determine which downloader is picked:
254     external_downloader: Executable of the external downloader to call.
255                        None or unset for standard (built-in) downloader.
256     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
257
258     The following parameters are not used by YoutubeDL itself, they are used by
259     the downloader (see youtube_dl/downloader/common.py):
260     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
261     noresizebuffer, retries, continuedl, noprogress, consoletitle,
262     xattr_set_filesize, external_downloader_args.
263
264     The following options are used by the post processors:
265     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
266                        otherwise prefer avconv.
267     """
268
    # Class-level defaults; every one of these is replaced by a real
    # per-instance value in __init__.
    params = None              # dict of downloader options (see class docstring)
    _ies = []                  # registered InfoExtractor instances, in order
    _pps = []                  # registered PostProcessor chain
    _download_retcode = None   # exit code accumulated across downloads
    _num_downloads = None      # number of files downloaded this session
    _screen_file = None        # stream used for normal (non-error) output
275
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.

        params:    dict of options (see the class docstring); an empty
                   dict is used when None is given.
        auto_init: when True, print the debug header and register the
                   default info extractors immediately.
        """
        if params is None:
            params = {}
        self._ies = []
        self._ies_instances = {}
        self._pps = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Normal output goes to stderr instead of stdout when 'logtostderr' is set.
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = params
        self.cache = Cache(self)

        if params.get('bidi_workaround', False):
            try:
                import pty
                # Spawn an external bidi filter (bidiv, falling back to
                # fribidi) with its stdout on a pty; _bidi_workaround()
                # pipes text through it before display.
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    # bidiv not installed; try fribidi instead.
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == 2:  # ENOENT: neither helper executable was found
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.version_info >= (3,) and sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
                not params.get('restrictfilenames', False)):
            # On Python 3, the Unicode filesystem API will throw errors (#1474)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if isinstance(params.get('outtmpl'), bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')

        self._setup_opener()

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        # Instantiate the configured postprocessors; every key of the
        # definition dict other than 'key' is passed through as a keyword
        # argument to the postprocessor class.
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            del pp_def['key']
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)
348
349     def warn_if_short_id(self, argv):
350         # short YouTube ID starting with dash?
351         idxs = [
352             i for i, a in enumerate(argv)
353             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
354         if idxs:
355             correct_argv = (
356                 ['youtube-dl'] +
357                 [a for i, a in enumerate(argv) if i not in idxs] +
358                 ['--'] + [argv[i] for i in idxs]
359             )
360             self.report_warning(
361                 'Long argument string detected. '
362                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
363                 args_to_str(correct_argv))
364
365     def add_info_extractor(self, ie):
366         """Add an InfoExtractor object to the end of the list."""
367         self._ies.append(ie)
368         self._ies_instances[ie.ie_key()] = ie
369         ie.set_downloader(self)
370
371     def get_info_extractor(self, ie_key):
372         """
373         Get an instance of an IE with name ie_key, it will try to get one from
374         the _ies list, if there's no instance it will create a new one and add
375         it to the extractor list.
376         """
377         ie = self._ies_instances.get(ie_key)
378         if ie is None:
379             ie = get_info_extractor(ie_key)()
380             self.add_info_extractor(ie)
381         return ie
382
383     def add_default_info_extractors(self):
384         """
385         Add the InfoExtractors returned by gen_extractors to the end of the list
386         """
387         for ie in gen_extractors():
388             self.add_info_extractor(ie)
389
390     def add_post_processor(self, pp):
391         """Add a PostProcessor object to the end of the chain."""
392         self._pps.append(pp)
393         pp.set_downloader(self)
394
395     def add_progress_hook(self, ph):
396         """Add the progress hook (currently only for the file downloader)"""
397         self._progress_hooks.append(ph)
398
399     def _bidi_workaround(self, message):
400         if not hasattr(self, '_output_channel'):
401             return message
402
403         assert hasattr(self, '_output_process')
404         assert isinstance(message, compat_str)
405         line_count = message.count('\n') + 1
406         self._output_process.stdin.write((message + '\n').encode('utf-8'))
407         self._output_process.stdin.flush()
408         res = ''.join(self._output_channel.readline().decode('utf-8')
409                       for _ in range(line_count))
410         return res[:-len('\n')]
411
412     def to_screen(self, message, skip_eol=False):
413         """Print message to stdout if not in quiet mode."""
414         return self.to_stdout(message, skip_eol, check_quiet=True)
415
416     def _write_string(self, s, out=None):
417         write_string(s, out=out, encoding=self.params.get('encoding'))
418
419     def to_stdout(self, message, skip_eol=False, check_quiet=False):
420         """Print message to stdout if not in quiet mode."""
421         if self.params.get('logger'):
422             self.params['logger'].debug(message)
423         elif not check_quiet or not self.params.get('quiet', False):
424             message = self._bidi_workaround(message)
425             terminator = ['\n', ''][skip_eol]
426             output = message + terminator
427
428             self._write_string(output, self._screen_file)
429
430     def to_stderr(self, message):
431         """Print message to stderr."""
432         assert isinstance(message, compat_str)
433         if self.params.get('logger'):
434             self.params['logger'].error(message)
435         else:
436             message = self._bidi_workaround(message)
437             output = message + '\n'
438             self._write_string(output, self._err_file)
439
440     def to_console_title(self, message):
441         if not self.params.get('consoletitle', False):
442             return
443         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
444             # c_wchar_p() might not be necessary if `message` is
445             # already of type unicode()
446             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
447         elif 'TERM' in os.environ:
448             self._write_string('\033]0;%s\007' % message, self._screen_file)
449
450     def save_console_title(self):
451         if not self.params.get('consoletitle', False):
452             return
453         if 'TERM' in os.environ:
454             # Save the title on stack
455             self._write_string('\033[22;0t', self._screen_file)
456
457     def restore_console_title(self):
458         if not self.params.get('consoletitle', False):
459             return
460         if 'TERM' in os.environ:
461             # Restore the title from stack
462             self._write_string('\033[23;0t', self._screen_file)
463
464     def __enter__(self):
465         self.save_console_title()
466         return self
467
468     def __exit__(self, *args):
469         self.restore_console_title()
470
471         if self.params.get('cookiefile') is not None:
472             self.cookiejar.save()
473
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # Some wrapped exceptions carry the original error's
                    # exc_info; show that traceback first.
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: print the current call stack.
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Re-raise as DownloadError, preferring the wrapped exception's
            # original exc_info when one is attached.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1
503
504     def report_warning(self, message):
505         '''
506         Print the message to stderr, it will be prefixed with 'WARNING:'
507         If stderr is a tty file the 'WARNING:' will be colored
508         '''
509         if self.params.get('logger') is not None:
510             self.params['logger'].warning(message)
511         else:
512             if self.params.get('no_warnings'):
513                 return
514             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
515                 _msg_header = '\033[0;33mWARNING:\033[0m'
516             else:
517                 _msg_header = 'WARNING:'
518             warning_message = '%s %s' % (_msg_header, message)
519             self.to_stderr(warning_message)
520
521     def report_error(self, message, tb=None):
522         '''
523         Do the same as trouble, but prefixes the message with 'ERROR:', colored
524         in red if stderr is a tty file.
525         '''
526         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
527             _msg_header = '\033[0;31mERROR:\033[0m'
528         else:
529             _msg_header = 'ERROR:'
530         error_message = '%s %s' % (_msg_header, message)
531         self.trouble(error_message, tb)
532
533     def report_file_already_downloaded(self, file_name):
534         """Report file has already been fully downloaded."""
535         try:
536             self.to_screen('[download] %s has already been downloaded' % file_name)
537         except UnicodeEncodeError:
538             self.to_screen('[download] The file has already been downloaded')
539
540     def prepare_filename(self, info_dict):
541         """Generate the output filename."""
542         try:
543             template_dict = dict(info_dict)
544
545             template_dict['epoch'] = int(time.time())
546             autonumber_size = self.params.get('autonumber_size')
547             if autonumber_size is None:
548                 autonumber_size = 5
549             autonumber_templ = '%0' + str(autonumber_size) + 'd'
550             template_dict['autonumber'] = autonumber_templ % self._num_downloads
551             if template_dict.get('playlist_index') is not None:
552                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
553             if template_dict.get('resolution') is None:
554                 if template_dict.get('width') and template_dict.get('height'):
555                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
556                 elif template_dict.get('height'):
557                     template_dict['resolution'] = '%sp' % template_dict['height']
558                 elif template_dict.get('width'):
559                     template_dict['resolution'] = '?x%d' % template_dict['width']
560
561             sanitize = lambda k, v: sanitize_filename(
562                 compat_str(v),
563                 restricted=self.params.get('restrictfilenames'),
564                 is_id=(k == 'id'))
565             template_dict = dict((k, sanitize(k, v))
566                                  for k, v in template_dict.items()
567                                  if v is not None)
568             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
569
570             outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
571             tmpl = compat_expanduser(outtmpl)
572             filename = tmpl % template_dict
573             # Temporary fix for #4787
574             # 'Treat' all problem characters by passing filename through preferredencoding
575             # to workaround encoding issues with subprocess on python2 @ Windows
576             if sys.version_info < (3, 0) and sys.platform == 'win32':
577                 filename = encodeFilename(filename, True).decode(preferredencoding())
578             return filename
579         except ValueError as err:
580             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
581             return None
582
583     def _match_entry(self, info_dict, incomplete):
584         """ Returns None iff the file should be downloaded """
585
586         video_title = info_dict.get('title', info_dict.get('id', 'video'))
587         if 'title' in info_dict:
588             # This can happen when we're just evaluating the playlist
589             title = info_dict['title']
590             matchtitle = self.params.get('matchtitle', False)
591             if matchtitle:
592                 if not re.search(matchtitle, title, re.IGNORECASE):
593                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
594             rejecttitle = self.params.get('rejecttitle', False)
595             if rejecttitle:
596                 if re.search(rejecttitle, title, re.IGNORECASE):
597                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
598         date = info_dict.get('upload_date', None)
599         if date is not None:
600             dateRange = self.params.get('daterange', DateRange())
601             if date not in dateRange:
602                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
603         view_count = info_dict.get('view_count', None)
604         if view_count is not None:
605             min_views = self.params.get('min_views')
606             if min_views is not None and view_count < min_views:
607                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
608             max_views = self.params.get('max_views')
609             if max_views is not None and view_count > max_views:
610                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
611         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
612             return 'Skipping "%s" because it is age restricted' % video_title
613         if self.in_download_archive(info_dict):
614             return '%s has already been recorded in archive' % video_title
615
616         if not incomplete:
617             match_filter = self.params.get('match_filter')
618             if match_filter is not None:
619                 ret = match_filter(info_dict)
620                 if ret is not None:
621                     return ret
622
623         return None
624
625     @staticmethod
626     def add_extra_info(info_dict, extra_info):
627         '''Set the keys from extra_info in info dict if they are missing'''
628         for key, value in extra_info.items():
629             info_dict.setdefault(key, value)
630
631     def extract_info(self, url, download=True, ie_key=None, extra_info={},
632                      process=True, force_generic_extractor=False):
633         '''
634         Returns a list with a dictionary for each video we find.
635         If 'download', also downloads the videos.
636         extra_info is a dict containing the extra values to add to each result
637         '''
638
639         if not ie_key and force_generic_extractor:
640             ie_key = 'Generic'
641
642         if ie_key:
643             ies = [self.get_info_extractor(ie_key)]
644         else:
645             ies = self._ies
646
647         for ie in ies:
648             if not ie.suitable(url):
649                 continue
650
651             if not ie.working():
652                 self.report_warning('The program functionality for this site has been marked as broken, '
653                                     'and will probably not work.')
654
655             try:
656                 ie_result = ie.extract(url)
657                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
658                     break
659                 if isinstance(ie_result, list):
660                     # Backwards compatibility: old IE result format
661                     ie_result = {
662                         '_type': 'compat_list',
663                         'entries': ie_result,
664                     }
665                 self.add_default_extra_info(ie_result, ie, url)
666                 if process:
667                     return self.process_ie_result(ie_result, download, extra_info)
668                 else:
669                     return ie_result
670             except ExtractorError as de:  # An error we somewhat expected
671                 self.report_error(compat_str(de), de.format_traceback())
672                 break
673             except MaxDownloadsReached:
674                 raise
675             except Exception as e:
676                 if self.params.get('ignoreerrors', False):
677                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
678                     break
679                 else:
680                     raise
681         else:
682             self.report_error('no suitable InfoExtractor for URL %s' % url)
683
684     def add_default_extra_info(self, ie_result, ie, url):
685         self.add_extra_info(ie_result, {
686             'extractor': ie.IE_NAME,
687             'webpage_url': url,
688             'webpage_url_basename': url_basename(url),
689             'extractor_key': ie.ie_key(),
690         })
691
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """
        # NOTE(review): the mutable default for extra_info is only ever read
        # (and forwarded), never mutated, so the shared default is harmless here.

        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            # With --flat-playlist (extract_flat), URL results found inside a
            # playlist are returned as-is instead of being resolved further.
            extract_flat = self.params.get('extract_flat', False)
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                    extract_flat is True):
                if self.params.get('forcejson', False):
                    self.to_stdout(json.dumps(ie_result))
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # Non-None fields from the embedding page override the embedded
            # result, except '_type' and 'url' which must come from 'info'.
            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            assert new_result.get('_type') != 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist' or result_type == 'multi_video':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # playliststart is 1-based in params; convert to 0-based slice start.
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

            playlistitems_str = self.params.get('playlist_items', None)
            playlistitems = None
            if playlistitems_str is not None:
                def iter_playlistitems(format):
                    # Expand a "1,3,5-7" style specification into item indices.
                    for string_segment in format.split(','):
                        if '-' in string_segment:
                            start, end = string_segment.split('-')
                            for item in range(int(start), int(end) + 1):
                                yield int(item)
                        else:
                            yield int(string_segment)
                playlistitems = iter_playlistitems(playlistitems_str)

            ie_entries = ie_result['entries']
            if isinstance(ie_entries, list):
                # Entries fully materialized: select/slice directly.
                n_all_entries = len(ie_entries)
                if playlistitems:
                    entries = [
                        ie_entries[i - 1] for i in playlistitems
                        if -n_all_entries <= i - 1 < n_all_entries]
                else:
                    entries = ie_entries[playliststart:playlistend]
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            elif isinstance(ie_entries, PagedList):
                # Lazily-paged entries: fetch only the requested slices.
                if playlistitems:
                    entries = []
                    for item in playlistitems:
                        entries.extend(ie_entries.getslice(
                            item - 1, item
                        ))
                else:
                    entries = ie_entries.getslice(
                        playliststart, playlistend)
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))
            else:  # iterable
                if playlistitems:
                    entry_list = list(ie_entries)
                    entries = [entry_list[i - 1] for i in playlistitems]
                else:
                    entries = list(itertools.islice(
                        ie_entries, playliststart, playlistend))
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))

            if self.params.get('playlistreverse', False):
                entries = entries[::-1]

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                # Per-entry metadata inherited from the enclosing playlist.
                extra = {
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_id': ie_result.get('id'),
                    'playlist_title': ie_result.get('title'),
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

                # Filters (--match-title etc.) may reject the entry up front.
                reason = self._match_entry(entry, incomplete=True)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                    continue

                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            return ie_result
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            def _fixup(r):
                # Backfill the standard extractor fields on each legacy entry.
                self.add_extra_info(
                    r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    }
                )
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
855
856     def _build_format_filter(self, filter_spec):
857         " Returns a function to filter the formats according to the filter_spec "
858
859         OPERATORS = {
860             '<': operator.lt,
861             '<=': operator.le,
862             '>': operator.gt,
863             '>=': operator.ge,
864             '=': operator.eq,
865             '!=': operator.ne,
866         }
867         operator_rex = re.compile(r'''(?x)\s*
868             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
869             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
870             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
871             $
872             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
873         m = operator_rex.search(filter_spec)
874         if m:
875             try:
876                 comparison_value = int(m.group('value'))
877             except ValueError:
878                 comparison_value = parse_filesize(m.group('value'))
879                 if comparison_value is None:
880                     comparison_value = parse_filesize(m.group('value') + 'B')
881                 if comparison_value is None:
882                     raise ValueError(
883                         'Invalid value %r in format specification %r' % (
884                             m.group('value'), filter_spec))
885             op = OPERATORS[m.group('op')]
886
887         if not m:
888             STR_OPERATORS = {
889                 '=': operator.eq,
890                 '!=': operator.ne,
891             }
892             str_operator_rex = re.compile(r'''(?x)
893                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
894                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
895                 \s*(?P<value>[a-zA-Z0-9_-]+)
896                 \s*$
897                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
898             m = str_operator_rex.search(filter_spec)
899             if m:
900                 comparison_value = m.group('value')
901                 op = STR_OPERATORS[m.group('op')]
902
903         if not m:
904             raise ValueError('Invalid filter specification %r' % filter_spec)
905
906         def _filter(f):
907             actual_value = f.get(m.group('key'))
908             if actual_value is None:
909                 return m.group('none_inclusive')
910             return op(actual_value, comparison_value)
911         return _filter
912
    def build_format_selector(self, format_spec):
        """Compile a --format specification into a selector function.

        The returned callable takes an iterable of format dicts and yields
        the chosen format(s). Raises SyntaxError on invalid specifications.
        """
        def syntax_error(note, start):
            # Build (not raise) a SyntaxError pointing at column start[1].
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        # Node types of the parsed selector tree.
        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        def _parse_filter(tokens):
            # Collect raw token strings until the closing ']' of a [filter].
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Recursive-descent parser over the token stream; the inside_*
            # flags tell which delimiters terminate the current sub-expression.
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        # 'a/b': pick the first alternative that yields formats.
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        # 'video+audio': merge two selections into one download.
                        video_selector = current_selector
                        audio_selector = _parse_format_selection(tokens, inside_merge=True)
                        if not video_selector or not audio_selector:
                            raise syntax_error('"+" must be between two format selectors', start)
                        current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _build_selector_function(selector):
            # Turn a parsed selector tree (or list of trees) into a callable.
            if isinstance(selector, list):
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(formats):
                    # Concatenate the output of each comma-separated selector.
                    for f in fs:
                        for format in f(formats):
                            yield format
                return selector_function
            elif selector.type == GROUP:
                selector_function = _build_selector_function(selector.selector)
            elif selector.type == PICKFIRST:
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(formats):
                    # First alternative that yields at least one format wins.
                    for f in fs:
                        picked_formats = list(f(formats))
                        if picked_formats:
                            return picked_formats
                    return []
            elif selector.type == SINGLE:
                format_spec = selector.selector

                def selector_function(formats):
                    formats = list(formats)
                    if not formats:
                        return
                    if format_spec == 'all':
                        for f in formats:
                            yield f
                    elif format_spec in ['best', 'worst', None]:
                        # Formats are assumed sorted worst-to-best (last = best).
                        format_idx = 0 if format_spec == 'worst' else -1
                        audiovideo_formats = [
                            f for f in formats
                            if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                        if audiovideo_formats:
                            yield audiovideo_formats[format_idx]
                        # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
                        elif (all(f.get('acodec') != 'none' for f in formats) or
                              all(f.get('vcodec') != 'none' for f in formats)):
                            yield formats[format_idx]
                    elif format_spec == 'bestaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[-1]
                    elif format_spec == 'worstaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[0]
                    elif format_spec == 'bestvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[-1]
                    elif format_spec == 'worstvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[0]
                    else:
                        # Otherwise the spec is an extension or a format_id.
                        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                        if format_spec in extensions:
                            filter_f = lambda f: f['ext'] == format_spec
                        else:
                            filter_f = lambda f: f['format_id'] == format_spec
                        matches = list(filter(filter_f, formats))
                        if matches:
                            yield matches[-1]
            elif selector.type == MERGE:
                def _merge(formats_info):
                    format_1, format_2 = [f['format_id'] for f in formats_info]
                    # The first format must contain the video and the
                    # second the audio
                    if formats_info[0].get('vcodec') == 'none':
                        self.report_error('The first format must '
                                          'contain the video, try using '
                                          '"-f %s+%s"' % (format_2, format_1))
                        return
                    output_ext = (
                        formats_info[0]['ext']
                        if self.params.get('merge_output_format') is None
                        else self.params['merge_output_format'])
                    # Synthesize a combined format dict: video attributes from
                    # the first format, audio attributes from the second.
                    return {
                        'requested_formats': formats_info,
                        'format': '%s+%s' % (formats_info[0].get('format'),
                                             formats_info[1].get('format')),
                        'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                formats_info[1].get('format_id')),
                        'width': formats_info[0].get('width'),
                        'height': formats_info[0].get('height'),
                        'resolution': formats_info[0].get('resolution'),
                        'fps': formats_info[0].get('fps'),
                        'vcodec': formats_info[0].get('vcodec'),
                        'vbr': formats_info[0].get('vbr'),
                        'stretched_ratio': formats_info[0].get('stretched_ratio'),
                        'acodec': formats_info[1].get('acodec'),
                        'abr': formats_info[1].get('abr'),
                        'ext': output_ext,
                    }
                video_selector, audio_selector = map(_build_selector_function, selector.selector)

                def selector_function(formats):
                    formats = list(formats)
                    for pair in itertools.product(video_selector(formats), audio_selector(formats)):
                        yield _merge(pair)

            # Apply any [filters] attached to this selector node before selecting.
            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(formats):
                for _filter in filters:
                    formats = list(filter(_filter, formats))
                return selector_function(formats)
            return final_selector

        # Tokenize the spec with Python's own tokenizer; bracket/paren
        # imbalance surfaces as TokenError and is reported as a syntax error.
        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(compat_tokenize_tokenize(stream.readline))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator(object):
            # Token stream with one-token push-back (restore_last_token),
            # needed by the recursive parser at sub-expression boundaries.
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            next = __next__

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
1136
1137     def _calc_headers(self, info_dict):
1138         res = std_headers.copy()
1139
1140         add_headers = info_dict.get('http_headers')
1141         if add_headers:
1142             res.update(add_headers)
1143
1144         cookies = self._calc_cookies(info_dict)
1145         if cookies:
1146             res['Cookie'] = cookies
1147
1148         return res
1149
1150     def _calc_cookies(self, info_dict):
1151         pr = compat_urllib_request.Request(info_dict['url'])
1152         self.cookiejar.add_cookie_header(pr)
1153         return pr.get_header('Cookie')
1154
    def process_video_result(self, info_dict, download=True):
        """Normalize a single extracted video result in place, select the
        requested format(s) and, when 'download' is true, download them.

        Returns the (mutated) info_dict, updated with the last selected format.
        Raises ExtractorError on missing mandatory fields or when no format
        matches the request.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        # Normalize thumbnails: a lone 'thumbnail' becomes a one-element
        # 'thumbnails' list; the list is then sorted and given ids/resolutions.
        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if thumbnails:
            thumbnails.sort(key=lambda t: (
                t.get('preference'), t.get('width'), t.get('height'),
                t.get('id'), t.get('url')))
            for i, t in enumerate(thumbnails):
                if 'width' in t and 'height' in t:
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                if t.get('id') is None:
                    t['id'] = '%d' % i

        if thumbnails and 'thumbnail' not in info_dict:
            # 'thumbnail' defaults to the last (preferred) entry after sorting.
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Working around out-of-range timestamp values (e.g. negative ones on Windows,
            # see http://bugs.python.org/issue1646728)
            try:
                upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
                info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
            except (ValueError, OverflowError, OSError):
                pass

        # Subtitle listing short-circuits the rest of the processing.
        if self.params.get('listsubtitles', False):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
            self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
            return
        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], info_dict.get('subtitles'),
            info_dict.get('automatic_captions'))

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            raise ExtractorError('No video formats found!')

        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)

            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        for format_id, ambiguous_formats in formats_dict.items():
            if len(ambiguous_formats) > 1:
                # Disambiguate duplicates by appending a per-duplicate index.
                for i, format in enumerate(ambiguous_formats):
                    format['format_id'] = '%s-%d' % (format_id, i)

        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url']).lower()
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats'):
            self.list_formats(info_dict)
            return
        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            # Default selection: prefer bestvideo+bestaudio when merging is
            # possible (youtube/ted, ffmpeg merger available, not piping to
            # stdout), falling back to plain 'best'.
            req_format_list = []
            if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
                    info_dict['extractor'] in ['youtube', 'ted']):
                merger = FFmpegMergerPP(self)
                if merger.available and merger.can_merge():
                    req_format_list.append('bestvideo+bestaudio')
            req_format_list.append('best')
            req_format = '/'.join(req_format_list)
        format_selector = self.build_format_selector(req_format)
        formats_to_download = list(format_selector(formats))
        if not formats_to_download:
            raise ExtractorError('requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
1294
1295     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1296         """Select the requested subtitles and their format"""
1297         available_subs = {}
1298         if normal_subtitles and self.params.get('writesubtitles'):
1299             available_subs.update(normal_subtitles)
1300         if automatic_captions and self.params.get('writeautomaticsub'):
1301             for lang, cap_info in automatic_captions.items():
1302                 if lang not in available_subs:
1303                     available_subs[lang] = cap_info
1304
1305         if (not self.params.get('writesubtitles') and not
1306                 self.params.get('writeautomaticsub') or not
1307                 available_subs):
1308             return None
1309
1310         if self.params.get('allsubtitles', False):
1311             requested_langs = available_subs.keys()
1312         else:
1313             if self.params.get('subtitleslangs', False):
1314                 requested_langs = self.params.get('subtitleslangs')
1315             elif 'en' in available_subs:
1316                 requested_langs = ['en']
1317             else:
1318                 requested_langs = [list(available_subs.keys())[0]]
1319
1320         formats_query = self.params.get('subtitlesformat', 'best')
1321         formats_preference = formats_query.split('/') if formats_query else []
1322         subs = {}
1323         for lang in requested_langs:
1324             formats = available_subs.get(lang)
1325             if formats is None:
1326                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1327                 continue
1328             for ext in formats_preference:
1329                 if ext == 'best':
1330                     f = formats[-1]
1331                     break
1332                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1333                 if matches:
1334                     f = matches[-1]
1335                     break
1336             else:
1337                 f = formats[-1]
1338                 self.report_warning(
1339                     'No subtitle format found matching "%s" for language %s, '
1340                     'using %s' % (formats_query, lang, f['ext']))
1341             subs[lang] = f
1342         return subs
1343
    def process_info(self, info_dict):
        """Process a single resolved IE result.

        Enforces --max-downloads, handles forced printings and simulate
        mode, writes the optional side files (description, annotations,
        subtitles, info JSON, thumbnails), downloads the media (merging
        multiple requested formats when needed), then runs fixups,
        postprocessors and the download archive recording.
        """

        assert info_dict.get('_type', 'video') == 'video'

        # Abort the whole run once --max-downloads is reached.
        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads >= int(max_downloads):
                raise MaxDownloadsReached()

        # Keep the untruncated title around; cap 'title' for filename use.
        info_dict['fulltitle'] = info_dict['title']
        if len(info_dict['title']) > 200:
            info_dict['title'] = info_dict['title'][:197] + '...'

        if 'format' not in info_dict:
            info_dict['format'] = info_dict['ext']

        # Apply --match-filter / date range / archive checks, etc.
        reason = self._match_entry(info_dict, incomplete=False)
        if reason is not None:
            self.to_screen('[download] ' + reason)
            return

        self._num_downloads += 1

        info_dict['_filename'] = filename = self.prepare_filename(info_dict)

        # Forced printings
        if self.params.get('forcetitle', False):
            self.to_stdout(info_dict['fulltitle'])
        if self.params.get('forceid', False):
            self.to_stdout(info_dict['id'])
        if self.params.get('forceurl', False):
            if info_dict.get('requested_formats') is not None:
                for f in info_dict['requested_formats']:
                    self.to_stdout(f['url'] + f.get('play_path', ''))
            else:
                # For RTMP URLs, also include the playpath
                self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
        if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
            self.to_stdout(info_dict['thumbnail'])
        if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
            self.to_stdout(info_dict['description'])
        if self.params.get('forcefilename', False) and filename is not None:
            self.to_stdout(filename)
        if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
            self.to_stdout(formatSeconds(info_dict['duration']))
        if self.params.get('forceformat', False):
            self.to_stdout(info_dict['format'])
        if self.params.get('forcejson', False):
            self.to_stdout(json.dumps(info_dict))

        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):
            return

        if filename is None:
            return

        # Create the destination directory if needed.
        try:
            dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
            if dn and not os.path.exists(dn):
                os.makedirs(dn)
        except (OSError, IOError) as err:
            self.report_error('unable to create directory ' + compat_str(err))
            return

        # Optional side file: plain-text description.
        if self.params.get('writedescription', False):
            descfn = replace_extension(filename, 'description', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Video description is already present')
            elif info_dict.get('description') is None:
                self.report_warning('There\'s no description to write.')
            else:
                try:
                    self.to_screen('[info] Writing video description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(info_dict['description'])
                except (OSError, IOError):
                    self.report_error('Cannot write description file ' + descfn)
                    return

        # Optional side file: annotations XML.
        if self.params.get('writeannotations', False):
            annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
                self.to_screen('[info] Video annotations are already present')
            else:
                try:
                    self.to_screen('[info] Writing video annotations to: ' + annofn)
                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                        annofile.write(info_dict['annotations'])
                except (KeyError, TypeError):
                    # Missing or non-string 'annotations' key.
                    self.report_warning('There are no annotations to write.')
                except (OSError, IOError):
                    self.report_error('Cannot write annotations file: ' + annofn)
                    return

        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub')])

        if subtitles_are_requested and info_dict.get('requested_subtitles'):
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            subtitles = info_dict['requested_subtitles']
            ie = self.get_info_extractor(info_dict['extractor_key'])
            for sub_lang, sub_info in subtitles.items():
                sub_format = sub_info['ext']
                if sub_info.get('data') is not None:
                    sub_data = sub_info['data']
                else:
                    try:
                        sub_data = ie._download_webpage(
                            sub_info['url'], info_dict['id'], note=False)
                    except ExtractorError as err:
                        self.report_warning('Unable to download subtitle for "%s": %s' %
                                            (sub_lang, compat_str(err.cause)))
                        continue
                try:
                    sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                    if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                        self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                    else:
                        self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                            subfile.write(sub_data)
                except (OSError, IOError):
                    self.report_error('Cannot write subtitles file ' + sub_filename)
                    return

        # Optional side file: .info.json metadata dump.
        if self.params.get('writeinfojson', False):
            infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Video description metadata is already present')
            else:
                self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
                try:
                    write_json_file(self.filter_requested_info(info_dict), infofn)
                except (OSError, IOError):
                    self.report_error('Cannot write metadata to JSON file ' + infofn)
                    return

        self._write_thumbnails(info_dict, filename)

        if not self.params.get('skip_download', False):
            try:
                # Helper: run the appropriate FileDownloader for one file.
                def dl(name, info):
                    fd = get_suitable_downloader(info, self.params)(self, self.params)
                    for ph in self._progress_hooks:
                        fd.add_progress_hook(ph)
                    if self.params.get('verbose'):
                        self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                    return fd.download(name, info)

                if info_dict.get('requested_formats') is not None:
                    # Multiple formats (e.g. bestvideo+bestaudio): download
                    # each separately and schedule an ffmpeg merge.
                    downloaded = []
                    success = True
                    merger = FFmpegMergerPP(self)
                    if not merger.available:
                        postprocessors = []
                        self.report_warning('You have requested multiple '
                                            'formats but ffmpeg or avconv are not installed.'
                                            ' The formats won\'t be merged.')
                    else:
                        postprocessors = [merger]

                    def compatible_formats(formats):
                        video, audio = formats
                        # Check extension
                        # NOTE(review): the names look swapped here
                        # (video_ext gets audio's ext and vice versa), but
                        # the membership check below is symmetric, so the
                        # returned value is unaffected.
                        video_ext, audio_ext = audio.get('ext'), video.get('ext')
                        if video_ext and audio_ext:
                            # NOTE(review): ('webm') is a plain string, not a
                            # 1-tuple, so the 'in' below is a substring test
                            # for that entry — presumably ('webm',) was meant.
                            COMPATIBLE_EXTS = (
                                ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
                                ('webm')
                            )
                            for exts in COMPATIBLE_EXTS:
                                if video_ext in exts and audio_ext in exts:
                                    return True
                        # TODO: Check acodec/vcodec
                        return False

                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext == info_dict['ext']
                        else filename)
                    requested_formats = info_dict['requested_formats']
                    if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'Requested formats are incompatible for merge and will be merged into mkv.')
                    # Ensure filename always has a correct extension for successful merge
                    filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
                    if os.path.exists(encodeFilename(filename)):
                        self.to_screen(
                            '[download] %s has already been downloaded and '
                            'merged' % filename)
                    else:
                        # Download every requested format to an 'f<id>'-prefixed
                        # temporary name; the merger consumes them later.
                        for f in requested_formats:
                            new_info = dict(info_dict)
                            new_info.update(f)
                            fname = self.prepare_filename(new_info)
                            fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
                            downloaded.append(fname)
                            partial_success = dl(fname, new_info)
                            success = success and partial_success
                        info_dict['__postprocessors'] = postprocessors
                        info_dict['__files_to_merge'] = downloaded
                else:
                    # Just a single file
                    success = dl(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error('unable to download video data: %s' % str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

            if success:
                # Fixup content
                fixup_policy = self.params.get('fixup')
                if fixup_policy is None:
                    fixup_policy = 'detect_or_warn'

                # Fixup 1: non-uniform pixel aspect ratio.
                stretched_ratio = info_dict.get('stretched_ratio')
                if stretched_ratio is not None and stretched_ratio != 1:
                    if fixup_policy == 'warn':
                        self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                            info_dict['id'], stretched_ratio))
                    elif fixup_policy == 'detect_or_warn':
                        stretched_pp = FFmpegFixupStretchedPP(self)
                        if stretched_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(stretched_pp)
                        else:
                            self.report_warning(
                                '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
                                    info_dict['id'], stretched_ratio))
                    else:
                        assert fixup_policy in ('ignore', 'never')

                # Fixup 2: DASH m4a container compatibility.
                if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
                    if fixup_policy == 'warn':
                        self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
                            info_dict['id']))
                    elif fixup_policy == 'detect_or_warn':
                        fixup_pp = FFmpegFixupM4aPP(self)
                        if fixup_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(fixup_pp)
                        else:
                            self.report_warning(
                                '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
                                    info_dict['id']))
                    else:
                        assert fixup_policy in ('ignore', 'never')

                try:
                    self.post_process(filename, info_dict)
                except (PostProcessingError) as err:
                    self.report_error('postprocessing: %s' % str(err))
                    return
                self.record_download_archive(info_dict)
1607
1608     def download(self, url_list):
1609         """Download a given list of URLs."""
1610         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1611         if (len(url_list) > 1 and
1612                 '%' not in outtmpl and
1613                 self.params.get('max_downloads') != 1):
1614             raise SameFileError(outtmpl)
1615
1616         for url in url_list:
1617             try:
1618                 # It also downloads the videos
1619                 res = self.extract_info(
1620                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
1621             except UnavailableVideoError:
1622                 self.report_error('unable to download video')
1623             except MaxDownloadsReached:
1624                 self.to_screen('[info] Maximum number of downloaded files reached.')
1625                 raise
1626             else:
1627                 if self.params.get('dump_single_json', False):
1628                     self.to_stdout(json.dumps(res))
1629
1630         return self._download_retcode
1631
1632     def download_with_info_file(self, info_filename):
1633         with contextlib.closing(fileinput.FileInput(
1634                 [info_filename], mode='r',
1635                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1636             # FileInput doesn't have a read method, we can't call json.load
1637             info = self.filter_requested_info(json.loads('\n'.join(f)))
1638         try:
1639             self.process_ie_result(info, download=True)
1640         except DownloadError:
1641             webpage_url = info.get('webpage_url')
1642             if webpage_url is not None:
1643                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1644                 return self.download([webpage_url])
1645             else:
1646                 raise
1647         return self._download_retcode
1648
1649     @staticmethod
1650     def filter_requested_info(info_dict):
1651         return dict(
1652             (k, v) for k, v in info_dict.items()
1653             if k not in ['requested_formats', 'requested_subtitles'])
1654
1655     def post_process(self, filename, ie_info):
1656         """Run all the postprocessors on the given file."""
1657         info = dict(ie_info)
1658         info['filepath'] = filename
1659         pps_chain = []
1660         if ie_info.get('__postprocessors') is not None:
1661             pps_chain.extend(ie_info['__postprocessors'])
1662         pps_chain.extend(self._pps)
1663         for pp in pps_chain:
1664             files_to_delete = []
1665             try:
1666                 files_to_delete, info = pp.run(info)
1667             except PostProcessingError as e:
1668                 self.report_error(e.msg)
1669             if files_to_delete and not self.params.get('keepvideo', False):
1670                 for old_filename in files_to_delete:
1671                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1672                     try:
1673                         os.remove(encodeFilename(old_filename))
1674                     except (IOError, OSError):
1675                         self.report_warning('Unable to remove downloaded original file')
1676
1677     def _make_archive_id(self, info_dict):
1678         # Future-proof against any change in case
1679         # and backwards compatibility with prior versions
1680         extractor = info_dict.get('extractor_key')
1681         if extractor is None:
1682             if 'id' in info_dict:
1683                 extractor = info_dict.get('ie_key')  # key in a playlist
1684         if extractor is None:
1685             return None  # Incomplete video information
1686         return extractor.lower() + ' ' + info_dict['id']
1687
1688     def in_download_archive(self, info_dict):
1689         fn = self.params.get('download_archive')
1690         if fn is None:
1691             return False
1692
1693         vid_id = self._make_archive_id(info_dict)
1694         if vid_id is None:
1695             return False  # Incomplete video information
1696
1697         try:
1698             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1699                 for line in archive_file:
1700                     if line.strip() == vid_id:
1701                         return True
1702         except IOError as ioe:
1703             if ioe.errno != errno.ENOENT:
1704                 raise
1705         return False
1706
1707     def record_download_archive(self, info_dict):
1708         fn = self.params.get('download_archive')
1709         if fn is None:
1710             return
1711         vid_id = self._make_archive_id(info_dict)
1712         assert vid_id
1713         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1714             archive_file.write(vid_id + '\n')
1715
1716     @staticmethod
1717     def format_resolution(format, default='unknown'):
1718         if format.get('vcodec') == 'none':
1719             return 'audio only'
1720         if format.get('resolution') is not None:
1721             return format['resolution']
1722         if format.get('height') is not None:
1723             if format.get('width') is not None:
1724                 res = '%sx%s' % (format['width'], format['height'])
1725             else:
1726                 res = '%sp' % format['height']
1727         elif format.get('width') is not None:
1728             res = '?x%d' % format['width']
1729         else:
1730             res = default
1731         return res
1732
    def _format_note(self, fdict):
        """Return a terse one-line summary of a format dict (note, bitrates,
        container, codecs, fps, sample rate, file size) for list_formats.
        """
        res = ''
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            if res:
                res += ', '
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            if res:
                res += ', '
            res += fdict['vcodec']
            if fdict.get('vbr') is not None:
                # '@' glues the video bitrate (appended below) to the codec.
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            # Video bitrate known but codec unknown.
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            res += ', %sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            if fdict['acodec'] == 'none':
                res += 'video only'
            else:
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            # Audio bitrate, attached to the codec/'audio' label above.
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            if res:
                res += ', '
            res += '~' + format_bytes(fdict['filesize_approx'])
        return res
1782
1783     def list_formats(self, info_dict):
1784         formats = info_dict.get('formats', [info_dict])
1785         table = [
1786             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1787             for f in formats
1788             if f.get('preference') is None or f['preference'] >= -1000]
1789         if len(formats) > 1:
1790             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1791
1792         header_line = ['format code', 'extension', 'resolution', 'note']
1793         self.to_screen(
1794             '[info] Available formats for %s:\n%s' %
1795             (info_dict['id'], render_table(header_line, table)))
1796
1797     def list_thumbnails(self, info_dict):
1798         thumbnails = info_dict.get('thumbnails')
1799         if not thumbnails:
1800             tn_url = info_dict.get('thumbnail')
1801             if tn_url:
1802                 thumbnails = [{'id': '0', 'url': tn_url}]
1803             else:
1804                 self.to_screen(
1805                     '[info] No thumbnails present for %s' % info_dict['id'])
1806                 return
1807
1808         self.to_screen(
1809             '[info] Thumbnails for %s:' % info_dict['id'])
1810         self.to_screen(render_table(
1811             ['ID', 'width', 'height', 'URL'],
1812             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1813
1814     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1815         if not subtitles:
1816             self.to_screen('%s has no %s' % (video_id, name))
1817             return
1818         self.to_screen(
1819             'Available %s for %s:' % (name, video_id))
1820         self.to_screen(render_table(
1821             ['Language', 'formats'],
1822             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1823                 for lang, formats in subtitles.items()]))
1824
1825     def urlopen(self, req):
1826         """ Start an HTTP download """
1827
1828         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1829         # always respected by websites, some tend to give out URLs with non percent-encoded
1830         # non-ASCII characters (see telemb.py, ard.py [#3412])
1831         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1832         # To work around aforementioned issue we will replace request's original URL with
1833         # percent-encoded one
1834         req_is_string = isinstance(req, compat_basestring)
1835         url = req if req_is_string else req.get_full_url()
1836         url_escaped = escape_url(url)
1837
1838         # Substitute URL if any change after escaping
1839         if url != url_escaped:
1840             if req_is_string:
1841                 req = url_escaped
1842             else:
1843                 req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
1844                 req = req_type(
1845                     url_escaped, data=req.data, headers=req.headers,
1846                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1847
1848         return self._opener.open(req, timeout=self._socket_timeout)
1849
    def print_debug_header(self):
        """Print diagnostic information (versions, encodings, proxies,
        optional update check) at startup; no-op unless the 'verbose'
        option is set."""
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
        try:
            # Best-effort: report the git commit when running from a checkout.
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        except Exception:
            try:
                # Python 2 only: clear the exception just swallowed.
                sys.exc_clear()
            except Exception:
                pass
        self._write_string('[debug] Python version %s - %s\n' % (
            platform.python_version(), platform_name()))

        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Collect proxies from every opener handler that exposes them.
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            # NOTE: performs network requests when --call-home is enabled.
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
1914
1915     def _setup_opener(self):
1916         timeout_val = self.params.get('socket_timeout')
1917         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1918
1919         opts_cookiefile = self.params.get('cookiefile')
1920         opts_proxy = self.params.get('proxy')
1921
1922         if opts_cookiefile is None:
1923             self.cookiejar = compat_cookiejar.CookieJar()
1924         else:
1925             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1926                 opts_cookiefile)
1927             if os.access(opts_cookiefile, os.R_OK):
1928                 self.cookiejar.load()
1929
1930         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1931             self.cookiejar)
1932         if opts_proxy is not None:
1933             if opts_proxy == '':
1934                 proxies = {}
1935             else:
1936                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1937         else:
1938             proxies = compat_urllib_request.getproxies()
1939             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1940             if 'http' in proxies and 'https' not in proxies:
1941                 proxies['https'] = proxies['http']
1942         proxy_handler = PerRequestProxyHandler(proxies)
1943
1944         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1945         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1946         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1947         opener = compat_urllib_request.build_opener(
1948             proxy_handler, https_handler, cookie_processor, ydlh)
1949
1950         # Delete the default user-agent header, which would otherwise apply in
1951         # cases where our custom HTTP handler doesn't come into play
1952         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1953         opener.addheaders = []
1954         self._opener = opener
1955
1956     def encode(self, s):
1957         if isinstance(s, bytes):
1958             return s  # Already encoded
1959
1960         try:
1961             return s.encode(self.get_encoding())
1962         except UnicodeEncodeError as err:
1963             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1964             raise
1965
1966     def get_encoding(self):
1967         encoding = self.params.get('encoding')
1968         if encoding is None:
1969             encoding = preferredencoding()
1970         return encoding
1971
1972     def _write_thumbnails(self, info_dict, filename):
1973         if self.params.get('writethumbnail', False):
1974             thumbnails = info_dict.get('thumbnails')
1975             if thumbnails:
1976                 thumbnails = [thumbnails[-1]]
1977         elif self.params.get('write_all_thumbnails', False):
1978             thumbnails = info_dict.get('thumbnails')
1979         else:
1980             return
1981
1982         if not thumbnails:
1983             # No thumbnails present, so return immediately
1984             return
1985
1986         for t in thumbnails:
1987             thumb_ext = determine_ext(t['url'], 'jpg')
1988             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
1989             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
1990             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
1991
1992             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1993                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
1994                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1995             else:
1996                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
1997                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1998                 try:
1999                     uf = self.urlopen(t['url'])
2000                     with open(thumb_filename, 'wb') as thumbf:
2001                         shutil.copyfileobj(uf, thumbf)
2002                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2003                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2004                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2005                     self.report_warning('Unable to download thumbnail "%s": %s' %
2006                                         (t['url'], compat_str(err)))