[jython] Introduce compat_os_name
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import datetime
9 import errno
10 import fileinput
11 import io
12 import itertools
13 import json
14 import locale
15 import operator
16 import os
17 import platform
18 import re
19 import shutil
20 import subprocess
21 import socket
22 import sys
23 import time
24 import tokenize
25 import traceback
26
27 from .compat import (
28     compat_basestring,
29     compat_cookiejar,
30     compat_expanduser,
31     compat_get_terminal_size,
32     compat_http_client,
33     compat_kwargs,
34     compat_os_name,
35     compat_str,
36     compat_tokenize_tokenize,
37     compat_urllib_error,
38     compat_urllib_request,
39     compat_urllib_request_DataHandler,
40 )
41 from .utils import (
42     ContentTooShortError,
43     date_from_str,
44     DateRange,
45     DEFAULT_OUTTMPL,
46     determine_ext,
47     determine_protocol,
48     DownloadError,
49     encode_compat_str,
50     encodeFilename,
51     error_to_compat_str,
52     ExtractorError,
53     format_bytes,
54     formatSeconds,
55     locked_file,
56     make_HTTPS_handler,
57     MaxDownloadsReached,
58     PagedList,
59     parse_filesize,
60     PerRequestProxyHandler,
61     PostProcessingError,
62     platform_name,
63     preferredencoding,
64     render_table,
65     SameFileError,
66     sanitize_filename,
67     sanitize_path,
68     sanitized_Request,
69     std_headers,
70     subtitles_filename,
71     UnavailableVideoError,
72     url_basename,
73     version_tuple,
74     write_json_file,
75     write_string,
76     YoutubeDLCookieProcessor,
77     YoutubeDLHandler,
78     prepend_extension,
79     replace_extension,
80     args_to_str,
81     age_restricted,
82 )
83 from .cache import Cache
84 from .extractor import get_info_extractor, gen_extractors
85 from .downloader import get_suitable_downloader
86 from .downloader.rtmp import rtmpdump_version
87 from .postprocessor import (
88     FFmpegFixupM4aPP,
89     FFmpegFixupStretchedPP,
90     FFmpegMergerPP,
91     FFmpegPostProcessor,
92     get_postprocessor,
93 )
94 from .version import __version__
95
# ctypes is only needed on Windows, for the console-title manipulation in
# YoutubeDL.to_console_title(); it is deliberately not imported elsewhere.
if compat_os_name == 'nt':
    import ctypes
98
99
100 class YoutubeDL(object):
101     """YoutubeDL class.
102
103     YoutubeDL objects are the ones responsible of downloading the
104     actual video file and writing it to disk if the user has requested
105     it, among some other tasks. In most cases there should be one per
106     program. As, given a video URL, the downloader doesn't know how to
107     extract all the needed information, task that InfoExtractors do, it
108     has to pass the URL to one of them.
109
110     For this, YoutubeDL objects have a method that allows
111     InfoExtractors to be registered in a given order. When it is passed
112     a URL, the YoutubeDL object handles it to the first InfoExtractor it
113     finds that reports being able to handle it. The InfoExtractor extracts
114     all the information about the video or videos the URL refers to, and
115     YoutubeDL process the extracted information, possibly using a File
116     Downloader to download the video.
117
118     YoutubeDL objects accept a lot of parameters. In order not to saturate
119     the object constructor with arguments, it receives a dictionary of
120     options instead. These options are available through the params
121     attribute for the InfoExtractors to use. The YoutubeDL also
122     registers itself as the downloader in charge for the InfoExtractors
123     that are added to it, so this is a "mutual registration".
124
125     Available options:
126
127     username:          Username for authentication purposes.
128     password:          Password for authentication purposes.
129     videopassword:     Password for accessing a video.
130     usenetrc:          Use netrc for authentication instead.
131     verbose:           Print additional info to stdout.
132     quiet:             Do not print messages to stdout.
133     no_warnings:       Do not print out anything for warnings.
134     forceurl:          Force printing final URL.
135     forcetitle:        Force printing title.
136     forceid:           Force printing ID.
137     forcethumbnail:    Force printing thumbnail URL.
138     forcedescription:  Force printing description.
139     forcefilename:     Force printing final filename.
140     forceduration:     Force printing duration.
141     forcejson:         Force printing info_dict as JSON.
142     dump_single_json:  Force printing the info_dict of the whole playlist
143                        (or video) as a single JSON line.
144     simulate:          Do not download the video files.
145     format:            Video format code. See options.py for more information.
146     outtmpl:           Template for output names.
147     restrictfilenames: Do not allow "&" and spaces in file names
148     ignoreerrors:      Do not stop on download errors.
149     force_generic_extractor: Force downloader to use the generic extractor
150     nooverwrites:      Prevent overwriting files.
151     playliststart:     Playlist item to start at.
152     playlistend:       Playlist item to end at.
153     playlist_items:    Specific indices of playlist to download.
154     playlistreverse:   Download playlist items in reverse order.
155     matchtitle:        Download only matching titles.
156     rejecttitle:       Reject downloads for matching titles.
157     logger:            Log messages to a logging.Logger instance.
158     logtostderr:       Log messages to stderr instead of stdout.
159     writedescription:  Write the video description to a .description file
160     writeinfojson:     Write the video description to a .info.json file
161     writeannotations:  Write the video annotations to a .annotations.xml file
162     writethumbnail:    Write the thumbnail image to a file
163     write_all_thumbnails:  Write all thumbnail formats to files
164     writesubtitles:    Write the video subtitles to a file
165     writeautomaticsub: Write the automatically generated subtitles to a file
166     allsubtitles:      Downloads all the subtitles of the video
167                        (requires writesubtitles or writeautomaticsub)
168     listsubtitles:     Lists all available subtitles for the video
169     subtitlesformat:   The format code for subtitles
170     subtitleslangs:    List of languages of the subtitles to download
171     keepvideo:         Keep the video file after post-processing
172     daterange:         A DateRange object, download only if the upload_date is in the range.
173     skip_download:     Skip the actual download of the video file
174     cachedir:          Location of the cache files in the filesystem.
175                        False to disable filesystem cache.
176     noplaylist:        Download single video instead of a playlist if in doubt.
177     age_limit:         An integer representing the user's age in years.
178                        Unsuitable videos for the given age are skipped.
179     min_views:         An integer representing the minimum view count the video
180                        must have in order to not be skipped.
181                        Videos without view count information are always
182                        downloaded. None for no limit.
183     max_views:         An integer representing the maximum view count.
184                        Videos that are more popular than that are not
185                        downloaded.
186                        Videos without view count information are always
187                        downloaded. None for no limit.
188     download_archive:  File name of a file where all downloads are recorded.
189                        Videos already present in the file are not downloaded
190                        again.
191     cookiefile:        File name where cookies should be read from and dumped to.
192     nocheckcertificate:Do not verify SSL certificates
193     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
194                        At the moment, this is only supported by YouTube.
195     proxy:             URL of the proxy server to use
196     cn_verification_proxy:  URL of the proxy to use for IP address verification
197                        on Chinese sites. (Experimental)
198     socket_timeout:    Time to wait for unresponsive hosts, in seconds
199     bidi_workaround:   Work around buggy terminals without bidirectional text
200                        support, using fridibi
201     debug_printtraffic:Print out sent and received HTTP traffic
202     include_ads:       Download ads as well
203     default_search:    Prepend this string if an input url is not valid.
204                        'auto' for elaborate guessing
205     encoding:          Use this encoding instead of the system-specified.
206     extract_flat:      Do not resolve URLs, return the immediate result.
207                        Pass in 'in_playlist' to only show this behavior for
208                        playlist items.
209     postprocessors:    A list of dictionaries, each with an entry
210                        * key:  The name of the postprocessor. See
211                                youtube_dl/postprocessor/__init__.py for a list.
212                        as well as any further keyword arguments for the
213                        postprocessor.
214     progress_hooks:    A list of functions that get called on download
215                        progress, with a dictionary with the entries
216                        * status: One of "downloading", "error", or "finished".
217                                  Check this first and ignore unknown values.
218
219                        If status is one of "downloading", or "finished", the
220                        following properties may also be present:
221                        * filename: The final filename (always present)
222                        * tmpfilename: The filename we're currently writing to
223                        * downloaded_bytes: Bytes on disk
224                        * total_bytes: Size of the whole file, None if unknown
225                        * total_bytes_estimate: Guess of the eventual file size,
226                                                None if unavailable.
227                        * elapsed: The number of seconds since download started.
228                        * eta: The estimated time in seconds, None if unknown
229                        * speed: The download speed in bytes/second, None if
230                                 unknown
231                        * fragment_index: The counter of the currently
232                                          downloaded video fragment.
233                        * fragment_count: The number of fragments (= individual
234                                          files that will be merged)
235
236                        Progress hooks are guaranteed to be called at least once
237                        (with status "finished") if the download is successful.
238     merge_output_format: Extension to use when merging formats.
239     fixup:             Automatically correct known faults of the file.
240                        One of:
241                        - "never": do nothing
242                        - "warn": only emit a warning
243                        - "detect_or_warn": check whether we can do anything
244                                            about it, warn otherwise (default)
245     source_address:    (Experimental) Client-side IP address to bind to.
246     call_home:         Boolean, true iff we are allowed to contact the
247                        youtube-dl servers for debugging.
248     sleep_interval:    Number of seconds to sleep before each download.
249     listformats:       Print an overview of available video formats and exit.
250     list_thumbnails:   Print a table of all thumbnails and exit.
251     match_filter:      A function that gets called with the info_dict of
252                        every video.
253                        If it returns a message, the video is ignored.
254                        If it returns None, the video is downloaded.
255                        match_filter_func in utils.py is one example for this.
256     no_color:          Do not emit color codes in output.
257
258     The following options determine which downloader is picked:
259     external_downloader: Executable of the external downloader to call.
260                        None or unset for standard (built-in) downloader.
261     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
262
263     The following parameters are not used by YoutubeDL itself, they are used by
264     the downloader (see youtube_dl/downloader/common.py):
265     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
266     noresizebuffer, retries, continuedl, noprogress, consoletitle,
267     xattr_set_filesize, external_downloader_args, hls_use_mpegts.
268
269     The following options are used by the post processors:
270     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
271                        otherwise prefer avconv.
272     postprocessor_args: A list of additional command-line arguments for the
273                         postprocessor.
274     """
275
276     params = None
277     _ies = []
278     _pps = []
279     _download_retcode = None
280     _num_downloads = None
281     _screen_file = None
282
283     def __init__(self, params=None, auto_init=True):
284         """Create a FileDownloader object with the given options."""
285         if params is None:
286             params = {}
287         self._ies = []
288         self._ies_instances = {}
289         self._pps = []
290         self._progress_hooks = []
291         self._download_retcode = 0
292         self._num_downloads = 0
293         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
294         self._err_file = sys.stderr
295         self.params = {
296             # Default parameters
297             'nocheckcertificate': False,
298         }
299         self.params.update(params)
300         self.cache = Cache(self)
301
302         if params.get('bidi_workaround', False):
303             try:
304                 import pty
305                 master, slave = pty.openpty()
306                 width = compat_get_terminal_size().columns
307                 if width is None:
308                     width_args = []
309                 else:
310                     width_args = ['-w', str(width)]
311                 sp_kwargs = dict(
312                     stdin=subprocess.PIPE,
313                     stdout=slave,
314                     stderr=self._err_file)
315                 try:
316                     self._output_process = subprocess.Popen(
317                         ['bidiv'] + width_args, **sp_kwargs
318                     )
319                 except OSError:
320                     self._output_process = subprocess.Popen(
321                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
322                 self._output_channel = os.fdopen(master, 'rb')
323             except OSError as ose:
324                 if ose.errno == 2:
325                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
326                 else:
327                     raise
328
329         if (sys.version_info >= (3,) and sys.platform != 'win32' and
330                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
331                 not params.get('restrictfilenames', False)):
332             # On Python 3, the Unicode filesystem API will throw errors (#1474)
333             self.report_warning(
334                 'Assuming --restrict-filenames since file system encoding '
335                 'cannot encode all characters. '
336                 'Set the LC_ALL environment variable to fix this.')
337             self.params['restrictfilenames'] = True
338
339         if isinstance(params.get('outtmpl'), bytes):
340             self.report_warning(
341                 'Parameter outtmpl is bytes, but should be a unicode string. '
342                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
343
344         self._setup_opener()
345
346         if auto_init:
347             self.print_debug_header()
348             self.add_default_info_extractors()
349
350         for pp_def_raw in self.params.get('postprocessors', []):
351             pp_class = get_postprocessor(pp_def_raw['key'])
352             pp_def = dict(pp_def_raw)
353             del pp_def['key']
354             pp = pp_class(self, **compat_kwargs(pp_def))
355             self.add_post_processor(pp)
356
357         for ph in self.params.get('progress_hooks', []):
358             self.add_progress_hook(ph)
359
360     def warn_if_short_id(self, argv):
361         # short YouTube ID starting with dash?
362         idxs = [
363             i for i, a in enumerate(argv)
364             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
365         if idxs:
366             correct_argv = (
367                 ['youtube-dl'] +
368                 [a for i, a in enumerate(argv) if i not in idxs] +
369                 ['--'] + [argv[i] for i in idxs]
370             )
371             self.report_warning(
372                 'Long argument string detected. '
373                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
374                 args_to_str(correct_argv))
375
376     def add_info_extractor(self, ie):
377         """Add an InfoExtractor object to the end of the list."""
378         self._ies.append(ie)
379         self._ies_instances[ie.ie_key()] = ie
380         ie.set_downloader(self)
381
382     def get_info_extractor(self, ie_key):
383         """
384         Get an instance of an IE with name ie_key, it will try to get one from
385         the _ies list, if there's no instance it will create a new one and add
386         it to the extractor list.
387         """
388         ie = self._ies_instances.get(ie_key)
389         if ie is None:
390             ie = get_info_extractor(ie_key)()
391             self.add_info_extractor(ie)
392         return ie
393
394     def add_default_info_extractors(self):
395         """
396         Add the InfoExtractors returned by gen_extractors to the end of the list
397         """
398         for ie in gen_extractors():
399             self.add_info_extractor(ie)
400
401     def add_post_processor(self, pp):
402         """Add a PostProcessor object to the end of the chain."""
403         self._pps.append(pp)
404         pp.set_downloader(self)
405
406     def add_progress_hook(self, ph):
407         """Add the progress hook (currently only for the file downloader)"""
408         self._progress_hooks.append(ph)
409
    def _bidi_workaround(self, message):
        # Pipe `message` through the external bidi helper process started in
        # __init__ (bidiv or fribidi) and return the reordered text; if the
        # workaround was never set up, return the message untouched.
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, compat_str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode('utf-8'))
        self._output_process.stdin.flush()
        # Read back exactly as many lines as were written; the helper echoes
        # them with bidirectional reordering applied.
        res = ''.join(self._output_channel.readline().decode('utf-8')
                      for _ in range(line_count))
        # Strip the trailing newline that was appended above.
        return res[:-len('\n')]
422
423     def to_screen(self, message, skip_eol=False):
424         """Print message to stdout if not in quiet mode."""
425         return self.to_stdout(message, skip_eol, check_quiet=True)
426
427     def _write_string(self, s, out=None):
428         write_string(s, out=out, encoding=self.params.get('encoding'))
429
430     def to_stdout(self, message, skip_eol=False, check_quiet=False):
431         """Print message to stdout if not in quiet mode."""
432         if self.params.get('logger'):
433             self.params['logger'].debug(message)
434         elif not check_quiet or not self.params.get('quiet', False):
435             message = self._bidi_workaround(message)
436             terminator = ['\n', ''][skip_eol]
437             output = message + terminator
438
439             self._write_string(output, self._screen_file)
440
441     def to_stderr(self, message):
442         """Print message to stderr."""
443         assert isinstance(message, compat_str)
444         if self.params.get('logger'):
445             self.params['logger'].error(message)
446         else:
447             message = self._bidi_workaround(message)
448             output = message + '\n'
449             self._write_string(output, self._err_file)
450
    def to_console_title(self, message):
        # Set the console/terminal window title to `message`; no-op unless
        # the user enabled the consoletitle option.
        if not self.params.get('consoletitle', False):
            return
        if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            # xterm-compatible terminals: OSC 0 sets the window/icon title.
            self._write_string('\033]0;%s\007' % message, self._screen_file)
460
461     def save_console_title(self):
462         if not self.params.get('consoletitle', False):
463             return
464         if 'TERM' in os.environ:
465             # Save the title on stack
466             self._write_string('\033[22;0t', self._screen_file)
467
468     def restore_console_title(self):
469         if not self.params.get('consoletitle', False):
470             return
471         if 'TERM' in os.environ:
472             # Restore the title from stack
473             self._write_string('\033[23;0t', self._screen_file)
474
475     def __enter__(self):
476         self.save_console_title()
477         return self
478
479     def __exit__(self, *args):
480         self.restore_console_title()
481
482         if self.params.get('cookiefile') is not None:
483             self.cookiejar.save()
484
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # Extractor errors wrap the original exception info in an
                    # `exc_info` attribute; include that traceback first.
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: show the current stack instead.
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Prefer the wrapped (original) exception info when available so
            # the raised DownloadError points at the root cause.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1
514
515     def report_warning(self, message):
516         '''
517         Print the message to stderr, it will be prefixed with 'WARNING:'
518         If stderr is a tty file the 'WARNING:' will be colored
519         '''
520         if self.params.get('logger') is not None:
521             self.params['logger'].warning(message)
522         else:
523             if self.params.get('no_warnings'):
524                 return
525             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
526                 _msg_header = '\033[0;33mWARNING:\033[0m'
527             else:
528                 _msg_header = 'WARNING:'
529             warning_message = '%s %s' % (_msg_header, message)
530             self.to_stderr(warning_message)
531
532     def report_error(self, message, tb=None):
533         '''
534         Do the same as trouble, but prefixes the message with 'ERROR:', colored
535         in red if stderr is a tty file.
536         '''
537         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
538             _msg_header = '\033[0;31mERROR:\033[0m'
539         else:
540             _msg_header = 'ERROR:'
541         error_message = '%s %s' % (_msg_header, message)
542         self.trouble(error_message, tb)
543
544     def report_file_already_downloaded(self, file_name):
545         """Report file has already been fully downloaded."""
546         try:
547             self.to_screen('[download] %s has already been downloaded' % file_name)
548         except UnicodeEncodeError:
549             self.to_screen('[download] The file has already been downloaded')
550
551     def prepare_filename(self, info_dict):
552         """Generate the output filename."""
553         try:
554             template_dict = dict(info_dict)
555
556             template_dict['epoch'] = int(time.time())
557             autonumber_size = self.params.get('autonumber_size')
558             if autonumber_size is None:
559                 autonumber_size = 5
560             autonumber_templ = '%0' + str(autonumber_size) + 'd'
561             template_dict['autonumber'] = autonumber_templ % self._num_downloads
562             if template_dict.get('playlist_index') is not None:
563                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
564             if template_dict.get('resolution') is None:
565                 if template_dict.get('width') and template_dict.get('height'):
566                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
567                 elif template_dict.get('height'):
568                     template_dict['resolution'] = '%sp' % template_dict['height']
569                 elif template_dict.get('width'):
570                     template_dict['resolution'] = '?x%d' % template_dict['width']
571
572             sanitize = lambda k, v: sanitize_filename(
573                 compat_str(v),
574                 restricted=self.params.get('restrictfilenames'),
575                 is_id=(k == 'id'))
576             template_dict = dict((k, sanitize(k, v))
577                                  for k, v in template_dict.items()
578                                  if v is not None)
579             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
580
581             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
582             tmpl = compat_expanduser(outtmpl)
583             filename = tmpl % template_dict
584             # Temporary fix for #4787
585             # 'Treat' all problem characters by passing filename through preferredencoding
586             # to workaround encoding issues with subprocess on python2 @ Windows
587             if sys.version_info < (3, 0) and sys.platform == 'win32':
588                 filename = encodeFilename(filename, True).decode(preferredencoding())
589             return sanitize_path(filename)
590         except ValueError as err:
591             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
592             return None
593
    def _match_entry(self, info_dict, incomplete):
        """ Returns None iff the file should be downloaded """

        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        # 'title' may be missing when we're just evaluating the playlist
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle:
                if not re.search(matchtitle, title, re.IGNORECASE):
                    return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle:
                if re.search(rejecttitle, title, re.IGNORECASE):
                    return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        # Upload-date filtering (--date / --datebefore / --dateafter).
        date = info_dict.get('upload_date')
        if date is not None:
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        # View-count thresholds; videos without a view count always pass.
        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title
        if self.in_download_archive(info_dict):
            return '%s has already been recorded in archive' % video_title

        # The custom --match-filter function is only applied once the
        # metadata is complete, so it can rely on all fields being present.
        if not incomplete:
            match_filter = self.params.get('match_filter')
            if match_filter is not None:
                ret = match_filter(info_dict)
                if ret is not None:
                    return ret

        return None
635
636     @staticmethod
637     def add_extra_info(info_dict, extra_info):
638         '''Set the keys from extra_info in info dict if they are missing'''
639         for key, value in extra_info.items():
640             info_dict.setdefault(key, value)
641
642     def extract_info(self, url, download=True, ie_key=None, extra_info={},
643                      process=True, force_generic_extractor=False):
644         '''
645         Returns a list with a dictionary for each video we find.
646         If 'download', also downloads the videos.
647         extra_info is a dict containing the extra values to add to each result
648         '''
649
650         if not ie_key and force_generic_extractor:
651             ie_key = 'Generic'
652
653         if ie_key:
654             ies = [self.get_info_extractor(ie_key)]
655         else:
656             ies = self._ies
657
658         for ie in ies:
659             if not ie.suitable(url):
660                 continue
661
662             if not ie.working():
663                 self.report_warning('The program functionality for this site has been marked as broken, '
664                                     'and will probably not work.')
665
666             try:
667                 ie_result = ie.extract(url)
668                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
669                     break
670                 if isinstance(ie_result, list):
671                     # Backwards compatibility: old IE result format
672                     ie_result = {
673                         '_type': 'compat_list',
674                         'entries': ie_result,
675                     }
676                 self.add_default_extra_info(ie_result, ie, url)
677                 if process:
678                     return self.process_ie_result(ie_result, download, extra_info)
679                 else:
680                     return ie_result
681             except ExtractorError as e:  # An error we somewhat expected
682                 self.report_error(compat_str(e), e.format_traceback())
683                 break
684             except MaxDownloadsReached:
685                 raise
686             except Exception as e:
687                 if self.params.get('ignoreerrors', False):
688                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
689                     break
690                 else:
691                     raise
692         else:
693             self.report_error('no suitable InfoExtractor for URL %s' % url)
694
695     def add_default_extra_info(self, ie_result, ie, url):
696         self.add_extra_info(ie_result, {
697             'extractor': ie.IE_NAME,
698             'webpage_url': url,
699             'webpage_url_basename': url_basename(url),
700             'extractor_key': ie.ie_key(),
701         })
702
703     def process_ie_result(self, ie_result, download=True, extra_info={}):
704         """
705         Take the result of the ie(may be modified) and resolve all unresolved
706         references (URLs, playlist items).
707
708         It will also download the videos if 'download'.
709         Returns the resolved ie_result.
710         """
711         result_type = ie_result.get('_type', 'video')
712
713         if result_type in ('url', 'url_transparent'):
714             extract_flat = self.params.get('extract_flat', False)
715             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
716                     extract_flat is True):
717                 if self.params.get('forcejson', False):
718                     self.to_stdout(json.dumps(ie_result))
719                 return ie_result
720
721         if result_type == 'video':
722             self.add_extra_info(ie_result, extra_info)
723             return self.process_video_result(ie_result, download=download)
724         elif result_type == 'url':
725             # We have to add extra_info to the results because it may be
726             # contained in a playlist
727             return self.extract_info(ie_result['url'],
728                                      download,
729                                      ie_key=ie_result.get('ie_key'),
730                                      extra_info=extra_info)
731         elif result_type == 'url_transparent':
732             # Use the information from the embedding page
733             info = self.extract_info(
734                 ie_result['url'], ie_key=ie_result.get('ie_key'),
735                 extra_info=extra_info, download=False, process=False)
736
737             force_properties = dict(
738                 (k, v) for k, v in ie_result.items() if v is not None)
739             for f in ('_type', 'url', 'ie_key'):
740                 if f in force_properties:
741                     del force_properties[f]
742             new_result = info.copy()
743             new_result.update(force_properties)
744
745             assert new_result.get('_type') != 'url_transparent'
746
747             return self.process_ie_result(
748                 new_result, download=download, extra_info=extra_info)
749         elif result_type == 'playlist' or result_type == 'multi_video':
750             # We process each entry in the playlist
751             playlist = ie_result.get('title') or ie_result.get('id')
752             self.to_screen('[download] Downloading playlist: %s' % playlist)
753
754             playlist_results = []
755
756             playliststart = self.params.get('playliststart', 1) - 1
757             playlistend = self.params.get('playlistend')
758             # For backwards compatibility, interpret -1 as whole list
759             if playlistend == -1:
760                 playlistend = None
761
762             playlistitems_str = self.params.get('playlist_items')
763             playlistitems = None
764             if playlistitems_str is not None:
765                 def iter_playlistitems(format):
766                     for string_segment in format.split(','):
767                         if '-' in string_segment:
768                             start, end = string_segment.split('-')
769                             for item in range(int(start), int(end) + 1):
770                                 yield int(item)
771                         else:
772                             yield int(string_segment)
773                 playlistitems = iter_playlistitems(playlistitems_str)
774
775             ie_entries = ie_result['entries']
776             if isinstance(ie_entries, list):
777                 n_all_entries = len(ie_entries)
778                 if playlistitems:
779                     entries = [
780                         ie_entries[i - 1] for i in playlistitems
781                         if -n_all_entries <= i - 1 < n_all_entries]
782                 else:
783                     entries = ie_entries[playliststart:playlistend]
784                 n_entries = len(entries)
785                 self.to_screen(
786                     '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
787                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
788             elif isinstance(ie_entries, PagedList):
789                 if playlistitems:
790                     entries = []
791                     for item in playlistitems:
792                         entries.extend(ie_entries.getslice(
793                             item - 1, item
794                         ))
795                 else:
796                     entries = ie_entries.getslice(
797                         playliststart, playlistend)
798                 n_entries = len(entries)
799                 self.to_screen(
800                     '[%s] playlist %s: Downloading %d videos' %
801                     (ie_result['extractor'], playlist, n_entries))
802             else:  # iterable
803                 if playlistitems:
804                     entry_list = list(ie_entries)
805                     entries = [entry_list[i - 1] for i in playlistitems]
806                 else:
807                     entries = list(itertools.islice(
808                         ie_entries, playliststart, playlistend))
809                 n_entries = len(entries)
810                 self.to_screen(
811                     '[%s] playlist %s: Downloading %d videos' %
812                     (ie_result['extractor'], playlist, n_entries))
813
814             if self.params.get('playlistreverse', False):
815                 entries = entries[::-1]
816
817             for i, entry in enumerate(entries, 1):
818                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
819                 extra = {
820                     'n_entries': n_entries,
821                     'playlist': playlist,
822                     'playlist_id': ie_result.get('id'),
823                     'playlist_title': ie_result.get('title'),
824                     'playlist_index': i + playliststart,
825                     'extractor': ie_result['extractor'],
826                     'webpage_url': ie_result['webpage_url'],
827                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
828                     'extractor_key': ie_result['extractor_key'],
829                 }
830
831                 reason = self._match_entry(entry, incomplete=True)
832                 if reason is not None:
833                     self.to_screen('[download] ' + reason)
834                     continue
835
836                 entry_result = self.process_ie_result(entry,
837                                                       download=download,
838                                                       extra_info=extra)
839                 playlist_results.append(entry_result)
840             ie_result['entries'] = playlist_results
841             self.to_screen('[download] Finished downloading playlist: %s' % playlist)
842             return ie_result
843         elif result_type == 'compat_list':
844             self.report_warning(
845                 'Extractor %s returned a compat_list result. '
846                 'It needs to be updated.' % ie_result.get('extractor'))
847
848             def _fixup(r):
849                 self.add_extra_info(
850                     r,
851                     {
852                         'extractor': ie_result['extractor'],
853                         'webpage_url': ie_result['webpage_url'],
854                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
855                         'extractor_key': ie_result['extractor_key'],
856                     }
857                 )
858                 return r
859             ie_result['entries'] = [
860                 self.process_ie_result(_fixup(r), download, extra_info)
861                 for r in ie_result['entries']
862             ]
863             return ie_result
864         else:
865             raise Exception('Invalid result type: %s' % result_type)
866
867     def _build_format_filter(self, filter_spec):
868         " Returns a function to filter the formats according to the filter_spec "
869
870         OPERATORS = {
871             '<': operator.lt,
872             '<=': operator.le,
873             '>': operator.gt,
874             '>=': operator.ge,
875             '=': operator.eq,
876             '!=': operator.ne,
877         }
878         operator_rex = re.compile(r'''(?x)\s*
879             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
880             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
881             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
882             $
883             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
884         m = operator_rex.search(filter_spec)
885         if m:
886             try:
887                 comparison_value = int(m.group('value'))
888             except ValueError:
889                 comparison_value = parse_filesize(m.group('value'))
890                 if comparison_value is None:
891                     comparison_value = parse_filesize(m.group('value') + 'B')
892                 if comparison_value is None:
893                     raise ValueError(
894                         'Invalid value %r in format specification %r' % (
895                             m.group('value'), filter_spec))
896             op = OPERATORS[m.group('op')]
897
898         if not m:
899             STR_OPERATORS = {
900                 '=': operator.eq,
901                 '!=': operator.ne,
902                 '^=': lambda attr, value: attr.startswith(value),
903                 '$=': lambda attr, value: attr.endswith(value),
904                 '*=': lambda attr, value: value in attr,
905             }
906             str_operator_rex = re.compile(r'''(?x)
907                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
908                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
909                 \s*(?P<value>[a-zA-Z0-9._-]+)
910                 \s*$
911                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
912             m = str_operator_rex.search(filter_spec)
913             if m:
914                 comparison_value = m.group('value')
915                 op = STR_OPERATORS[m.group('op')]
916
917         if not m:
918             raise ValueError('Invalid filter specification %r' % filter_spec)
919
920         def _filter(f):
921             actual_value = f.get(m.group('key'))
922             if actual_value is None:
923                 return m.group('none_inclusive')
924             return op(actual_value, comparison_value)
925         return _filter
926
    def build_format_selector(self, format_spec):
        """Parse format_spec (e.g. 'bestvideo+bestaudio/best') and return a
        function that, given an iterable of format dicts, yields the
        selected format dict(s).

        The spec is tokenized with the stdlib tokenizer, parsed into a tree
        of FormatSelector nodes and then compiled into nested generator
        functions.
        """
        def syntax_error(note, start):
            # Build (do not raise) a SyntaxError pointing a caret at the
            # offending column of format_spec.
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        # Node types of the parsed selector tree.
        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        def _parse_filter(tokens):
            # Consume tokens up to the closing ']' and return the raw filter
            # string between the brackets (parsed later by _build_format_filter).
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Recursive-descent parser: returns a list of FormatSelector
            # nodes; the inside_* flags tell it which delimiter ends the
            # current (sub-)expression.
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        video_selector = current_selector
                        audio_selector = _parse_format_selection(tokens, inside_merge=True)
                        if not video_selector or not audio_selector:
                            raise syntax_error('"+" must be between two format selectors', start)
                        current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _build_selector_function(selector):
            # Compile a FormatSelector node (or a list of them, meaning
            # comma-separated alternatives) into a generator function
            # mapping a format list to the selected formats.
            if isinstance(selector, list):
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(formats):
                    for f in fs:
                        for format in f(formats):
                            yield format
                return selector_function
            elif selector.type == GROUP:
                selector_function = _build_selector_function(selector.selector)
            elif selector.type == PICKFIRST:
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(formats):
                    # Return the results of the first alternative that
                    # selects anything at all.
                    for f in fs:
                        picked_formats = list(f(formats))
                        if picked_formats:
                            return picked_formats
                    return []
            elif selector.type == SINGLE:
                format_spec = selector.selector

                def selector_function(formats):
                    formats = list(formats)
                    if not formats:
                        return
                    if format_spec == 'all':
                        for f in formats:
                            yield f
                    elif format_spec in ['best', 'worst', None]:
                        # Formats are assumed sorted worst-to-best, so 'best'
                        # is the last entry and 'worst' the first.
                        format_idx = 0 if format_spec == 'worst' else -1
                        audiovideo_formats = [
                            f for f in formats
                            if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                        if audiovideo_formats:
                            yield audiovideo_formats[format_idx]
                        # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
                        elif (all(f.get('acodec') != 'none' for f in formats) or
                              all(f.get('vcodec') != 'none' for f in formats)):
                            yield formats[format_idx]
                    elif format_spec == 'bestaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[-1]
                    elif format_spec == 'worstaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[0]
                    elif format_spec == 'bestvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[-1]
                    elif format_spec == 'worstvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[0]
                    else:
                        # Anything else is either an extension name or a
                        # literal format_id.
                        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                        if format_spec in extensions:
                            filter_f = lambda f: f['ext'] == format_spec
                        else:
                            filter_f = lambda f: f['format_id'] == format_spec
                        matches = list(filter(filter_f, formats))
                        if matches:
                            yield matches[-1]
            elif selector.type == MERGE:
                def _merge(formats_info):
                    # Combine a (video, audio) format pair into a single
                    # synthetic info dict for the downloader/merger.
                    format_1, format_2 = [f['format_id'] for f in formats_info]
                    # The first format must contain the video and the
                    # second the audio
                    if formats_info[0].get('vcodec') == 'none':
                        self.report_error('The first format must '
                                          'contain the video, try using '
                                          '"-f %s+%s"' % (format_2, format_1))
                        return
                    # Formats must be opposite (video+audio)
                    if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
                        self.report_error(
                            'Both formats %s and %s are video-only, you must specify "-f video+audio"'
                            % (format_1, format_2))
                        return
                    output_ext = (
                        formats_info[0]['ext']
                        if self.params.get('merge_output_format') is None
                        else self.params['merge_output_format'])
                    return {
                        'requested_formats': formats_info,
                        'format': '%s+%s' % (formats_info[0].get('format'),
                                             formats_info[1].get('format')),
                        'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                formats_info[1].get('format_id')),
                        'width': formats_info[0].get('width'),
                        'height': formats_info[0].get('height'),
                        'resolution': formats_info[0].get('resolution'),
                        'fps': formats_info[0].get('fps'),
                        'vcodec': formats_info[0].get('vcodec'),
                        'vbr': formats_info[0].get('vbr'),
                        'stretched_ratio': formats_info[0].get('stretched_ratio'),
                        'acodec': formats_info[1].get('acodec'),
                        'abr': formats_info[1].get('abr'),
                        'ext': output_ext,
                    }
                video_selector, audio_selector = map(_build_selector_function, selector.selector)

                def selector_function(formats):
                    formats = list(formats)
                    # Every video pick is merged with every audio pick.
                    for pair in itertools.product(video_selector(formats), audio_selector(formats)):
                        yield _merge(pair)

            # Apply the node's '[...]' filters on top of whichever
            # selector_function the branches above produced.
            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(formats):
                for _filter in filters:
                    formats = list(filter(_filter, formats))
                return selector_function(formats)
            return final_selector

        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator(object):
            # Token stream with single-token pushback, needed by the parser
            # when an outer production must re-consume a delimiter.
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            # Python 2 iterator protocol compatibility.
            next = __next__

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
1189
1190     def _calc_headers(self, info_dict):
1191         res = std_headers.copy()
1192
1193         add_headers = info_dict.get('http_headers')
1194         if add_headers:
1195             res.update(add_headers)
1196
1197         cookies = self._calc_cookies(info_dict)
1198         if cookies:
1199             res['Cookie'] = cookies
1200
1201         return res
1202
    def _calc_cookies(self, info_dict):
        # Build a throwaway request for info_dict's URL, let the cookiejar
        # attach any matching cookies, and return the resulting 'Cookie'
        # header value (None when no cookies apply).
        pr = sanitized_Request(info_dict['url'])
        self.cookiejar.add_cookie_header(pr)
        return pr.get_header('Cookie')
1207
1208     def process_video_result(self, info_dict, download=True):
1209         assert info_dict.get('_type', 'video') == 'video'
1210
1211         if 'id' not in info_dict:
1212             raise ExtractorError('Missing "id" field in extractor result')
1213         if 'title' not in info_dict:
1214             raise ExtractorError('Missing "title" field in extractor result')
1215
1216         if 'playlist' not in info_dict:
1217             # It isn't part of a playlist
1218             info_dict['playlist'] = None
1219             info_dict['playlist_index'] = None
1220
1221         thumbnails = info_dict.get('thumbnails')
1222         if thumbnails is None:
1223             thumbnail = info_dict.get('thumbnail')
1224             if thumbnail:
1225                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1226         if thumbnails:
1227             thumbnails.sort(key=lambda t: (
1228                 t.get('preference'), t.get('width'), t.get('height'),
1229                 t.get('id'), t.get('url')))
1230             for i, t in enumerate(thumbnails):
1231                 if t.get('width') and t.get('height'):
1232                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1233                 if t.get('id') is None:
1234                     t['id'] = '%d' % i
1235
1236         if thumbnails and 'thumbnail' not in info_dict:
1237             info_dict['thumbnail'] = thumbnails[-1]['url']
1238
1239         if 'display_id' not in info_dict and 'id' in info_dict:
1240             info_dict['display_id'] = info_dict['id']
1241
1242         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1243             # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1244             # see http://bugs.python.org/issue1646728)
1245             try:
1246                 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1247                 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1248             except (ValueError, OverflowError, OSError):
1249                 pass
1250
1251         # Auto generate title fields corresponding to the *_number fields when missing
1252         # in order to always have clean titles. This is very common for TV series.
1253         for field in ('chapter', 'season', 'episode'):
1254             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1255                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
1256
1257         subtitles = info_dict.get('subtitles')
1258         if subtitles:
1259             for _, subtitle in subtitles.items():
1260                 for subtitle_format in subtitle:
1261                     if 'ext' not in subtitle_format:
1262                         subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1263
1264         if self.params.get('listsubtitles', False):
1265             if 'automatic_captions' in info_dict:
1266                 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
1267             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1268             return
1269         info_dict['requested_subtitles'] = self.process_subtitles(
1270             info_dict['id'], subtitles,
1271             info_dict.get('automatic_captions'))
1272
1273         # We now pick which formats have to be downloaded
1274         if info_dict.get('formats') is None:
1275             # There's only one format available
1276             formats = [info_dict]
1277         else:
1278             formats = info_dict['formats']
1279
1280         if not formats:
1281             raise ExtractorError('No video formats found!')
1282
1283         formats_dict = {}
1284
1285         # We check that all the formats have the format and format_id fields
1286         for i, format in enumerate(formats):
1287             if 'url' not in format:
1288                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1289
1290             if format.get('format_id') is None:
1291                 format['format_id'] = compat_str(i)
1292             else:
1293                 # Sanitize format_id from characters used in format selector expression
1294                 format['format_id'] = re.sub('[\s,/+\[\]()]', '_', format['format_id'])
1295             format_id = format['format_id']
1296             if format_id not in formats_dict:
1297                 formats_dict[format_id] = []
1298             formats_dict[format_id].append(format)
1299
1300         # Make sure all formats have unique format_id
1301         for format_id, ambiguous_formats in formats_dict.items():
1302             if len(ambiguous_formats) > 1:
1303                 for i, format in enumerate(ambiguous_formats):
1304                     format['format_id'] = '%s-%d' % (format_id, i)
1305
1306         for i, format in enumerate(formats):
1307             if format.get('format') is None:
1308                 format['format'] = '{id} - {res}{note}'.format(
1309                     id=format['format_id'],
1310                     res=self.format_resolution(format),
1311                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1312                 )
1313             # Automatically determine file extension if missing
1314             if 'ext' not in format:
1315                 format['ext'] = determine_ext(format['url']).lower()
1316             # Automatically determine protocol if missing (useful for format
1317             # selection purposes)
1318             if 'protocol' not in format:
1319                 format['protocol'] = determine_protocol(format)
1320             # Add HTTP headers, so that external programs can use them from the
1321             # json output
1322             full_format_info = info_dict.copy()
1323             full_format_info.update(format)
1324             format['http_headers'] = self._calc_headers(full_format_info)
1325
1326         # TODO Central sorting goes here
1327
1328         if formats[0] is not info_dict:
1329             # only set the 'formats' fields if the original info_dict list them
1330             # otherwise we end up with a circular reference, the first (and unique)
1331             # element in the 'formats' field in info_dict is info_dict itself,
1332             # which can't be exported to json
1333             info_dict['formats'] = formats
1334         if self.params.get('listformats'):
1335             self.list_formats(info_dict)
1336             return
1337         if self.params.get('list_thumbnails'):
1338             self.list_thumbnails(info_dict)
1339             return
1340
1341         req_format = self.params.get('format')
1342         if req_format is None:
1343             req_format_list = []
1344             if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
1345                     not info_dict.get('is_live')):
1346                 merger = FFmpegMergerPP(self)
1347                 if merger.available and merger.can_merge():
1348                     req_format_list.append('bestvideo+bestaudio')
1349             req_format_list.append('best')
1350             req_format = '/'.join(req_format_list)
1351         format_selector = self.build_format_selector(req_format)
1352         formats_to_download = list(format_selector(formats))
1353         if not formats_to_download:
1354             raise ExtractorError('requested format not available',
1355                                  expected=True)
1356
1357         if download:
1358             if len(formats_to_download) > 1:
1359                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1360             for format in formats_to_download:
1361                 new_info = dict(info_dict)
1362                 new_info.update(format)
1363                 self.process_info(new_info)
1364         # We update the info dict with the best quality format (backwards compatibility)
1365         info_dict.update(formats_to_download[-1])
1366         return info_dict
1367
1368     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1369         """Select the requested subtitles and their format"""
1370         available_subs = {}
1371         if normal_subtitles and self.params.get('writesubtitles'):
1372             available_subs.update(normal_subtitles)
1373         if automatic_captions and self.params.get('writeautomaticsub'):
1374             for lang, cap_info in automatic_captions.items():
1375                 if lang not in available_subs:
1376                     available_subs[lang] = cap_info
1377
1378         if (not self.params.get('writesubtitles') and not
1379                 self.params.get('writeautomaticsub') or not
1380                 available_subs):
1381             return None
1382
1383         if self.params.get('allsubtitles', False):
1384             requested_langs = available_subs.keys()
1385         else:
1386             if self.params.get('subtitleslangs', False):
1387                 requested_langs = self.params.get('subtitleslangs')
1388             elif 'en' in available_subs:
1389                 requested_langs = ['en']
1390             else:
1391                 requested_langs = [list(available_subs.keys())[0]]
1392
1393         formats_query = self.params.get('subtitlesformat', 'best')
1394         formats_preference = formats_query.split('/') if formats_query else []
1395         subs = {}
1396         for lang in requested_langs:
1397             formats = available_subs.get(lang)
1398             if formats is None:
1399                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1400                 continue
1401             for ext in formats_preference:
1402                 if ext == 'best':
1403                     f = formats[-1]
1404                     break
1405                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1406                 if matches:
1407                     f = matches[-1]
1408                     break
1409             else:
1410                 f = formats[-1]
1411                 self.report_warning(
1412                     'No subtitle format found matching "%s" for language %s, '
1413                     'using %s' % (formats_query, lang, f['ext']))
1414             subs[lang] = f
1415         return subs
1416
1417     def process_info(self, info_dict):
1418         """Process a single resolved IE result."""
1419
1420         assert info_dict.get('_type', 'video') == 'video'
1421
1422         max_downloads = self.params.get('max_downloads')
1423         if max_downloads is not None:
1424             if self._num_downloads >= int(max_downloads):
1425                 raise MaxDownloadsReached()
1426
1427         info_dict['fulltitle'] = info_dict['title']
1428         if len(info_dict['title']) > 200:
1429             info_dict['title'] = info_dict['title'][:197] + '...'
1430
1431         if 'format' not in info_dict:
1432             info_dict['format'] = info_dict['ext']
1433
1434         reason = self._match_entry(info_dict, incomplete=False)
1435         if reason is not None:
1436             self.to_screen('[download] ' + reason)
1437             return
1438
1439         self._num_downloads += 1
1440
1441         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1442
1443         # Forced printings
1444         if self.params.get('forcetitle', False):
1445             self.to_stdout(info_dict['fulltitle'])
1446         if self.params.get('forceid', False):
1447             self.to_stdout(info_dict['id'])
1448         if self.params.get('forceurl', False):
1449             if info_dict.get('requested_formats') is not None:
1450                 for f in info_dict['requested_formats']:
1451                     self.to_stdout(f['url'] + f.get('play_path', ''))
1452             else:
1453                 # For RTMP URLs, also include the playpath
1454                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1455         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1456             self.to_stdout(info_dict['thumbnail'])
1457         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1458             self.to_stdout(info_dict['description'])
1459         if self.params.get('forcefilename', False) and filename is not None:
1460             self.to_stdout(filename)
1461         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1462             self.to_stdout(formatSeconds(info_dict['duration']))
1463         if self.params.get('forceformat', False):
1464             self.to_stdout(info_dict['format'])
1465         if self.params.get('forcejson', False):
1466             self.to_stdout(json.dumps(info_dict))
1467
1468         # Do nothing else if in simulate mode
1469         if self.params.get('simulate', False):
1470             return
1471
1472         if filename is None:
1473             return
1474
1475         try:
1476             dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1477             if dn and not os.path.exists(dn):
1478                 os.makedirs(dn)
1479         except (OSError, IOError) as err:
1480             self.report_error('unable to create directory ' + error_to_compat_str(err))
1481             return
1482
1483         if self.params.get('writedescription', False):
1484             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1485             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1486                 self.to_screen('[info] Video description is already present')
1487             elif info_dict.get('description') is None:
1488                 self.report_warning('There\'s no description to write.')
1489             else:
1490                 try:
1491                     self.to_screen('[info] Writing video description to: ' + descfn)
1492                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1493                         descfile.write(info_dict['description'])
1494                 except (OSError, IOError):
1495                     self.report_error('Cannot write description file ' + descfn)
1496                     return
1497
1498         if self.params.get('writeannotations', False):
1499             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1500             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1501                 self.to_screen('[info] Video annotations are already present')
1502             else:
1503                 try:
1504                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1505                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1506                         annofile.write(info_dict['annotations'])
1507                 except (KeyError, TypeError):
1508                     self.report_warning('There are no annotations to write.')
1509                 except (OSError, IOError):
1510                     self.report_error('Cannot write annotations file: ' + annofn)
1511                     return
1512
1513         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1514                                        self.params.get('writeautomaticsub')])
1515
1516         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1517             # subtitles download errors are already managed as troubles in relevant IE
1518             # that way it will silently go on when used with unsupporting IE
1519             subtitles = info_dict['requested_subtitles']
1520             ie = self.get_info_extractor(info_dict['extractor_key'])
1521             for sub_lang, sub_info in subtitles.items():
1522                 sub_format = sub_info['ext']
1523                 if sub_info.get('data') is not None:
1524                     sub_data = sub_info['data']
1525                 else:
1526                     try:
1527                         sub_data = ie._download_webpage(
1528                             sub_info['url'], info_dict['id'], note=False)
1529                     except ExtractorError as err:
1530                         self.report_warning('Unable to download subtitle for "%s": %s' %
1531                                             (sub_lang, error_to_compat_str(err.cause)))
1532                         continue
1533                 try:
1534                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1535                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1536                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1537                     else:
1538                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1539                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1540                             subfile.write(sub_data)
1541                 except (OSError, IOError):
1542                     self.report_error('Cannot write subtitles file ' + sub_filename)
1543                     return
1544
1545         if self.params.get('writeinfojson', False):
1546             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1547             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1548                 self.to_screen('[info] Video description metadata is already present')
1549             else:
1550                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1551                 try:
1552                     write_json_file(self.filter_requested_info(info_dict), infofn)
1553                 except (OSError, IOError):
1554                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1555                     return
1556
1557         self._write_thumbnails(info_dict, filename)
1558
1559         if not self.params.get('skip_download', False):
1560             try:
1561                 def dl(name, info):
1562                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1563                     for ph in self._progress_hooks:
1564                         fd.add_progress_hook(ph)
1565                     if self.params.get('verbose'):
1566                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1567                     return fd.download(name, info)
1568
1569                 if info_dict.get('requested_formats') is not None:
1570                     downloaded = []
1571                     success = True
1572                     merger = FFmpegMergerPP(self)
1573                     if not merger.available:
1574                         postprocessors = []
1575                         self.report_warning('You have requested multiple '
1576                                             'formats but ffmpeg or avconv are not installed.'
1577                                             ' The formats won\'t be merged.')
1578                     else:
1579                         postprocessors = [merger]
1580
1581                     def compatible_formats(formats):
1582                         video, audio = formats
1583                         # Check extension
1584                         video_ext, audio_ext = audio.get('ext'), video.get('ext')
1585                         if video_ext and audio_ext:
1586                             COMPATIBLE_EXTS = (
1587                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
1588                                 ('webm')
1589                             )
1590                             for exts in COMPATIBLE_EXTS:
1591                                 if video_ext in exts and audio_ext in exts:
1592                                     return True
1593                         # TODO: Check acodec/vcodec
1594                         return False
1595
1596                     filename_real_ext = os.path.splitext(filename)[1][1:]
1597                     filename_wo_ext = (
1598                         os.path.splitext(filename)[0]
1599                         if filename_real_ext == info_dict['ext']
1600                         else filename)
1601                     requested_formats = info_dict['requested_formats']
1602                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1603                         info_dict['ext'] = 'mkv'
1604                         self.report_warning(
1605                             'Requested formats are incompatible for merge and will be merged into mkv.')
1606                     # Ensure filename always has a correct extension for successful merge
1607                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1608                     if os.path.exists(encodeFilename(filename)):
1609                         self.to_screen(
1610                             '[download] %s has already been downloaded and '
1611                             'merged' % filename)
1612                     else:
1613                         for f in requested_formats:
1614                             new_info = dict(info_dict)
1615                             new_info.update(f)
1616                             fname = self.prepare_filename(new_info)
1617                             fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
1618                             downloaded.append(fname)
1619                             partial_success = dl(fname, new_info)
1620                             success = success and partial_success
1621                         info_dict['__postprocessors'] = postprocessors
1622                         info_dict['__files_to_merge'] = downloaded
1623                 else:
1624                     # Just a single file
1625                     success = dl(filename, info_dict)
1626             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1627                 self.report_error('unable to download video data: %s' % str(err))
1628                 return
1629             except (OSError, IOError) as err:
1630                 raise UnavailableVideoError(err)
1631             except (ContentTooShortError, ) as err:
1632                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1633                 return
1634
1635             if success and filename != '-':
1636                 # Fixup content
1637                 fixup_policy = self.params.get('fixup')
1638                 if fixup_policy is None:
1639                     fixup_policy = 'detect_or_warn'
1640
1641                 stretched_ratio = info_dict.get('stretched_ratio')
1642                 if stretched_ratio is not None and stretched_ratio != 1:
1643                     if fixup_policy == 'warn':
1644                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1645                             info_dict['id'], stretched_ratio))
1646                     elif fixup_policy == 'detect_or_warn':
1647                         stretched_pp = FFmpegFixupStretchedPP(self)
1648                         if stretched_pp.available:
1649                             info_dict.setdefault('__postprocessors', [])
1650                             info_dict['__postprocessors'].append(stretched_pp)
1651                         else:
1652                             self.report_warning(
1653                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1654                                     info_dict['id'], stretched_ratio))
1655                     else:
1656                         assert fixup_policy in ('ignore', 'never')
1657
1658                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1659                     if fixup_policy == 'warn':
1660                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1661                             info_dict['id']))
1662                     elif fixup_policy == 'detect_or_warn':
1663                         fixup_pp = FFmpegFixupM4aPP(self)
1664                         if fixup_pp.available:
1665                             info_dict.setdefault('__postprocessors', [])
1666                             info_dict['__postprocessors'].append(fixup_pp)
1667                         else:
1668                             self.report_warning(
1669                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1670                                     info_dict['id']))
1671                     else:
1672                         assert fixup_policy in ('ignore', 'never')
1673
1674                 try:
1675                     self.post_process(filename, info_dict)
1676                 except (PostProcessingError) as err:
1677                     self.report_error('postprocessing: %s' % str(err))
1678                     return
1679                 self.record_download_archive(info_dict)
1680
1681     def download(self, url_list):
1682         """Download a given list of URLs."""
1683         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1684         if (len(url_list) > 1 and
1685                 '%' not in outtmpl and
1686                 self.params.get('max_downloads') != 1):
1687             raise SameFileError(outtmpl)
1688
1689         for url in url_list:
1690             try:
1691                 # It also downloads the videos
1692                 res = self.extract_info(
1693                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
1694             except UnavailableVideoError:
1695                 self.report_error('unable to download video')
1696             except MaxDownloadsReached:
1697                 self.to_screen('[info] Maximum number of downloaded files reached.')
1698                 raise
1699             else:
1700                 if self.params.get('dump_single_json', False):
1701                     self.to_stdout(json.dumps(res))
1702
1703         return self._download_retcode
1704
1705     def download_with_info_file(self, info_filename):
1706         with contextlib.closing(fileinput.FileInput(
1707                 [info_filename], mode='r',
1708                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1709             # FileInput doesn't have a read method, we can't call json.load
1710             info = self.filter_requested_info(json.loads('\n'.join(f)))
1711         try:
1712             self.process_ie_result(info, download=True)
1713         except DownloadError:
1714             webpage_url = info.get('webpage_url')
1715             if webpage_url is not None:
1716                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1717                 return self.download([webpage_url])
1718             else:
1719                 raise
1720         return self._download_retcode
1721
1722     @staticmethod
1723     def filter_requested_info(info_dict):
1724         return dict(
1725             (k, v) for k, v in info_dict.items()
1726             if k not in ['requested_formats', 'requested_subtitles'])
1727
1728     def post_process(self, filename, ie_info):
1729         """Run all the postprocessors on the given file."""
1730         info = dict(ie_info)
1731         info['filepath'] = filename
1732         pps_chain = []
1733         if ie_info.get('__postprocessors') is not None:
1734             pps_chain.extend(ie_info['__postprocessors'])
1735         pps_chain.extend(self._pps)
1736         for pp in pps_chain:
1737             files_to_delete = []
1738             try:
1739                 files_to_delete, info = pp.run(info)
1740             except PostProcessingError as e:
1741                 self.report_error(e.msg)
1742             if files_to_delete and not self.params.get('keepvideo', False):
1743                 for old_filename in files_to_delete:
1744                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1745                     try:
1746                         os.remove(encodeFilename(old_filename))
1747                     except (IOError, OSError):
1748                         self.report_warning('Unable to remove downloaded original file')
1749
1750     def _make_archive_id(self, info_dict):
1751         # Future-proof against any change in case
1752         # and backwards compatibility with prior versions
1753         extractor = info_dict.get('extractor_key')
1754         if extractor is None:
1755             if 'id' in info_dict:
1756                 extractor = info_dict.get('ie_key')  # key in a playlist
1757         if extractor is None:
1758             return None  # Incomplete video information
1759         return extractor.lower() + ' ' + info_dict['id']
1760
1761     def in_download_archive(self, info_dict):
1762         fn = self.params.get('download_archive')
1763         if fn is None:
1764             return False
1765
1766         vid_id = self._make_archive_id(info_dict)
1767         if vid_id is None:
1768             return False  # Incomplete video information
1769
1770         try:
1771             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1772                 for line in archive_file:
1773                     if line.strip() == vid_id:
1774                         return True
1775         except IOError as ioe:
1776             if ioe.errno != errno.ENOENT:
1777                 raise
1778         return False
1779
1780     def record_download_archive(self, info_dict):
1781         fn = self.params.get('download_archive')
1782         if fn is None:
1783             return
1784         vid_id = self._make_archive_id(info_dict)
1785         assert vid_id
1786         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1787             archive_file.write(vid_id + '\n')
1788
1789     @staticmethod
1790     def format_resolution(format, default='unknown'):
1791         if format.get('vcodec') == 'none':
1792             return 'audio only'
1793         if format.get('resolution') is not None:
1794             return format['resolution']
1795         if format.get('height') is not None:
1796             if format.get('width') is not None:
1797                 res = '%sx%s' % (format['width'], format['height'])
1798             else:
1799                 res = '%sp' % format['height']
1800         elif format.get('width') is not None:
1801             res = '%dx?' % format['width']
1802         else:
1803             res = default
1804         return res
1805
1806     def _format_note(self, fdict):
1807         res = ''
1808         if fdict.get('ext') in ['f4f', 'f4m']:
1809             res += '(unsupported) '
1810         if fdict.get('language'):
1811             if res:
1812                 res += ' '
1813             res += '[%s]' % fdict['language']
1814         if fdict.get('format_note') is not None:
1815             res += fdict['format_note'] + ' '
1816         if fdict.get('tbr') is not None:
1817             res += '%4dk ' % fdict['tbr']
1818         if fdict.get('container') is not None:
1819             if res:
1820                 res += ', '
1821             res += '%s container' % fdict['container']
1822         if (fdict.get('vcodec') is not None and
1823                 fdict.get('vcodec') != 'none'):
1824             if res:
1825                 res += ', '
1826             res += fdict['vcodec']
1827             if fdict.get('vbr') is not None:
1828                 res += '@'
1829         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1830             res += 'video@'
1831         if fdict.get('vbr') is not None:
1832             res += '%4dk' % fdict['vbr']
1833         if fdict.get('fps') is not None:
1834             res += ', %sfps' % fdict['fps']
1835         if fdict.get('acodec') is not None:
1836             if res:
1837                 res += ', '
1838             if fdict['acodec'] == 'none':
1839                 res += 'video only'
1840             else:
1841                 res += '%-5s' % fdict['acodec']
1842         elif fdict.get('abr') is not None:
1843             if res:
1844                 res += ', '
1845             res += 'audio'
1846         if fdict.get('abr') is not None:
1847             res += '@%3dk' % fdict['abr']
1848         if fdict.get('asr') is not None:
1849             res += ' (%5dHz)' % fdict['asr']
1850         if fdict.get('filesize') is not None:
1851             if res:
1852                 res += ', '
1853             res += format_bytes(fdict['filesize'])
1854         elif fdict.get('filesize_approx') is not None:
1855             if res:
1856                 res += ', '
1857             res += '~' + format_bytes(fdict['filesize_approx'])
1858         return res
1859
1860     def list_formats(self, info_dict):
1861         formats = info_dict.get('formats', [info_dict])
1862         table = [
1863             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1864             for f in formats
1865             if f.get('preference') is None or f['preference'] >= -1000]
1866         if len(formats) > 1:
1867             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1868
1869         header_line = ['format code', 'extension', 'resolution', 'note']
1870         self.to_screen(
1871             '[info] Available formats for %s:\n%s' %
1872             (info_dict['id'], render_table(header_line, table)))
1873
1874     def list_thumbnails(self, info_dict):
1875         thumbnails = info_dict.get('thumbnails')
1876         if not thumbnails:
1877             tn_url = info_dict.get('thumbnail')
1878             if tn_url:
1879                 thumbnails = [{'id': '0', 'url': tn_url}]
1880             else:
1881                 self.to_screen(
1882                     '[info] No thumbnails present for %s' % info_dict['id'])
1883                 return
1884
1885         self.to_screen(
1886             '[info] Thumbnails for %s:' % info_dict['id'])
1887         self.to_screen(render_table(
1888             ['ID', 'width', 'height', 'URL'],
1889             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1890
1891     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1892         if not subtitles:
1893             self.to_screen('%s has no %s' % (video_id, name))
1894             return
1895         self.to_screen(
1896             'Available %s for %s:' % (name, video_id))
1897         self.to_screen(render_table(
1898             ['Language', 'formats'],
1899             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1900                 for lang, formats in subtitles.items()]))
1901
1902     def urlopen(self, req):
1903         """ Start an HTTP download """
1904         if isinstance(req, compat_basestring):
1905             req = sanitized_Request(req)
1906         return self._opener.open(req, timeout=self._socket_timeout)
1907
    def print_debug_header(self):
        """Write diagnostic information (encodings, versions, proxy map,
        optionally public IP / update check) to the debug output.

        No-op unless the 'verbose' option is enabled.
        """
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        # sys.stdout may have been replaced by an object without an
        # 'encoding' attribute; fall back to naming its type in that case.
        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
        try:
            # Best effort: report the git commit when running from a checkout.
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            # Only print the result if it looks like a hex commit hash.
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        except Exception:
            try:
                # Python 2 only; clears the current exception state.
                sys.exc_clear()
            except Exception:
                pass
        self._write_string('[debug] Python version %s - %s\n' % (
            platform.python_version(), platform_name()))

        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        # Render as "name version, name version, ..." for detected tools only.
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Merge the proxy settings from every opener handler that has any.
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            # Opt-in: contact yt-dl.org to show our public IP and warn if a
            # newer release is available.
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
1972
1973     def _setup_opener(self):
1974         timeout_val = self.params.get('socket_timeout')
1975         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1976
1977         opts_cookiefile = self.params.get('cookiefile')
1978         opts_proxy = self.params.get('proxy')
1979
1980         if opts_cookiefile is None:
1981             self.cookiejar = compat_cookiejar.CookieJar()
1982         else:
1983             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1984                 opts_cookiefile)
1985             if os.access(opts_cookiefile, os.R_OK):
1986                 self.cookiejar.load()
1987
1988         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
1989         if opts_proxy is not None:
1990             if opts_proxy == '':
1991                 proxies = {}
1992             else:
1993                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1994         else:
1995             proxies = compat_urllib_request.getproxies()
1996             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1997             if 'http' in proxies and 'https' not in proxies:
1998                 proxies['https'] = proxies['http']
1999         proxy_handler = PerRequestProxyHandler(proxies)
2000
2001         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
2002         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
2003         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
2004         data_handler = compat_urllib_request_DataHandler()
2005
2006         # When passing our own FileHandler instance, build_opener won't add the
2007         # default FileHandler and allows us to disable the file protocol, which
2008         # can be used for malicious purposes (see
2009         # https://github.com/rg3/youtube-dl/issues/8227)
2010         file_handler = compat_urllib_request.FileHandler()
2011
2012         def file_open(*args, **kwargs):
2013             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
2014         file_handler.file_open = file_open
2015
2016         opener = compat_urllib_request.build_opener(
2017             proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)
2018
2019         # Delete the default user-agent header, which would otherwise apply in
2020         # cases where our custom HTTP handler doesn't come into play
2021         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
2022         opener.addheaders = []
2023         self._opener = opener
2024
2025     def encode(self, s):
2026         if isinstance(s, bytes):
2027             return s  # Already encoded
2028
2029         try:
2030             return s.encode(self.get_encoding())
2031         except UnicodeEncodeError as err:
2032             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2033             raise
2034
2035     def get_encoding(self):
2036         encoding = self.params.get('encoding')
2037         if encoding is None:
2038             encoding = preferredencoding()
2039         return encoding
2040
2041     def _write_thumbnails(self, info_dict, filename):
2042         if self.params.get('writethumbnail', False):
2043             thumbnails = info_dict.get('thumbnails')
2044             if thumbnails:
2045                 thumbnails = [thumbnails[-1]]
2046         elif self.params.get('write_all_thumbnails', False):
2047             thumbnails = info_dict.get('thumbnails')
2048         else:
2049             return
2050
2051         if not thumbnails:
2052             # No thumbnails present, so return immediately
2053             return
2054
2055         for t in thumbnails:
2056             thumb_ext = determine_ext(t['url'], 'jpg')
2057             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2058             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2059             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
2060
2061             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2062                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2063                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2064             else:
2065                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2066                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2067                 try:
2068                     uf = self.urlopen(t['url'])
2069                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2070                         shutil.copyfileobj(uf, thumbf)
2071                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2072                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2073                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2074                     self.report_warning('Unable to download thumbnail "%s": %s' %
2075                                         (t['url'], error_to_compat_str(err)))