Merge pull request #8246 from dstftw/initial-json-ld-metadata-support
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import datetime
9 import errno
10 import fileinput
11 import io
12 import itertools
13 import json
14 import locale
15 import operator
16 import os
17 import platform
18 import re
19 import shutil
20 import subprocess
21 import socket
22 import sys
23 import time
24 import tokenize
25 import traceback
26
27 if os.name == 'nt':
28     import ctypes
29
30 from .compat import (
31     compat_basestring,
32     compat_cookiejar,
33     compat_expanduser,
34     compat_get_terminal_size,
35     compat_http_client,
36     compat_kwargs,
37     compat_str,
38     compat_tokenize_tokenize,
39     compat_urllib_error,
40     compat_urllib_request,
41     compat_urllib_request_DataHandler,
42 )
43 from .utils import (
44     ContentTooShortError,
45     date_from_str,
46     DateRange,
47     DEFAULT_OUTTMPL,
48     determine_ext,
49     DownloadError,
50     encode_compat_str,
51     encodeFilename,
52     error_to_compat_str,
53     ExtractorError,
54     format_bytes,
55     formatSeconds,
56     locked_file,
57     make_HTTPS_handler,
58     MaxDownloadsReached,
59     PagedList,
60     parse_filesize,
61     PerRequestProxyHandler,
62     PostProcessingError,
63     platform_name,
64     preferredencoding,
65     render_table,
66     SameFileError,
67     sanitize_filename,
68     sanitize_path,
69     sanitized_Request,
70     std_headers,
71     subtitles_filename,
72     UnavailableVideoError,
73     url_basename,
74     version_tuple,
75     write_json_file,
76     write_string,
77     YoutubeDLCookieProcessor,
78     YoutubeDLHandler,
79     prepend_extension,
80     replace_extension,
81     args_to_str,
82     age_restricted,
83 )
84 from .cache import Cache
85 from .extractor import get_info_extractor, gen_extractors
86 from .downloader import get_suitable_downloader
87 from .downloader.rtmp import rtmpdump_version
88 from .postprocessor import (
89     FFmpegFixupM4aPP,
90     FFmpegFixupStretchedPP,
91     FFmpegMergerPP,
92     FFmpegPostProcessor,
93     get_postprocessor,
94 )
95 from .version import __version__
96
97
98 class YoutubeDL(object):
99     """YoutubeDL class.
100
101     YoutubeDL objects are the ones responsible for downloading the
102     actual video file and writing it to disk if the user has requested
103     it, among some other tasks. In most cases there should be one per
104     program. As, given a video URL, the downloader doesn't know how to
105     extract all the needed information, task that InfoExtractors do, it
106     has to pass the URL to one of them.
107
108     For this, YoutubeDL objects have a method that allows
109     InfoExtractors to be registered in a given order. When it is passed
110     a URL, the YoutubeDL object hands it to the first InfoExtractor it
111     finds that reports being able to handle it. The InfoExtractor extracts
112     all the information about the video or videos the URL refers to, and
113     YoutubeDL processes the extracted information, possibly using a File
114     Downloader to download the video.
115
116     YoutubeDL objects accept a lot of parameters. In order not to saturate
117     the object constructor with arguments, it receives a dictionary of
118     options instead. These options are available through the params
119     attribute for the InfoExtractors to use. The YoutubeDL also
120     registers itself as the downloader in charge for the InfoExtractors
121     that are added to it, so this is a "mutual registration".
122
123     Available options:
124
125     username:          Username for authentication purposes.
126     password:          Password for authentication purposes.
127     videopassword:     Password for accessing a video.
128     usenetrc:          Use netrc for authentication instead.
129     verbose:           Print additional info to stdout.
130     quiet:             Do not print messages to stdout.
131     no_warnings:       Do not print out anything for warnings.
132     forceurl:          Force printing final URL.
133     forcetitle:        Force printing title.
134     forceid:           Force printing ID.
135     forcethumbnail:    Force printing thumbnail URL.
136     forcedescription:  Force printing description.
137     forcefilename:     Force printing final filename.
138     forceduration:     Force printing duration.
139     forcejson:         Force printing info_dict as JSON.
140     dump_single_json:  Force printing the info_dict of the whole playlist
141                        (or video) as a single JSON line.
142     simulate:          Do not download the video files.
143     format:            Video format code. See options.py for more information.
144     outtmpl:           Template for output names.
145     restrictfilenames: Do not allow "&" and spaces in file names
146     ignoreerrors:      Do not stop on download errors.
147     force_generic_extractor: Force downloader to use the generic extractor
148     nooverwrites:      Prevent overwriting files.
149     playliststart:     Playlist item to start at.
150     playlistend:       Playlist item to end at.
151     playlist_items:    Specific indices of playlist to download.
152     playlistreverse:   Download playlist items in reverse order.
153     matchtitle:        Download only matching titles.
154     rejecttitle:       Reject downloads for matching titles.
155     logger:            Log messages to a logging.Logger instance.
156     logtostderr:       Log messages to stderr instead of stdout.
157     writedescription:  Write the video description to a .description file
158     writeinfojson:     Write the video description to a .info.json file
159     writeannotations:  Write the video annotations to a .annotations.xml file
160     writethumbnail:    Write the thumbnail image to a file
161     write_all_thumbnails:  Write all thumbnail formats to files
162     writesubtitles:    Write the video subtitles to a file
163     writeautomaticsub: Write the automatically generated subtitles to a file
164     allsubtitles:      Downloads all the subtitles of the video
165                        (requires writesubtitles or writeautomaticsub)
166     listsubtitles:     Lists all available subtitles for the video
167     subtitlesformat:   The format code for subtitles
168     subtitleslangs:    List of languages of the subtitles to download
169     keepvideo:         Keep the video file after post-processing
170     daterange:         A DateRange object, download only if the upload_date is in the range.
171     skip_download:     Skip the actual download of the video file
172     cachedir:          Location of the cache files in the filesystem.
173                        False to disable filesystem cache.
174     noplaylist:        Download single video instead of a playlist if in doubt.
175     age_limit:         An integer representing the user's age in years.
176                        Unsuitable videos for the given age are skipped.
177     min_views:         An integer representing the minimum view count the video
178                        must have in order to not be skipped.
179                        Videos without view count information are always
180                        downloaded. None for no limit.
181     max_views:         An integer representing the maximum view count.
182                        Videos that are more popular than that are not
183                        downloaded.
184                        Videos without view count information are always
185                        downloaded. None for no limit.
186     download_archive:  File name of a file where all downloads are recorded.
187                        Videos already present in the file are not downloaded
188                        again.
189     cookiefile:        File name where cookies should be read from and dumped to.
190     nocheckcertificate:Do not verify SSL certificates
191     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
192                        At the moment, this is only supported by YouTube.
193     proxy:             URL of the proxy server to use
194     cn_verification_proxy:  URL of the proxy to use for IP address verification
195                        on Chinese sites. (Experimental)
196     socket_timeout:    Time to wait for unresponsive hosts, in seconds
197     bidi_workaround:   Work around buggy terminals without bidirectional text
198                        support, using fribidi
199     debug_printtraffic:Print out sent and received HTTP traffic
200     include_ads:       Download ads as well
201     default_search:    Prepend this string if an input url is not valid.
202                        'auto' for elaborate guessing
203     encoding:          Use this encoding instead of the system-specified.
204     extract_flat:      Do not resolve URLs, return the immediate result.
205                        Pass in 'in_playlist' to only show this behavior for
206                        playlist items.
207     postprocessors:    A list of dictionaries, each with an entry
208                        * key:  The name of the postprocessor. See
209                                youtube_dl/postprocessor/__init__.py for a list.
210                        as well as any further keyword arguments for the
211                        postprocessor.
212     progress_hooks:    A list of functions that get called on download
213                        progress, with a dictionary with the entries
214                        * status: One of "downloading", "error", or "finished".
215                                  Check this first and ignore unknown values.
216
217                        If status is one of "downloading", or "finished", the
218                        following properties may also be present:
219                        * filename: The final filename (always present)
220                        * tmpfilename: The filename we're currently writing to
221                        * downloaded_bytes: Bytes on disk
222                        * total_bytes: Size of the whole file, None if unknown
223                        * total_bytes_estimate: Guess of the eventual file size,
224                                                None if unavailable.
225                        * elapsed: The number of seconds since download started.
226                        * eta: The estimated time in seconds, None if unknown
227                        * speed: The download speed in bytes/second, None if
228                                 unknown
229                        * fragment_index: The counter of the currently
230                                          downloaded video fragment.
231                        * fragment_count: The number of fragments (= individual
232                                          files that will be merged)
233
234                        Progress hooks are guaranteed to be called at least once
235                        (with status "finished") if the download is successful.
236     merge_output_format: Extension to use when merging formats.
237     fixup:             Automatically correct known faults of the file.
238                        One of:
239                        - "never": do nothing
240                        - "warn": only emit a warning
241                        - "detect_or_warn": check whether we can do anything
242                                            about it, warn otherwise (default)
243     source_address:    (Experimental) Client-side IP address to bind to.
244     call_home:         Boolean, true iff we are allowed to contact the
245                        youtube-dl servers for debugging.
246     sleep_interval:    Number of seconds to sleep before each download.
247     listformats:       Print an overview of available video formats and exit.
248     list_thumbnails:   Print a table of all thumbnails and exit.
249     match_filter:      A function that gets called with the info_dict of
250                        every video.
251                        If it returns a message, the video is ignored.
252                        If it returns None, the video is downloaded.
253                        match_filter_func in utils.py is one example for this.
254     no_color:          Do not emit color codes in output.
255
256     The following options determine which downloader is picked:
257     external_downloader: Executable of the external downloader to call.
258                        None or unset for standard (built-in) downloader.
259     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
260
261     The following parameters are not used by YoutubeDL itself, they are used by
262     the downloader (see youtube_dl/downloader/common.py):
263     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
264     noresizebuffer, retries, continuedl, noprogress, consoletitle,
265     xattr_set_filesize, external_downloader_args.
266
267     The following options are used by the post processors:
268     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
269                        otherwise prefer avconv.
270     postprocessor_args: A list of additional command-line arguments for the
271                         postprocessor.
272     """
273
    # Class-level placeholders. __init__ rebinds params, _ies, _pps,
    # _download_retcode, _num_downloads and _screen_file on every instance,
    # so the mutable lists below are not shared by initialized instances.
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None
280
281     def __init__(self, params=None, auto_init=True):
282         """Create a FileDownloader object with the given options."""
283         if params is None:
284             params = {}
285         self._ies = []
286         self._ies_instances = {}
287         self._pps = []
288         self._progress_hooks = []
289         self._download_retcode = 0
290         self._num_downloads = 0
291         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
292         self._err_file = sys.stderr
293         self.params = {
294             # Default parameters
295             'nocheckcertificate': False,
296         }
297         self.params.update(params)
298         self.cache = Cache(self)
299
300         if params.get('bidi_workaround', False):
301             try:
302                 import pty
303                 master, slave = pty.openpty()
304                 width = compat_get_terminal_size().columns
305                 if width is None:
306                     width_args = []
307                 else:
308                     width_args = ['-w', str(width)]
309                 sp_kwargs = dict(
310                     stdin=subprocess.PIPE,
311                     stdout=slave,
312                     stderr=self._err_file)
313                 try:
314                     self._output_process = subprocess.Popen(
315                         ['bidiv'] + width_args, **sp_kwargs
316                     )
317                 except OSError:
318                     self._output_process = subprocess.Popen(
319                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
320                 self._output_channel = os.fdopen(master, 'rb')
321             except OSError as ose:
322                 if ose.errno == 2:
323                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
324                 else:
325                     raise
326
327         if (sys.version_info >= (3,) and sys.platform != 'win32' and
328                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
329                 not params.get('restrictfilenames', False)):
330             # On Python 3, the Unicode filesystem API will throw errors (#1474)
331             self.report_warning(
332                 'Assuming --restrict-filenames since file system encoding '
333                 'cannot encode all characters. '
334                 'Set the LC_ALL environment variable to fix this.')
335             self.params['restrictfilenames'] = True
336
337         if isinstance(params.get('outtmpl'), bytes):
338             self.report_warning(
339                 'Parameter outtmpl is bytes, but should be a unicode string. '
340                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
341
342         self._setup_opener()
343
344         if auto_init:
345             self.print_debug_header()
346             self.add_default_info_extractors()
347
348         for pp_def_raw in self.params.get('postprocessors', []):
349             pp_class = get_postprocessor(pp_def_raw['key'])
350             pp_def = dict(pp_def_raw)
351             del pp_def['key']
352             pp = pp_class(self, **compat_kwargs(pp_def))
353             self.add_post_processor(pp)
354
355         for ph in self.params.get('progress_hooks', []):
356             self.add_progress_hook(ph)
357
358     def warn_if_short_id(self, argv):
359         # short YouTube ID starting with dash?
360         idxs = [
361             i for i, a in enumerate(argv)
362             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
363         if idxs:
364             correct_argv = (
365                 ['youtube-dl'] +
366                 [a for i, a in enumerate(argv) if i not in idxs] +
367                 ['--'] + [argv[i] for i in idxs]
368             )
369             self.report_warning(
370                 'Long argument string detected. '
371                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
372                 args_to_str(correct_argv))
373
374     def add_info_extractor(self, ie):
375         """Add an InfoExtractor object to the end of the list."""
376         self._ies.append(ie)
377         self._ies_instances[ie.ie_key()] = ie
378         ie.set_downloader(self)
379
380     def get_info_extractor(self, ie_key):
381         """
382         Get an instance of an IE with name ie_key, it will try to get one from
383         the _ies list, if there's no instance it will create a new one and add
384         it to the extractor list.
385         """
386         ie = self._ies_instances.get(ie_key)
387         if ie is None:
388             ie = get_info_extractor(ie_key)()
389             self.add_info_extractor(ie)
390         return ie
391
392     def add_default_info_extractors(self):
393         """
394         Add the InfoExtractors returned by gen_extractors to the end of the list
395         """
396         for ie in gen_extractors():
397             self.add_info_extractor(ie)
398
399     def add_post_processor(self, pp):
400         """Add a PostProcessor object to the end of the chain."""
401         self._pps.append(pp)
402         pp.set_downloader(self)
403
404     def add_progress_hook(self, ph):
405         """Add the progress hook (currently only for the file downloader)"""
406         self._progress_hooks.append(ph)
407
408     def _bidi_workaround(self, message):
409         if not hasattr(self, '_output_channel'):
410             return message
411
412         assert hasattr(self, '_output_process')
413         assert isinstance(message, compat_str)
414         line_count = message.count('\n') + 1
415         self._output_process.stdin.write((message + '\n').encode('utf-8'))
416         self._output_process.stdin.flush()
417         res = ''.join(self._output_channel.readline().decode('utf-8')
418                       for _ in range(line_count))
419         return res[:-len('\n')]
420
421     def to_screen(self, message, skip_eol=False):
422         """Print message to stdout if not in quiet mode."""
423         return self.to_stdout(message, skip_eol, check_quiet=True)
424
425     def _write_string(self, s, out=None):
426         write_string(s, out=out, encoding=self.params.get('encoding'))
427
428     def to_stdout(self, message, skip_eol=False, check_quiet=False):
429         """Print message to stdout if not in quiet mode."""
430         if self.params.get('logger'):
431             self.params['logger'].debug(message)
432         elif not check_quiet or not self.params.get('quiet', False):
433             message = self._bidi_workaround(message)
434             terminator = ['\n', ''][skip_eol]
435             output = message + terminator
436
437             self._write_string(output, self._screen_file)
438
439     def to_stderr(self, message):
440         """Print message to stderr."""
441         assert isinstance(message, compat_str)
442         if self.params.get('logger'):
443             self.params['logger'].error(message)
444         else:
445             message = self._bidi_workaround(message)
446             output = message + '\n'
447             self._write_string(output, self._err_file)
448
449     def to_console_title(self, message):
450         if not self.params.get('consoletitle', False):
451             return
452         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
453             # c_wchar_p() might not be necessary if `message` is
454             # already of type unicode()
455             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
456         elif 'TERM' in os.environ:
457             self._write_string('\033]0;%s\007' % message, self._screen_file)
458
459     def save_console_title(self):
460         if not self.params.get('consoletitle', False):
461             return
462         if 'TERM' in os.environ:
463             # Save the title on stack
464             self._write_string('\033[22;0t', self._screen_file)
465
466     def restore_console_title(self):
467         if not self.params.get('consoletitle', False):
468             return
469         if 'TERM' in os.environ:
470             # Restore the title from stack
471             self._write_string('\033[23;0t', self._screen_file)
472
    def __enter__(self):
        """Save the console title and return this object as the context value."""
        self.save_console_title()
        return self
476
477     def __exit__(self, *args):
478         self.restore_console_title()
479
480         if self.params.get('cookiefile') is not None:
481             self.cookiejar.save()
482
483     def trouble(self, message=None, tb=None):
484         """Determine action to take when a download problem appears.
485
486         Depending on if the downloader has been configured to ignore
487         download errors or not, this method may throw an exception or
488         not when errors are found, after printing the message.
489
490         tb, if given, is additional traceback information.
491         """
492         if message is not None:
493             self.to_stderr(message)
494         if self.params.get('verbose'):
495             if tb is None:
496                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
497                     tb = ''
498                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
499                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
500                     tb += encode_compat_str(traceback.format_exc())
501                 else:
502                     tb_data = traceback.format_list(traceback.extract_stack())
503                     tb = ''.join(tb_data)
504             self.to_stderr(tb)
505         if not self.params.get('ignoreerrors', False):
506             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
507                 exc_info = sys.exc_info()[1].exc_info
508             else:
509                 exc_info = sys.exc_info()
510             raise DownloadError(message, exc_info)
511         self._download_retcode = 1
512
513     def report_warning(self, message):
514         '''
515         Print the message to stderr, it will be prefixed with 'WARNING:'
516         If stderr is a tty file the 'WARNING:' will be colored
517         '''
518         if self.params.get('logger') is not None:
519             self.params['logger'].warning(message)
520         else:
521             if self.params.get('no_warnings'):
522                 return
523             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
524                 _msg_header = '\033[0;33mWARNING:\033[0m'
525             else:
526                 _msg_header = 'WARNING:'
527             warning_message = '%s %s' % (_msg_header, message)
528             self.to_stderr(warning_message)
529
530     def report_error(self, message, tb=None):
531         '''
532         Do the same as trouble, but prefixes the message with 'ERROR:', colored
533         in red if stderr is a tty file.
534         '''
535         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
536             _msg_header = '\033[0;31mERROR:\033[0m'
537         else:
538             _msg_header = 'ERROR:'
539         error_message = '%s %s' % (_msg_header, message)
540         self.trouble(error_message, tb)
541
542     def report_file_already_downloaded(self, file_name):
543         """Report file has already been fully downloaded."""
544         try:
545             self.to_screen('[download] %s has already been downloaded' % file_name)
546         except UnicodeEncodeError:
547             self.to_screen('[download] The file has already been downloaded')
548
549     def prepare_filename(self, info_dict):
550         """Generate the output filename."""
551         try:
552             template_dict = dict(info_dict)
553
554             template_dict['epoch'] = int(time.time())
555             autonumber_size = self.params.get('autonumber_size')
556             if autonumber_size is None:
557                 autonumber_size = 5
558             autonumber_templ = '%0' + str(autonumber_size) + 'd'
559             template_dict['autonumber'] = autonumber_templ % self._num_downloads
560             if template_dict.get('playlist_index') is not None:
561                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
562             if template_dict.get('resolution') is None:
563                 if template_dict.get('width') and template_dict.get('height'):
564                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
565                 elif template_dict.get('height'):
566                     template_dict['resolution'] = '%sp' % template_dict['height']
567                 elif template_dict.get('width'):
568                     template_dict['resolution'] = '?x%d' % template_dict['width']
569
570             sanitize = lambda k, v: sanitize_filename(
571                 compat_str(v),
572                 restricted=self.params.get('restrictfilenames'),
573                 is_id=(k == 'id'))
574             template_dict = dict((k, sanitize(k, v))
575                                  for k, v in template_dict.items()
576                                  if v is not None)
577             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
578
579             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
580             tmpl = compat_expanduser(outtmpl)
581             filename = tmpl % template_dict
582             # Temporary fix for #4787
583             # 'Treat' all problem characters by passing filename through preferredencoding
584             # to workaround encoding issues with subprocess on python2 @ Windows
585             if sys.version_info < (3, 0) and sys.platform == 'win32':
586                 filename = encodeFilename(filename, True).decode(preferredencoding())
587             return sanitize_path(filename)
588         except ValueError as err:
589             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
590             return None
591
592     def _match_entry(self, info_dict, incomplete):
593         """ Returns None iff the file should be downloaded """
594
595         video_title = info_dict.get('title', info_dict.get('id', 'video'))
596         if 'title' in info_dict:
597             # This can happen when we're just evaluating the playlist
598             title = info_dict['title']
599             matchtitle = self.params.get('matchtitle', False)
600             if matchtitle:
601                 if not re.search(matchtitle, title, re.IGNORECASE):
602                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
603             rejecttitle = self.params.get('rejecttitle', False)
604             if rejecttitle:
605                 if re.search(rejecttitle, title, re.IGNORECASE):
606                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
607         date = info_dict.get('upload_date', None)
608         if date is not None:
609             dateRange = self.params.get('daterange', DateRange())
610             if date not in dateRange:
611                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
612         view_count = info_dict.get('view_count', None)
613         if view_count is not None:
614             min_views = self.params.get('min_views')
615             if min_views is not None and view_count < min_views:
616                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
617             max_views = self.params.get('max_views')
618             if max_views is not None and view_count > max_views:
619                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
620         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
621             return 'Skipping "%s" because it is age restricted' % video_title
622         if self.in_download_archive(info_dict):
623             return '%s has already been recorded in archive' % video_title
624
625         if not incomplete:
626             match_filter = self.params.get('match_filter')
627             if match_filter is not None:
628                 ret = match_filter(info_dict)
629                 if ret is not None:
630                     return ret
631
632         return None
633
634     @staticmethod
635     def add_extra_info(info_dict, extra_info):
636         '''Set the keys from extra_info in info dict if they are missing'''
637         for key, value in extra_info.items():
638             info_dict.setdefault(key, value)
639
640     def extract_info(self, url, download=True, ie_key=None, extra_info={},
641                      process=True, force_generic_extractor=False):
642         '''
643         Returns a list with a dictionary for each video we find.
644         If 'download', also downloads the videos.
645         extra_info is a dict containing the extra values to add to each result
646         '''
647
648         if not ie_key and force_generic_extractor:
649             ie_key = 'Generic'
650
651         if ie_key:
652             ies = [self.get_info_extractor(ie_key)]
653         else:
654             ies = self._ies
655
656         for ie in ies:
657             if not ie.suitable(url):
658                 continue
659
660             if not ie.working():
661                 self.report_warning('The program functionality for this site has been marked as broken, '
662                                     'and will probably not work.')
663
664             try:
665                 ie_result = ie.extract(url)
666                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
667                     break
668                 if isinstance(ie_result, list):
669                     # Backwards compatibility: old IE result format
670                     ie_result = {
671                         '_type': 'compat_list',
672                         'entries': ie_result,
673                     }
674                 self.add_default_extra_info(ie_result, ie, url)
675                 if process:
676                     return self.process_ie_result(ie_result, download, extra_info)
677                 else:
678                     return ie_result
679             except ExtractorError as e:  # An error we somewhat expected
680                 self.report_error(compat_str(e), e.format_traceback())
681                 break
682             except MaxDownloadsReached:
683                 raise
684             except Exception as e:
685                 if self.params.get('ignoreerrors', False):
686                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
687                     break
688                 else:
689                     raise
690         else:
691             self.report_error('no suitable InfoExtractor for URL %s' % url)
692
693     def add_default_extra_info(self, ie_result, ie, url):
694         self.add_extra_info(ie_result, {
695             'extractor': ie.IE_NAME,
696             'webpage_url': url,
697             'webpage_url_basename': url_basename(url),
698             'extractor_key': ie.ie_key(),
699         })
700
701     def process_ie_result(self, ie_result, download=True, extra_info={}):
702         """
703         Take the result of the ie(may be modified) and resolve all unresolved
704         references (URLs, playlist items).
705
706         It will also download the videos if 'download'.
707         Returns the resolved ie_result.
708         """
709
710         result_type = ie_result.get('_type', 'video')
711
712         if result_type in ('url', 'url_transparent'):
713             extract_flat = self.params.get('extract_flat', False)
714             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
715                     extract_flat is True):
716                 if self.params.get('forcejson', False):
717                     self.to_stdout(json.dumps(ie_result))
718                 return ie_result
719
720         if result_type == 'video':
721             self.add_extra_info(ie_result, extra_info)
722             return self.process_video_result(ie_result, download=download)
723         elif result_type == 'url':
724             # We have to add extra_info to the results because it may be
725             # contained in a playlist
726             return self.extract_info(ie_result['url'],
727                                      download,
728                                      ie_key=ie_result.get('ie_key'),
729                                      extra_info=extra_info)
730         elif result_type == 'url_transparent':
731             # Use the information from the embedding page
732             info = self.extract_info(
733                 ie_result['url'], ie_key=ie_result.get('ie_key'),
734                 extra_info=extra_info, download=False, process=False)
735
736             force_properties = dict(
737                 (k, v) for k, v in ie_result.items() if v is not None)
738             for f in ('_type', 'url'):
739                 if f in force_properties:
740                     del force_properties[f]
741             new_result = info.copy()
742             new_result.update(force_properties)
743
744             assert new_result.get('_type') != 'url_transparent'
745
746             return self.process_ie_result(
747                 new_result, download=download, extra_info=extra_info)
748         elif result_type == 'playlist' or result_type == 'multi_video':
749             # We process each entry in the playlist
750             playlist = ie_result.get('title', None) or ie_result.get('id', None)
751             self.to_screen('[download] Downloading playlist: %s' % playlist)
752
753             playlist_results = []
754
755             playliststart = self.params.get('playliststart', 1) - 1
756             playlistend = self.params.get('playlistend', None)
757             # For backwards compatibility, interpret -1 as whole list
758             if playlistend == -1:
759                 playlistend = None
760
761             playlistitems_str = self.params.get('playlist_items', None)
762             playlistitems = None
763             if playlistitems_str is not None:
764                 def iter_playlistitems(format):
765                     for string_segment in format.split(','):
766                         if '-' in string_segment:
767                             start, end = string_segment.split('-')
768                             for item in range(int(start), int(end) + 1):
769                                 yield int(item)
770                         else:
771                             yield int(string_segment)
772                 playlistitems = iter_playlistitems(playlistitems_str)
773
774             ie_entries = ie_result['entries']
775             if isinstance(ie_entries, list):
776                 n_all_entries = len(ie_entries)
777                 if playlistitems:
778                     entries = [
779                         ie_entries[i - 1] for i in playlistitems
780                         if -n_all_entries <= i - 1 < n_all_entries]
781                 else:
782                     entries = ie_entries[playliststart:playlistend]
783                 n_entries = len(entries)
784                 self.to_screen(
785                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
786                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
787             elif isinstance(ie_entries, PagedList):
788                 if playlistitems:
789                     entries = []
790                     for item in playlistitems:
791                         entries.extend(ie_entries.getslice(
792                             item - 1, item
793                         ))
794                 else:
795                     entries = ie_entries.getslice(
796                         playliststart, playlistend)
797                 n_entries = len(entries)
798                 self.to_screen(
799                     "[%s] playlist %s: Downloading %d videos" %
800                     (ie_result['extractor'], playlist, n_entries))
801             else:  # iterable
802                 if playlistitems:
803                     entry_list = list(ie_entries)
804                     entries = [entry_list[i - 1] for i in playlistitems]
805                 else:
806                     entries = list(itertools.islice(
807                         ie_entries, playliststart, playlistend))
808                 n_entries = len(entries)
809                 self.to_screen(
810                     "[%s] playlist %s: Downloading %d videos" %
811                     (ie_result['extractor'], playlist, n_entries))
812
813             if self.params.get('playlistreverse', False):
814                 entries = entries[::-1]
815
816             for i, entry in enumerate(entries, 1):
817                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
818                 extra = {
819                     'n_entries': n_entries,
820                     'playlist': playlist,
821                     'playlist_id': ie_result.get('id'),
822                     'playlist_title': ie_result.get('title'),
823                     'playlist_index': i + playliststart,
824                     'extractor': ie_result['extractor'],
825                     'webpage_url': ie_result['webpage_url'],
826                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
827                     'extractor_key': ie_result['extractor_key'],
828                 }
829
830                 reason = self._match_entry(entry, incomplete=True)
831                 if reason is not None:
832                     self.to_screen('[download] ' + reason)
833                     continue
834
835                 entry_result = self.process_ie_result(entry,
836                                                       download=download,
837                                                       extra_info=extra)
838                 playlist_results.append(entry_result)
839             ie_result['entries'] = playlist_results
840             self.to_screen('[download] Finished downloading playlist: %s' % playlist)
841             return ie_result
842         elif result_type == 'compat_list':
843             self.report_warning(
844                 'Extractor %s returned a compat_list result. '
845                 'It needs to be updated.' % ie_result.get('extractor'))
846
847             def _fixup(r):
848                 self.add_extra_info(
849                     r,
850                     {
851                         'extractor': ie_result['extractor'],
852                         'webpage_url': ie_result['webpage_url'],
853                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
854                         'extractor_key': ie_result['extractor_key'],
855                     }
856                 )
857                 return r
858             ie_result['entries'] = [
859                 self.process_ie_result(_fixup(r), download, extra_info)
860                 for r in ie_result['entries']
861             ]
862             return ie_result
863         else:
864             raise Exception('Invalid result type: %s' % result_type)
865
866     def _build_format_filter(self, filter_spec):
867         " Returns a function to filter the formats according to the filter_spec "
868
869         OPERATORS = {
870             '<': operator.lt,
871             '<=': operator.le,
872             '>': operator.gt,
873             '>=': operator.ge,
874             '=': operator.eq,
875             '!=': operator.ne,
876         }
877         operator_rex = re.compile(r'''(?x)\s*
878             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
879             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
880             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
881             $
882             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
883         m = operator_rex.search(filter_spec)
884         if m:
885             try:
886                 comparison_value = int(m.group('value'))
887             except ValueError:
888                 comparison_value = parse_filesize(m.group('value'))
889                 if comparison_value is None:
890                     comparison_value = parse_filesize(m.group('value') + 'B')
891                 if comparison_value is None:
892                     raise ValueError(
893                         'Invalid value %r in format specification %r' % (
894                             m.group('value'), filter_spec))
895             op = OPERATORS[m.group('op')]
896
897         if not m:
898             STR_OPERATORS = {
899                 '=': operator.eq,
900                 '!=': operator.ne,
901             }
902             str_operator_rex = re.compile(r'''(?x)
903                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
904                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
905                 \s*(?P<value>[a-zA-Z0-9_-]+)
906                 \s*$
907                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
908             m = str_operator_rex.search(filter_spec)
909             if m:
910                 comparison_value = m.group('value')
911                 op = STR_OPERATORS[m.group('op')]
912
913         if not m:
914             raise ValueError('Invalid filter specification %r' % filter_spec)
915
916         def _filter(f):
917             actual_value = f.get(m.group('key'))
918             if actual_value is None:
919                 return m.group('none_inclusive')
920             return op(actual_value, comparison_value)
921         return _filter
922
    def build_format_selector(self, format_spec):
        """
        Compile the format specification string format_spec into a selector.

        The specification is split into tokens with Python's tokenizer,
        parsed into a tree of FormatSelector tuples and finally turned into
        nested closures.  The returned function takes the available format
        dicts and returns an iterable (a list or a generator) with the
        formats that were picked.

        Raises SyntaxError when format_spec cannot be tokenized or parsed.
        A ValueError from _build_format_filter() propagates for an invalid
        "[...]" filter.
        """
        def syntax_error(note, start):
            # Builds (does not raise) a SyntaxError whose message shows the
            # specification with a caret under column start[1]
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        # Node kinds of the parsed selector tree
        PICKFIRST = 'PICKFIRST'  # "a/b"
        MERGE = 'MERGE'  # "a+b"
        SINGLE = 'SINGLE'  # a single name such as "best" or a format id
        GROUP = 'GROUP'  # "(...)"
        # selector holds the name (SINGLE), a pair of operands (PICKFIRST,
        # MERGE) or a list of selectors (GROUP); filters holds the raw text
        # of the "[...]" filters attached to the node
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        def _parse_filter(tokens):
            # Consumes the tokens up to and including the closing ']' and
            # returns the text in between.  Implicitly returns None if the
            # tokens run out before a ']' is seen.
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            # Pending merged name.  Note that last_line is never reassigned
            # and last_end keeps the end of the first merged piece.
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    # Part of a name: start a new one or extend the pending one
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            # Tokens of any other type are dropped; flush the pending name
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Parses tokens into a list of FormatSelector tuples (the
            # comma-separated alternatives).  tokens must provide
            # restore_last_token(); the inside_* flags tell a recursive call
            # which operators end its sub-expression and have to be pushed
            # back for the caller to handle.
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        # The right hand side is a list of selectors
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        # A filter without a preceding selector applies to 'best'
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        video_selector = current_selector
                        # An empty list here means nothing followed the '+'
                        audio_selector = _parse_format_selection(tokens, inside_merge=True)
                        if not video_selector or not audio_selector:
                            raise syntax_error('"+" must be between two format selectors', start)
                        current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _build_selector_function(selector):
            # Turns a FormatSelector (or a list of them) into a function
            # taking the formats and returning the selected ones
            if isinstance(selector, list):
                fs = [_build_selector_function(s) for s in selector]

                # A list of selectors yields the picks of each member in
                # turn; lists carry no filters, hence the early return
                def selector_function(formats):
                    for f in fs:
                        for format in f(formats):
                            yield format
                return selector_function
            elif selector.type == GROUP:
                selector_function = _build_selector_function(selector.selector)
            elif selector.type == PICKFIRST:
                fs = [_build_selector_function(s) for s in selector.selector]

                # The first alternative that selects anything wins
                def selector_function(formats):
                    for f in fs:
                        picked_formats = list(f(formats))
                        if picked_formats:
                            return picked_formats
                    return []
            elif selector.type == SINGLE:
                format_spec = selector.selector

                # NOTE(review): "best" is taken from the end of formats and
                # "worst" from the start, so formats is presumably ordered
                # from worst to best by the caller -- confirm
                def selector_function(formats):
                    formats = list(formats)
                    if not formats:
                        return
                    if format_spec == 'all':
                        for f in formats:
                            yield f
                    elif format_spec in ['best', 'worst', None]:
                        format_idx = 0 if format_spec == 'worst' else -1
                        # Prefer the formats that have both audio and video
                        audiovideo_formats = [
                            f for f in formats
                            if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                        if audiovideo_formats:
                            yield audiovideo_formats[format_idx]
                        # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
                        elif (all(f.get('acodec') != 'none' for f in formats) or
                              all(f.get('vcodec') != 'none' for f in formats)):
                            yield formats[format_idx]
                    elif format_spec == 'bestaudio':
                        # Audio only formats are the ones without a video codec
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[-1]
                    elif format_spec == 'worstaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[0]
                    elif format_spec == 'bestvideo':
                        # Video only formats are the ones without an audio codec
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[-1]
                    elif format_spec == 'worstvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[0]
                    else:
                        # Anything else is a known file extension or a
                        # format_id; the last matching format is selected
                        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                        if format_spec in extensions:
                            filter_f = lambda f: f['ext'] == format_spec
                        else:
                            filter_f = lambda f: f['format_id'] == format_spec
                        matches = list(filter(filter_f, formats))
                        if matches:
                            yield matches[-1]
            elif selector.type == MERGE:
                def _merge(formats_info):
                    # Builds the info of the merged format out of a
                    # (video, audio) pair of formats.  None is returned
                    # after an error was reported, provided report_error()
                    # returns at all.
                    format_1, format_2 = [f['format_id'] for f in formats_info]
                    # The first format must contain the video and the
                    # second the audio
                    if formats_info[0].get('vcodec') == 'none':
                        self.report_error('The first format must '
                                          'contain the video, try using '
                                          '"-f %s+%s"' % (format_2, format_1))
                        return
                    # Formats must be opposite (video+audio)
                    if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
                        self.report_error(
                            'Both formats %s and %s are video-only, you must specify "-f video+audio"'
                            % (format_1, format_2))
                        return
                    # The container of the video format is kept unless the
                    # 'merge_output_format' param overrides it
                    output_ext = (
                        formats_info[0]['ext']
                        if self.params.get('merge_output_format') is None
                        else self.params['merge_output_format'])
                    # Video properties come from the first format, audio
                    # properties from the second one
                    return {
                        'requested_formats': formats_info,
                        'format': '%s+%s' % (formats_info[0].get('format'),
                                             formats_info[1].get('format')),
                        'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                formats_info[1].get('format_id')),
                        'width': formats_info[0].get('width'),
                        'height': formats_info[0].get('height'),
                        'resolution': formats_info[0].get('resolution'),
                        'fps': formats_info[0].get('fps'),
                        'vcodec': formats_info[0].get('vcodec'),
                        'vbr': formats_info[0].get('vbr'),
                        'stretched_ratio': formats_info[0].get('stretched_ratio'),
                        'acodec': formats_info[1].get('acodec'),
                        'abr': formats_info[1].get('abr'),
                        'ext': output_ext,
                    }
                video_selector, audio_selector = map(_build_selector_function, selector.selector)

                # Every combination of a selected video and a selected
                # audio format is merged
                def selector_function(formats):
                    formats = list(formats)
                    for pair in itertools.product(video_selector(formats), audio_selector(formats)):
                        yield _merge(pair)

            filters = [self._build_format_filter(f) for f in selector.filters]

            # The filters narrow down the formats before the selection runs
            def final_selector(formats):
                for _filter in filters:
                    formats = list(filter(_filter, formats))
                return selector_function(formats)
            return final_selector

        # The tokenizer reads lines of bytes, hence the encoded stream
        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator(object):
            # Iterator over the token list that allows the parser to push
            # the token it has just read back
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            # Python 2 spelling of the iterator protocol
            next = __next__

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
1185
1186     def _calc_headers(self, info_dict):
1187         res = std_headers.copy()
1188
1189         add_headers = info_dict.get('http_headers')
1190         if add_headers:
1191             res.update(add_headers)
1192
1193         cookies = self._calc_cookies(info_dict)
1194         if cookies:
1195             res['Cookie'] = cookies
1196
1197         return res
1198
1199     def _calc_cookies(self, info_dict):
1200         pr = sanitized_Request(info_dict['url'])
1201         self.cookiejar.add_cookie_header(pr)
1202         return pr.get_header('Cookie')
1203
1204     def process_video_result(self, info_dict, download=True):
1205         assert info_dict.get('_type', 'video') == 'video'
1206
1207         if 'id' not in info_dict:
1208             raise ExtractorError('Missing "id" field in extractor result')
1209         if 'title' not in info_dict:
1210             raise ExtractorError('Missing "title" field in extractor result')
1211
1212         if 'playlist' not in info_dict:
1213             # It isn't part of a playlist
1214             info_dict['playlist'] = None
1215             info_dict['playlist_index'] = None
1216
1217         thumbnails = info_dict.get('thumbnails')
1218         if thumbnails is None:
1219             thumbnail = info_dict.get('thumbnail')
1220             if thumbnail:
1221                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1222         if thumbnails:
1223             thumbnails.sort(key=lambda t: (
1224                 t.get('preference'), t.get('width'), t.get('height'),
1225                 t.get('id'), t.get('url')))
1226             for i, t in enumerate(thumbnails):
1227                 if t.get('width') and t.get('height'):
1228                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1229                 if t.get('id') is None:
1230                     t['id'] = '%d' % i
1231
1232         if thumbnails and 'thumbnail' not in info_dict:
1233             info_dict['thumbnail'] = thumbnails[-1]['url']
1234
1235         if 'display_id' not in info_dict and 'id' in info_dict:
1236             info_dict['display_id'] = info_dict['id']
1237
1238         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1239             # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1240             # see http://bugs.python.org/issue1646728)
1241             try:
1242                 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1243                 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1244             except (ValueError, OverflowError, OSError):
1245                 pass
1246
1247         # Auto generate title fields corresponding to the *_number fields when missing
1248         # in order to always have clean titles. This is very common for TV series.
1249         for field in ('chapter', 'season', 'episode'):
1250             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1251                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
1252
1253         subtitles = info_dict.get('subtitles')
1254         if subtitles:
1255             for _, subtitle in subtitles.items():
1256                 for subtitle_format in subtitle:
1257                     if 'ext' not in subtitle_format:
1258                         subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1259
1260         if self.params.get('listsubtitles', False):
1261             if 'automatic_captions' in info_dict:
1262                 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
1263             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1264             return
1265         info_dict['requested_subtitles'] = self.process_subtitles(
1266             info_dict['id'], subtitles,
1267             info_dict.get('automatic_captions'))
1268
1269         # We now pick which formats have to be downloaded
1270         if info_dict.get('formats') is None:
1271             # There's only one format available
1272             formats = [info_dict]
1273         else:
1274             formats = info_dict['formats']
1275
1276         if not formats:
1277             raise ExtractorError('No video formats found!')
1278
1279         formats_dict = {}
1280
1281         # We check that all the formats have the format and format_id fields
1282         for i, format in enumerate(formats):
1283             if 'url' not in format:
1284                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1285
1286             if format.get('format_id') is None:
1287                 format['format_id'] = compat_str(i)
1288             format_id = format['format_id']
1289             if format_id not in formats_dict:
1290                 formats_dict[format_id] = []
1291             formats_dict[format_id].append(format)
1292
1293         # Make sure all formats have unique format_id
1294         for format_id, ambiguous_formats in formats_dict.items():
1295             if len(ambiguous_formats) > 1:
1296                 for i, format in enumerate(ambiguous_formats):
1297                     format['format_id'] = '%s-%d' % (format_id, i)
1298
1299         for i, format in enumerate(formats):
1300             if format.get('format') is None:
1301                 format['format'] = '{id} - {res}{note}'.format(
1302                     id=format['format_id'],
1303                     res=self.format_resolution(format),
1304                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1305                 )
1306             # Automatically determine file extension if missing
1307             if 'ext' not in format:
1308                 format['ext'] = determine_ext(format['url']).lower()
1309             # Add HTTP headers, so that external programs can use them from the
1310             # json output
1311             full_format_info = info_dict.copy()
1312             full_format_info.update(format)
1313             format['http_headers'] = self._calc_headers(full_format_info)
1314
1315         # TODO Central sorting goes here
1316
1317         if formats[0] is not info_dict:
1318             # only set the 'formats' fields if the original info_dict list them
1319             # otherwise we end up with a circular reference, the first (and unique)
1320             # element in the 'formats' field in info_dict is info_dict itself,
1321             # which can't be exported to json
1322             info_dict['formats'] = formats
1323         if self.params.get('listformats'):
1324             self.list_formats(info_dict)
1325             return
1326         if self.params.get('list_thumbnails'):
1327             self.list_thumbnails(info_dict)
1328             return
1329
1330         req_format = self.params.get('format')
1331         if req_format is None:
1332             req_format_list = []
1333             if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
1334                     info_dict['extractor'] in ['youtube', 'ted'] and
1335                     not info_dict.get('is_live')):
1336                 merger = FFmpegMergerPP(self)
1337                 if merger.available and merger.can_merge():
1338                     req_format_list.append('bestvideo+bestaudio')
1339             req_format_list.append('best')
1340             req_format = '/'.join(req_format_list)
1341         format_selector = self.build_format_selector(req_format)
1342         formats_to_download = list(format_selector(formats))
1343         if not formats_to_download:
1344             raise ExtractorError('requested format not available',
1345                                  expected=True)
1346
1347         if download:
1348             if len(formats_to_download) > 1:
1349                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1350             for format in formats_to_download:
1351                 new_info = dict(info_dict)
1352                 new_info.update(format)
1353                 self.process_info(new_info)
1354         # We update the info dict with the best quality format (backwards compatibility)
1355         info_dict.update(formats_to_download[-1])
1356         return info_dict
1357
1358     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1359         """Select the requested subtitles and their format"""
1360         available_subs = {}
1361         if normal_subtitles and self.params.get('writesubtitles'):
1362             available_subs.update(normal_subtitles)
1363         if automatic_captions and self.params.get('writeautomaticsub'):
1364             for lang, cap_info in automatic_captions.items():
1365                 if lang not in available_subs:
1366                     available_subs[lang] = cap_info
1367
1368         if (not self.params.get('writesubtitles') and not
1369                 self.params.get('writeautomaticsub') or not
1370                 available_subs):
1371             return None
1372
1373         if self.params.get('allsubtitles', False):
1374             requested_langs = available_subs.keys()
1375         else:
1376             if self.params.get('subtitleslangs', False):
1377                 requested_langs = self.params.get('subtitleslangs')
1378             elif 'en' in available_subs:
1379                 requested_langs = ['en']
1380             else:
1381                 requested_langs = [list(available_subs.keys())[0]]
1382
1383         formats_query = self.params.get('subtitlesformat', 'best')
1384         formats_preference = formats_query.split('/') if formats_query else []
1385         subs = {}
1386         for lang in requested_langs:
1387             formats = available_subs.get(lang)
1388             if formats is None:
1389                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1390                 continue
1391             for ext in formats_preference:
1392                 if ext == 'best':
1393                     f = formats[-1]
1394                     break
1395                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1396                 if matches:
1397                     f = matches[-1]
1398                     break
1399             else:
1400                 f = formats[-1]
1401                 self.report_warning(
1402                     'No subtitle format found matching "%s" for language %s, '
1403                     'using %s' % (formats_query, lang, f['ext']))
1404             subs[lang] = f
1405         return subs
1406
1407     def process_info(self, info_dict):
1408         """Process a single resolved IE result."""
1409
1410         assert info_dict.get('_type', 'video') == 'video'
1411
1412         max_downloads = self.params.get('max_downloads')
1413         if max_downloads is not None:
1414             if self._num_downloads >= int(max_downloads):
1415                 raise MaxDownloadsReached()
1416
1417         info_dict['fulltitle'] = info_dict['title']
1418         if len(info_dict['title']) > 200:
1419             info_dict['title'] = info_dict['title'][:197] + '...'
1420
1421         if 'format' not in info_dict:
1422             info_dict['format'] = info_dict['ext']
1423
1424         reason = self._match_entry(info_dict, incomplete=False)
1425         if reason is not None:
1426             self.to_screen('[download] ' + reason)
1427             return
1428
1429         self._num_downloads += 1
1430
1431         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1432
1433         # Forced printings
1434         if self.params.get('forcetitle', False):
1435             self.to_stdout(info_dict['fulltitle'])
1436         if self.params.get('forceid', False):
1437             self.to_stdout(info_dict['id'])
1438         if self.params.get('forceurl', False):
1439             if info_dict.get('requested_formats') is not None:
1440                 for f in info_dict['requested_formats']:
1441                     self.to_stdout(f['url'] + f.get('play_path', ''))
1442             else:
1443                 # For RTMP URLs, also include the playpath
1444                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1445         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1446             self.to_stdout(info_dict['thumbnail'])
1447         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1448             self.to_stdout(info_dict['description'])
1449         if self.params.get('forcefilename', False) and filename is not None:
1450             self.to_stdout(filename)
1451         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1452             self.to_stdout(formatSeconds(info_dict['duration']))
1453         if self.params.get('forceformat', False):
1454             self.to_stdout(info_dict['format'])
1455         if self.params.get('forcejson', False):
1456             self.to_stdout(json.dumps(info_dict))
1457
1458         # Do nothing else if in simulate mode
1459         if self.params.get('simulate', False):
1460             return
1461
1462         if filename is None:
1463             return
1464
1465         try:
1466             dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1467             if dn and not os.path.exists(dn):
1468                 os.makedirs(dn)
1469         except (OSError, IOError) as err:
1470             self.report_error('unable to create directory ' + error_to_compat_str(err))
1471             return
1472
1473         if self.params.get('writedescription', False):
1474             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1475             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1476                 self.to_screen('[info] Video description is already present')
1477             elif info_dict.get('description') is None:
1478                 self.report_warning('There\'s no description to write.')
1479             else:
1480                 try:
1481                     self.to_screen('[info] Writing video description to: ' + descfn)
1482                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1483                         descfile.write(info_dict['description'])
1484                 except (OSError, IOError):
1485                     self.report_error('Cannot write description file ' + descfn)
1486                     return
1487
1488         if self.params.get('writeannotations', False):
1489             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1490             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1491                 self.to_screen('[info] Video annotations are already present')
1492             else:
1493                 try:
1494                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1495                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1496                         annofile.write(info_dict['annotations'])
1497                 except (KeyError, TypeError):
1498                     self.report_warning('There are no annotations to write.')
1499                 except (OSError, IOError):
1500                     self.report_error('Cannot write annotations file: ' + annofn)
1501                     return
1502
1503         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1504                                        self.params.get('writeautomaticsub')])
1505
1506         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1507             # subtitles download errors are already managed as troubles in relevant IE
1508             # that way it will silently go on when used with unsupporting IE
1509             subtitles = info_dict['requested_subtitles']
1510             ie = self.get_info_extractor(info_dict['extractor_key'])
1511             for sub_lang, sub_info in subtitles.items():
1512                 sub_format = sub_info['ext']
1513                 if sub_info.get('data') is not None:
1514                     sub_data = sub_info['data']
1515                 else:
1516                     try:
1517                         sub_data = ie._download_webpage(
1518                             sub_info['url'], info_dict['id'], note=False)
1519                     except ExtractorError as err:
1520                         self.report_warning('Unable to download subtitle for "%s": %s' %
1521                                             (sub_lang, error_to_compat_str(err.cause)))
1522                         continue
1523                 try:
1524                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1525                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1526                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1527                     else:
1528                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1529                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1530                             subfile.write(sub_data)
1531                 except (OSError, IOError):
1532                     self.report_error('Cannot write subtitles file ' + sub_filename)
1533                     return
1534
1535         if self.params.get('writeinfojson', False):
1536             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1537             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1538                 self.to_screen('[info] Video description metadata is already present')
1539             else:
1540                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1541                 try:
1542                     write_json_file(self.filter_requested_info(info_dict), infofn)
1543                 except (OSError, IOError):
1544                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1545                     return
1546
1547         self._write_thumbnails(info_dict, filename)
1548
1549         if not self.params.get('skip_download', False):
1550             try:
1551                 def dl(name, info):
1552                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1553                     for ph in self._progress_hooks:
1554                         fd.add_progress_hook(ph)
1555                     if self.params.get('verbose'):
1556                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1557                     return fd.download(name, info)
1558
1559                 if info_dict.get('requested_formats') is not None:
1560                     downloaded = []
1561                     success = True
1562                     merger = FFmpegMergerPP(self)
1563                     if not merger.available:
1564                         postprocessors = []
1565                         self.report_warning('You have requested multiple '
1566                                             'formats but ffmpeg or avconv are not installed.'
1567                                             ' The formats won\'t be merged.')
1568                     else:
1569                         postprocessors = [merger]
1570
1571                     def compatible_formats(formats):
1572                         video, audio = formats
1573                         # Check extension
1574                         video_ext, audio_ext = audio.get('ext'), video.get('ext')
1575                         if video_ext and audio_ext:
1576                             COMPATIBLE_EXTS = (
1577                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
1578                                 ('webm')
1579                             )
1580                             for exts in COMPATIBLE_EXTS:
1581                                 if video_ext in exts and audio_ext in exts:
1582                                     return True
1583                         # TODO: Check acodec/vcodec
1584                         return False
1585
1586                     filename_real_ext = os.path.splitext(filename)[1][1:]
1587                     filename_wo_ext = (
1588                         os.path.splitext(filename)[0]
1589                         if filename_real_ext == info_dict['ext']
1590                         else filename)
1591                     requested_formats = info_dict['requested_formats']
1592                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1593                         info_dict['ext'] = 'mkv'
1594                         self.report_warning(
1595                             'Requested formats are incompatible for merge and will be merged into mkv.')
1596                     # Ensure filename always has a correct extension for successful merge
1597                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1598                     if os.path.exists(encodeFilename(filename)):
1599                         self.to_screen(
1600                             '[download] %s has already been downloaded and '
1601                             'merged' % filename)
1602                     else:
1603                         for f in requested_formats:
1604                             new_info = dict(info_dict)
1605                             new_info.update(f)
1606                             fname = self.prepare_filename(new_info)
1607                             fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
1608                             downloaded.append(fname)
1609                             partial_success = dl(fname, new_info)
1610                             success = success and partial_success
1611                         info_dict['__postprocessors'] = postprocessors
1612                         info_dict['__files_to_merge'] = downloaded
1613                 else:
1614                     # Just a single file
1615                     success = dl(filename, info_dict)
1616             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1617                 self.report_error('unable to download video data: %s' % str(err))
1618                 return
1619             except (OSError, IOError) as err:
1620                 raise UnavailableVideoError(err)
1621             except (ContentTooShortError, ) as err:
1622                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1623                 return
1624
1625             if success:
1626                 # Fixup content
1627                 fixup_policy = self.params.get('fixup')
1628                 if fixup_policy is None:
1629                     fixup_policy = 'detect_or_warn'
1630
1631                 stretched_ratio = info_dict.get('stretched_ratio')
1632                 if stretched_ratio is not None and stretched_ratio != 1:
1633                     if fixup_policy == 'warn':
1634                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1635                             info_dict['id'], stretched_ratio))
1636                     elif fixup_policy == 'detect_or_warn':
1637                         stretched_pp = FFmpegFixupStretchedPP(self)
1638                         if stretched_pp.available:
1639                             info_dict.setdefault('__postprocessors', [])
1640                             info_dict['__postprocessors'].append(stretched_pp)
1641                         else:
1642                             self.report_warning(
1643                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1644                                     info_dict['id'], stretched_ratio))
1645                     else:
1646                         assert fixup_policy in ('ignore', 'never')
1647
1648                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1649                     if fixup_policy == 'warn':
1650                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1651                             info_dict['id']))
1652                     elif fixup_policy == 'detect_or_warn':
1653                         fixup_pp = FFmpegFixupM4aPP(self)
1654                         if fixup_pp.available:
1655                             info_dict.setdefault('__postprocessors', [])
1656                             info_dict['__postprocessors'].append(fixup_pp)
1657                         else:
1658                             self.report_warning(
1659                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1660                                     info_dict['id']))
1661                     else:
1662                         assert fixup_policy in ('ignore', 'never')
1663
1664                 try:
1665                     self.post_process(filename, info_dict)
1666                 except (PostProcessingError) as err:
1667                     self.report_error('postprocessing: %s' % str(err))
1668                     return
1669                 self.record_download_archive(info_dict)
1670
1671     def download(self, url_list):
1672         """Download a given list of URLs."""
1673         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1674         if (len(url_list) > 1 and
1675                 '%' not in outtmpl and
1676                 self.params.get('max_downloads') != 1):
1677             raise SameFileError(outtmpl)
1678
1679         for url in url_list:
1680             try:
1681                 # It also downloads the videos
1682                 res = self.extract_info(
1683                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
1684             except UnavailableVideoError:
1685                 self.report_error('unable to download video')
1686             except MaxDownloadsReached:
1687                 self.to_screen('[info] Maximum number of downloaded files reached.')
1688                 raise
1689             else:
1690                 if self.params.get('dump_single_json', False):
1691                     self.to_stdout(json.dumps(res))
1692
1693         return self._download_retcode
1694
1695     def download_with_info_file(self, info_filename):
1696         with contextlib.closing(fileinput.FileInput(
1697                 [info_filename], mode='r',
1698                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1699             # FileInput doesn't have a read method, we can't call json.load
1700             info = self.filter_requested_info(json.loads('\n'.join(f)))
1701         try:
1702             self.process_ie_result(info, download=True)
1703         except DownloadError:
1704             webpage_url = info.get('webpage_url')
1705             if webpage_url is not None:
1706                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1707                 return self.download([webpage_url])
1708             else:
1709                 raise
1710         return self._download_retcode
1711
1712     @staticmethod
1713     def filter_requested_info(info_dict):
1714         return dict(
1715             (k, v) for k, v in info_dict.items()
1716             if k not in ['requested_formats', 'requested_subtitles'])
1717
1718     def post_process(self, filename, ie_info):
1719         """Run all the postprocessors on the given file."""
1720         info = dict(ie_info)
1721         info['filepath'] = filename
1722         pps_chain = []
1723         if ie_info.get('__postprocessors') is not None:
1724             pps_chain.extend(ie_info['__postprocessors'])
1725         pps_chain.extend(self._pps)
1726         for pp in pps_chain:
1727             files_to_delete = []
1728             try:
1729                 files_to_delete, info = pp.run(info)
1730             except PostProcessingError as e:
1731                 self.report_error(e.msg)
1732             if files_to_delete and not self.params.get('keepvideo', False):
1733                 for old_filename in files_to_delete:
1734                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1735                     try:
1736                         os.remove(encodeFilename(old_filename))
1737                     except (IOError, OSError):
1738                         self.report_warning('Unable to remove downloaded original file')
1739
1740     def _make_archive_id(self, info_dict):
1741         # Future-proof against any change in case
1742         # and backwards compatibility with prior versions
1743         extractor = info_dict.get('extractor_key')
1744         if extractor is None:
1745             if 'id' in info_dict:
1746                 extractor = info_dict.get('ie_key')  # key in a playlist
1747         if extractor is None:
1748             return None  # Incomplete video information
1749         return extractor.lower() + ' ' + info_dict['id']
1750
1751     def in_download_archive(self, info_dict):
1752         fn = self.params.get('download_archive')
1753         if fn is None:
1754             return False
1755
1756         vid_id = self._make_archive_id(info_dict)
1757         if vid_id is None:
1758             return False  # Incomplete video information
1759
1760         try:
1761             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1762                 for line in archive_file:
1763                     if line.strip() == vid_id:
1764                         return True
1765         except IOError as ioe:
1766             if ioe.errno != errno.ENOENT:
1767                 raise
1768         return False
1769
1770     def record_download_archive(self, info_dict):
1771         fn = self.params.get('download_archive')
1772         if fn is None:
1773             return
1774         vid_id = self._make_archive_id(info_dict)
1775         assert vid_id
1776         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1777             archive_file.write(vid_id + '\n')
1778
1779     @staticmethod
1780     def format_resolution(format, default='unknown'):
1781         if format.get('vcodec') == 'none':
1782             return 'audio only'
1783         if format.get('resolution') is not None:
1784             return format['resolution']
1785         if format.get('height') is not None:
1786             if format.get('width') is not None:
1787                 res = '%sx%s' % (format['width'], format['height'])
1788             else:
1789                 res = '%sp' % format['height']
1790         elif format.get('width') is not None:
1791             res = '?x%d' % format['width']
1792         else:
1793             res = default
1794         return res
1795
1796     def _format_note(self, fdict):
1797         res = ''
1798         if fdict.get('ext') in ['f4f', 'f4m']:
1799             res += '(unsupported) '
1800         if fdict.get('language'):
1801             if res:
1802                 res += ' '
1803             res += '[%s]' % fdict['language']
1804         if fdict.get('format_note') is not None:
1805             res += fdict['format_note'] + ' '
1806         if fdict.get('tbr') is not None:
1807             res += '%4dk ' % fdict['tbr']
1808         if fdict.get('container') is not None:
1809             if res:
1810                 res += ', '
1811             res += '%s container' % fdict['container']
1812         if (fdict.get('vcodec') is not None and
1813                 fdict.get('vcodec') != 'none'):
1814             if res:
1815                 res += ', '
1816             res += fdict['vcodec']
1817             if fdict.get('vbr') is not None:
1818                 res += '@'
1819         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1820             res += 'video@'
1821         if fdict.get('vbr') is not None:
1822             res += '%4dk' % fdict['vbr']
1823         if fdict.get('fps') is not None:
1824             res += ', %sfps' % fdict['fps']
1825         if fdict.get('acodec') is not None:
1826             if res:
1827                 res += ', '
1828             if fdict['acodec'] == 'none':
1829                 res += 'video only'
1830             else:
1831                 res += '%-5s' % fdict['acodec']
1832         elif fdict.get('abr') is not None:
1833             if res:
1834                 res += ', '
1835             res += 'audio'
1836         if fdict.get('abr') is not None:
1837             res += '@%3dk' % fdict['abr']
1838         if fdict.get('asr') is not None:
1839             res += ' (%5dHz)' % fdict['asr']
1840         if fdict.get('filesize') is not None:
1841             if res:
1842                 res += ', '
1843             res += format_bytes(fdict['filesize'])
1844         elif fdict.get('filesize_approx') is not None:
1845             if res:
1846                 res += ', '
1847             res += '~' + format_bytes(fdict['filesize_approx'])
1848         return res
1849
1850     def list_formats(self, info_dict):
1851         formats = info_dict.get('formats', [info_dict])
1852         table = [
1853             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1854             for f in formats
1855             if f.get('preference') is None or f['preference'] >= -1000]
1856         if len(formats) > 1:
1857             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1858
1859         header_line = ['format code', 'extension', 'resolution', 'note']
1860         self.to_screen(
1861             '[info] Available formats for %s:\n%s' %
1862             (info_dict['id'], render_table(header_line, table)))
1863
1864     def list_thumbnails(self, info_dict):
1865         thumbnails = info_dict.get('thumbnails')
1866         if not thumbnails:
1867             tn_url = info_dict.get('thumbnail')
1868             if tn_url:
1869                 thumbnails = [{'id': '0', 'url': tn_url}]
1870             else:
1871                 self.to_screen(
1872                     '[info] No thumbnails present for %s' % info_dict['id'])
1873                 return
1874
1875         self.to_screen(
1876             '[info] Thumbnails for %s:' % info_dict['id'])
1877         self.to_screen(render_table(
1878             ['ID', 'width', 'height', 'URL'],
1879             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1880
1881     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1882         if not subtitles:
1883             self.to_screen('%s has no %s' % (video_id, name))
1884             return
1885         self.to_screen(
1886             'Available %s for %s:' % (name, video_id))
1887         self.to_screen(render_table(
1888             ['Language', 'formats'],
1889             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1890                 for lang, formats in subtitles.items()]))
1891
1892     def urlopen(self, req):
1893         """ Start an HTTP download """
1894         if isinstance(req, compat_basestring):
1895             req = sanitized_Request(req)
1896         return self._opener.open(req, timeout=self._socket_timeout)
1897
1898     def print_debug_header(self):
1899         if not self.params.get('verbose'):
1900             return
1901
1902         if type('') is not compat_str:
1903             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1904             self.report_warning(
1905                 'Your Python is broken! Update to a newer and supported version')
1906
1907         stdout_encoding = getattr(
1908             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1909         encoding_str = (
1910             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1911                 locale.getpreferredencoding(),
1912                 sys.getfilesystemencoding(),
1913                 stdout_encoding,
1914                 self.get_encoding()))
1915         write_string(encoding_str, encoding=None)
1916
1917         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1918         try:
1919             sp = subprocess.Popen(
1920                 ['git', 'rev-parse', '--short', 'HEAD'],
1921                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1922                 cwd=os.path.dirname(os.path.abspath(__file__)))
1923             out, err = sp.communicate()
1924             out = out.decode().strip()
1925             if re.match('[0-9a-f]+', out):
1926                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1927         except Exception:
1928             try:
1929                 sys.exc_clear()
1930             except Exception:
1931                 pass
1932         self._write_string('[debug] Python version %s - %s\n' % (
1933             platform.python_version(), platform_name()))
1934
1935         exe_versions = FFmpegPostProcessor.get_versions(self)
1936         exe_versions['rtmpdump'] = rtmpdump_version()
1937         exe_str = ', '.join(
1938             '%s %s' % (exe, v)
1939             for exe, v in sorted(exe_versions.items())
1940             if v
1941         )
1942         if not exe_str:
1943             exe_str = 'none'
1944         self._write_string('[debug] exe versions: %s\n' % exe_str)
1945
1946         proxy_map = {}
1947         for handler in self._opener.handlers:
1948             if hasattr(handler, 'proxies'):
1949                 proxy_map.update(handler.proxies)
1950         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1951
1952         if self.params.get('call_home', False):
1953             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1954             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1955             latest_version = self.urlopen(
1956                 'https://yt-dl.org/latest/version').read().decode('utf-8')
1957             if version_tuple(latest_version) > version_tuple(__version__):
1958                 self.report_warning(
1959                     'You are using an outdated version (newest version: %s)! '
1960                     'See https://yt-dl.org/update if you need help updating.' %
1961                     latest_version)
1962
1963     def _setup_opener(self):
1964         timeout_val = self.params.get('socket_timeout')
1965         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1966
1967         opts_cookiefile = self.params.get('cookiefile')
1968         opts_proxy = self.params.get('proxy')
1969
1970         if opts_cookiefile is None:
1971             self.cookiejar = compat_cookiejar.CookieJar()
1972         else:
1973             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1974                 opts_cookiefile)
1975             if os.access(opts_cookiefile, os.R_OK):
1976                 self.cookiejar.load()
1977
1978         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
1979         if opts_proxy is not None:
1980             if opts_proxy == '':
1981                 proxies = {}
1982             else:
1983                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1984         else:
1985             proxies = compat_urllib_request.getproxies()
1986             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1987             if 'http' in proxies and 'https' not in proxies:
1988                 proxies['https'] = proxies['http']
1989         proxy_handler = PerRequestProxyHandler(proxies)
1990
1991         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1992         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1993         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1994         data_handler = compat_urllib_request_DataHandler()
1995
1996         # When passing our own FileHandler instance, build_opener won't add the
1997         # default FileHandler and allows us to disable the file protocol, which
1998         # can be used for malicious purposes (see
1999         # https://github.com/rg3/youtube-dl/issues/8227)
2000         file_handler = compat_urllib_request.FileHandler()
2001
2002         def file_open(*args, **kwargs):
2003             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
2004         file_handler.file_open = file_open
2005
2006         opener = compat_urllib_request.build_opener(
2007             proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)
2008
2009         # Delete the default user-agent header, which would otherwise apply in
2010         # cases where our custom HTTP handler doesn't come into play
2011         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
2012         opener.addheaders = []
2013         self._opener = opener
2014
2015     def encode(self, s):
2016         if isinstance(s, bytes):
2017             return s  # Already encoded
2018
2019         try:
2020             return s.encode(self.get_encoding())
2021         except UnicodeEncodeError as err:
2022             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2023             raise
2024
2025     def get_encoding(self):
2026         encoding = self.params.get('encoding')
2027         if encoding is None:
2028             encoding = preferredencoding()
2029         return encoding
2030
2031     def _write_thumbnails(self, info_dict, filename):
2032         if self.params.get('writethumbnail', False):
2033             thumbnails = info_dict.get('thumbnails')
2034             if thumbnails:
2035                 thumbnails = [thumbnails[-1]]
2036         elif self.params.get('write_all_thumbnails', False):
2037             thumbnails = info_dict.get('thumbnails')
2038         else:
2039             return
2040
2041         if not thumbnails:
2042             # No thumbnails present, so return immediately
2043             return
2044
2045         for t in thumbnails:
2046             thumb_ext = determine_ext(t['url'], 'jpg')
2047             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2048             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2049             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
2050
2051             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2052                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2053                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2054             else:
2055                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2056                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2057                 try:
2058                     uf = self.urlopen(t['url'])
2059                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2060                         shutil.copyfileobj(uf, thumbf)
2061                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2062                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2063                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2064                     self.report_warning('Unable to download thumbnail "%s": %s' %
2065                                         (t['url'], error_to_compat_str(err)))