[YoutubeDL] More error_to_str
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import datetime
9 import errno
10 import fileinput
11 import io
12 import itertools
13 import json
14 import locale
15 import operator
16 import os
17 import platform
18 import re
19 import shutil
20 import subprocess
21 import socket
22 import sys
23 import time
24 import tokenize
25 import traceback
26
27 if os.name == 'nt':
28     import ctypes
29
30 from .compat import (
31     compat_basestring,
32     compat_cookiejar,
33     compat_expanduser,
34     compat_get_terminal_size,
35     compat_http_client,
36     compat_kwargs,
37     compat_str,
38     compat_tokenize_tokenize,
39     compat_urllib_error,
40     compat_urllib_request,
41     compat_urllib_request_DataHandler,
42 )
43 from .utils import (
44     ContentTooShortError,
45     date_from_str,
46     DateRange,
47     DEFAULT_OUTTMPL,
48     determine_ext,
49     DownloadError,
50     encodeFilename,
51     error_to_str,
52     ExtractorError,
53     format_bytes,
54     formatSeconds,
55     locked_file,
56     make_HTTPS_handler,
57     MaxDownloadsReached,
58     PagedList,
59     parse_filesize,
60     PerRequestProxyHandler,
61     PostProcessingError,
62     platform_name,
63     preferredencoding,
64     render_table,
65     SameFileError,
66     sanitize_filename,
67     sanitize_path,
68     sanitized_Request,
69     std_headers,
70     subtitles_filename,
71     UnavailableVideoError,
72     url_basename,
73     version_tuple,
74     write_json_file,
75     write_string,
76     YoutubeDLCookieProcessor,
77     YoutubeDLHandler,
78     prepend_extension,
79     replace_extension,
80     args_to_str,
81     age_restricted,
82 )
83 from .cache import Cache
84 from .extractor import get_info_extractor, gen_extractors
85 from .downloader import get_suitable_downloader
86 from .downloader.rtmp import rtmpdump_version
87 from .postprocessor import (
88     FFmpegFixupM4aPP,
89     FFmpegFixupStretchedPP,
90     FFmpegMergerPP,
91     FFmpegPostProcessor,
92     get_postprocessor,
93 )
94 from .version import __version__
95
96
97 class YoutubeDL(object):
98     """YoutubeDL class.
99
    YoutubeDL objects are the ones responsible for downloading the
101     actual video file and writing it to disk if the user has requested
102     it, among some other tasks. In most cases there should be one per
    program. Given a video URL, the downloader doesn't know how to
    extract all the needed information (that is the InfoExtractors'
    task), so it has to pass the URL to one of them.
106
107     For this, YoutubeDL objects have a method that allows
108     InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
110     finds that reports being able to handle it. The InfoExtractor extracts
111     all the information about the video or videos the URL refers to, and
112     YoutubeDL process the extracted information, possibly using a File
113     Downloader to download the video.
114
115     YoutubeDL objects accept a lot of parameters. In order not to saturate
116     the object constructor with arguments, it receives a dictionary of
117     options instead. These options are available through the params
118     attribute for the InfoExtractors to use. The YoutubeDL also
119     registers itself as the downloader in charge for the InfoExtractors
120     that are added to it, so this is a "mutual registration".
121
122     Available options:
123
124     username:          Username for authentication purposes.
125     password:          Password for authentication purposes.
126     videopassword:     Password for accessing a video.
127     usenetrc:          Use netrc for authentication instead.
128     verbose:           Print additional info to stdout.
129     quiet:             Do not print messages to stdout.
130     no_warnings:       Do not print out anything for warnings.
131     forceurl:          Force printing final URL.
132     forcetitle:        Force printing title.
133     forceid:           Force printing ID.
134     forcethumbnail:    Force printing thumbnail URL.
135     forcedescription:  Force printing description.
136     forcefilename:     Force printing final filename.
137     forceduration:     Force printing duration.
138     forcejson:         Force printing info_dict as JSON.
139     dump_single_json:  Force printing the info_dict of the whole playlist
140                        (or video) as a single JSON line.
141     simulate:          Do not download the video files.
142     format:            Video format code. See options.py for more information.
143     outtmpl:           Template for output names.
144     restrictfilenames: Do not allow "&" and spaces in file names
145     ignoreerrors:      Do not stop on download errors.
146     force_generic_extractor: Force downloader to use the generic extractor
147     nooverwrites:      Prevent overwriting files.
148     playliststart:     Playlist item to start at.
149     playlistend:       Playlist item to end at.
150     playlist_items:    Specific indices of playlist to download.
151     playlistreverse:   Download playlist items in reverse order.
152     matchtitle:        Download only matching titles.
153     rejecttitle:       Reject downloads for matching titles.
154     logger:            Log messages to a logging.Logger instance.
155     logtostderr:       Log messages to stderr instead of stdout.
156     writedescription:  Write the video description to a .description file
157     writeinfojson:     Write the video description to a .info.json file
158     writeannotations:  Write the video annotations to a .annotations.xml file
159     writethumbnail:    Write the thumbnail image to a file
160     write_all_thumbnails:  Write all thumbnail formats to files
161     writesubtitles:    Write the video subtitles to a file
162     writeautomaticsub: Write the automatically generated subtitles to a file
163     allsubtitles:      Downloads all the subtitles of the video
164                        (requires writesubtitles or writeautomaticsub)
165     listsubtitles:     Lists all available subtitles for the video
166     subtitlesformat:   The format code for subtitles
167     subtitleslangs:    List of languages of the subtitles to download
168     keepvideo:         Keep the video file after post-processing
169     daterange:         A DateRange object, download only if the upload_date is in the range.
170     skip_download:     Skip the actual download of the video file
171     cachedir:          Location of the cache files in the filesystem.
172                        False to disable filesystem cache.
173     noplaylist:        Download single video instead of a playlist if in doubt.
174     age_limit:         An integer representing the user's age in years.
175                        Unsuitable videos for the given age are skipped.
176     min_views:         An integer representing the minimum view count the video
177                        must have in order to not be skipped.
178                        Videos without view count information are always
179                        downloaded. None for no limit.
180     max_views:         An integer representing the maximum view count.
181                        Videos that are more popular than that are not
182                        downloaded.
183                        Videos without view count information are always
184                        downloaded. None for no limit.
185     download_archive:  File name of a file where all downloads are recorded.
186                        Videos already present in the file are not downloaded
187                        again.
188     cookiefile:        File name where cookies should be read from and dumped to.
189     nocheckcertificate:Do not verify SSL certificates
190     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
191                        At the moment, this is only supported by YouTube.
192     proxy:             URL of the proxy server to use
193     cn_verification_proxy:  URL of the proxy to use for IP address verification
194                        on Chinese sites. (Experimental)
195     socket_timeout:    Time to wait for unresponsive hosts, in seconds
196     bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
198     debug_printtraffic:Print out sent and received HTTP traffic
199     include_ads:       Download ads as well
200     default_search:    Prepend this string if an input url is not valid.
201                        'auto' for elaborate guessing
202     encoding:          Use this encoding instead of the system-specified.
203     extract_flat:      Do not resolve URLs, return the immediate result.
204                        Pass in 'in_playlist' to only show this behavior for
205                        playlist items.
206     postprocessors:    A list of dictionaries, each with an entry
207                        * key:  The name of the postprocessor. See
208                                youtube_dl/postprocessor/__init__.py for a list.
209                        as well as any further keyword arguments for the
210                        postprocessor.
211     progress_hooks:    A list of functions that get called on download
212                        progress, with a dictionary with the entries
213                        * status: One of "downloading", "error", or "finished".
214                                  Check this first and ignore unknown values.
215
216                        If status is one of "downloading", or "finished", the
217                        following properties may also be present:
218                        * filename: The final filename (always present)
219                        * tmpfilename: The filename we're currently writing to
220                        * downloaded_bytes: Bytes on disk
221                        * total_bytes: Size of the whole file, None if unknown
222                        * total_bytes_estimate: Guess of the eventual file size,
223                                                None if unavailable.
224                        * elapsed: The number of seconds since download started.
225                        * eta: The estimated time in seconds, None if unknown
226                        * speed: The download speed in bytes/second, None if
227                                 unknown
228                        * fragment_index: The counter of the currently
229                                          downloaded video fragment.
230                        * fragment_count: The number of fragments (= individual
231                                          files that will be merged)
232
233                        Progress hooks are guaranteed to be called at least once
234                        (with status "finished") if the download is successful.
235     merge_output_format: Extension to use when merging formats.
236     fixup:             Automatically correct known faults of the file.
237                        One of:
238                        - "never": do nothing
239                        - "warn": only emit a warning
240                        - "detect_or_warn": check whether we can do anything
241                                            about it, warn otherwise (default)
242     source_address:    (Experimental) Client-side IP address to bind to.
243     call_home:         Boolean, true iff we are allowed to contact the
244                        youtube-dl servers for debugging.
245     sleep_interval:    Number of seconds to sleep before each download.
246     listformats:       Print an overview of available video formats and exit.
247     list_thumbnails:   Print a table of all thumbnails and exit.
248     match_filter:      A function that gets called with the info_dict of
249                        every video.
250                        If it returns a message, the video is ignored.
251                        If it returns None, the video is downloaded.
252                        match_filter_func in utils.py is one example for this.
253     no_color:          Do not emit color codes in output.
254
255     The following options determine which downloader is picked:
256     external_downloader: Executable of the external downloader to call.
257                        None or unset for standard (built-in) downloader.
258     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
259
260     The following parameters are not used by YoutubeDL itself, they are used by
261     the downloader (see youtube_dl/downloader/common.py):
262     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
263     noresizebuffer, retries, continuedl, noprogress, consoletitle,
264     xattr_set_filesize, external_downloader_args.
265
266     The following options are used by the post processors:
267     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
268                        otherwise prefer avconv.
269     postprocessor_args: A list of additional command-line arguments for the
270                         postprocessor.
271     """
272
273     params = None
274     _ies = []
275     _pps = []
276     _download_retcode = None
277     _num_downloads = None
278     _screen_file = None
279
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.

        params:    dict of options (see the class docstring); defaults to {}.
        auto_init: when True, print the debug header and register all
                   default info extractors immediately.
        """
        if params is None:
            params = {}
        self._ies = []
        self._ies_instances = {}
        self._pps = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Route screen output to stderr when 'logtostderr' is set
        # (bool indexes the two-element list).
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = {
            # Default parameters
            'nocheckcertificate': False,
        }
        self.params.update(params)
        self.cache = Cache(self)

        if params.get('bidi_workaround', False):
            try:
                # Spawn a bidi filter process wired to a pty so its output
                # can be read back line-by-line (see _bidi_workaround).
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    # Prefer 'bidiv'; fall back to 'fribidi' if not found.
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                # errno 2 == ENOENT: neither executable exists; warn and
                # continue without the workaround.
                if ose.errno == 2:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.version_info >= (3,) and sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
                not params.get('restrictfilenames', False)):
            # On Python 3, the Unicode filesystem API will throw errors (#1474)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if isinstance(params.get('outtmpl'), bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')

        self._setup_opener()

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        # Instantiate configured postprocessors; each dict's 'key' selects
        # the class, the remaining entries become constructor kwargs.
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            del pp_def['key']
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)
356
357     def warn_if_short_id(self, argv):
358         # short YouTube ID starting with dash?
359         idxs = [
360             i for i, a in enumerate(argv)
361             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
362         if idxs:
363             correct_argv = (
364                 ['youtube-dl'] +
365                 [a for i, a in enumerate(argv) if i not in idxs] +
366                 ['--'] + [argv[i] for i in idxs]
367             )
368             self.report_warning(
369                 'Long argument string detected. '
370                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
371                 args_to_str(correct_argv))
372
373     def add_info_extractor(self, ie):
374         """Add an InfoExtractor object to the end of the list."""
375         self._ies.append(ie)
376         self._ies_instances[ie.ie_key()] = ie
377         ie.set_downloader(self)
378
379     def get_info_extractor(self, ie_key):
380         """
381         Get an instance of an IE with name ie_key, it will try to get one from
382         the _ies list, if there's no instance it will create a new one and add
383         it to the extractor list.
384         """
385         ie = self._ies_instances.get(ie_key)
386         if ie is None:
387             ie = get_info_extractor(ie_key)()
388             self.add_info_extractor(ie)
389         return ie
390
391     def add_default_info_extractors(self):
392         """
393         Add the InfoExtractors returned by gen_extractors to the end of the list
394         """
395         for ie in gen_extractors():
396             self.add_info_extractor(ie)
397
398     def add_post_processor(self, pp):
399         """Add a PostProcessor object to the end of the chain."""
400         self._pps.append(pp)
401         pp.set_downloader(self)
402
403     def add_progress_hook(self, ph):
404         """Add the progress hook (currently only for the file downloader)"""
405         self._progress_hooks.append(ph)
406
407     def _bidi_workaround(self, message):
408         if not hasattr(self, '_output_channel'):
409             return message
410
411         assert hasattr(self, '_output_process')
412         assert isinstance(message, compat_str)
413         line_count = message.count('\n') + 1
414         self._output_process.stdin.write((message + '\n').encode('utf-8'))
415         self._output_process.stdin.flush()
416         res = ''.join(self._output_channel.readline().decode('utf-8')
417                       for _ in range(line_count))
418         return res[:-len('\n')]
419
420     def to_screen(self, message, skip_eol=False):
421         """Print message to stdout if not in quiet mode."""
422         return self.to_stdout(message, skip_eol, check_quiet=True)
423
424     def _write_string(self, s, out=None):
425         write_string(s, out=out, encoding=self.params.get('encoding'))
426
427     def to_stdout(self, message, skip_eol=False, check_quiet=False):
428         """Print message to stdout if not in quiet mode."""
429         if self.params.get('logger'):
430             self.params['logger'].debug(message)
431         elif not check_quiet or not self.params.get('quiet', False):
432             message = self._bidi_workaround(message)
433             terminator = ['\n', ''][skip_eol]
434             output = message + terminator
435
436             self._write_string(output, self._screen_file)
437
438     def to_stderr(self, message):
439         """Print message to stderr."""
440         assert isinstance(message, compat_str)
441         if self.params.get('logger'):
442             self.params['logger'].error(message)
443         else:
444             message = self._bidi_workaround(message)
445             output = message + '\n'
446             self._write_string(output, self._err_file)
447
448     def to_console_title(self, message):
449         if not self.params.get('consoletitle', False):
450             return
451         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
452             # c_wchar_p() might not be necessary if `message` is
453             # already of type unicode()
454             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
455         elif 'TERM' in os.environ:
456             self._write_string('\033]0;%s\007' % message, self._screen_file)
457
458     def save_console_title(self):
459         if not self.params.get('consoletitle', False):
460             return
461         if 'TERM' in os.environ:
462             # Save the title on stack
463             self._write_string('\033[22;0t', self._screen_file)
464
465     def restore_console_title(self):
466         if not self.params.get('consoletitle', False):
467             return
468         if 'TERM' in os.environ:
469             # Restore the title from stack
470             self._write_string('\033[23;0t', self._screen_file)
471
472     def __enter__(self):
473         self.save_console_title()
474         return self
475
476     def __exit__(self, *args):
477         self.restore_console_title()
478
479         if self.params.get('cookiefile') is not None:
480             self.cookiejar.save()
481
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # Prefer the wrapped exception's exc_info when the active
                    # exception carries one (e.g. ExtractorError), so the root
                    # cause's traceback is shown first.
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: show where trouble() was
                    # called from instead.
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Re-raise as DownloadError, again preferring the wrapped
            # exc_info so callers see the original cause.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1
511
512     def report_warning(self, message):
513         '''
514         Print the message to stderr, it will be prefixed with 'WARNING:'
515         If stderr is a tty file the 'WARNING:' will be colored
516         '''
517         if self.params.get('logger') is not None:
518             self.params['logger'].warning(message)
519         else:
520             if self.params.get('no_warnings'):
521                 return
522             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
523                 _msg_header = '\033[0;33mWARNING:\033[0m'
524             else:
525                 _msg_header = 'WARNING:'
526             warning_message = '%s %s' % (_msg_header, message)
527             self.to_stderr(warning_message)
528
529     def report_error(self, message, tb=None):
530         '''
531         Do the same as trouble, but prefixes the message with 'ERROR:', colored
532         in red if stderr is a tty file.
533         '''
534         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
535             _msg_header = '\033[0;31mERROR:\033[0m'
536         else:
537             _msg_header = 'ERROR:'
538         error_message = '%s %s' % (_msg_header, message)
539         self.trouble(error_message, tb)
540
541     def report_file_already_downloaded(self, file_name):
542         """Report file has already been fully downloaded."""
543         try:
544             self.to_screen('[download] %s has already been downloaded' % file_name)
545         except UnicodeEncodeError:
546             self.to_screen('[download] The file has already been downloaded')
547
548     def prepare_filename(self, info_dict):
549         """Generate the output filename."""
550         try:
551             template_dict = dict(info_dict)
552
553             template_dict['epoch'] = int(time.time())
554             autonumber_size = self.params.get('autonumber_size')
555             if autonumber_size is None:
556                 autonumber_size = 5
557             autonumber_templ = '%0' + str(autonumber_size) + 'd'
558             template_dict['autonumber'] = autonumber_templ % self._num_downloads
559             if template_dict.get('playlist_index') is not None:
560                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
561             if template_dict.get('resolution') is None:
562                 if template_dict.get('width') and template_dict.get('height'):
563                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
564                 elif template_dict.get('height'):
565                     template_dict['resolution'] = '%sp' % template_dict['height']
566                 elif template_dict.get('width'):
567                     template_dict['resolution'] = '?x%d' % template_dict['width']
568
569             sanitize = lambda k, v: sanitize_filename(
570                 compat_str(v),
571                 restricted=self.params.get('restrictfilenames'),
572                 is_id=(k == 'id'))
573             template_dict = dict((k, sanitize(k, v))
574                                  for k, v in template_dict.items()
575                                  if v is not None)
576             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
577
578             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
579             tmpl = compat_expanduser(outtmpl)
580             filename = tmpl % template_dict
581             # Temporary fix for #4787
582             # 'Treat' all problem characters by passing filename through preferredencoding
583             # to workaround encoding issues with subprocess on python2 @ Windows
584             if sys.version_info < (3, 0) and sys.platform == 'win32':
585                 filename = encodeFilename(filename, True).decode(preferredencoding())
586             return sanitize_path(filename)
587         except ValueError as err:
588             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
589             return None
590
591     def _match_entry(self, info_dict, incomplete):
592         """ Returns None iff the file should be downloaded """
593
594         video_title = info_dict.get('title', info_dict.get('id', 'video'))
595         if 'title' in info_dict:
596             # This can happen when we're just evaluating the playlist
597             title = info_dict['title']
598             matchtitle = self.params.get('matchtitle', False)
599             if matchtitle:
600                 if not re.search(matchtitle, title, re.IGNORECASE):
601                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
602             rejecttitle = self.params.get('rejecttitle', False)
603             if rejecttitle:
604                 if re.search(rejecttitle, title, re.IGNORECASE):
605                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
606         date = info_dict.get('upload_date', None)
607         if date is not None:
608             dateRange = self.params.get('daterange', DateRange())
609             if date not in dateRange:
610                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
611         view_count = info_dict.get('view_count', None)
612         if view_count is not None:
613             min_views = self.params.get('min_views')
614             if min_views is not None and view_count < min_views:
615                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
616             max_views = self.params.get('max_views')
617             if max_views is not None and view_count > max_views:
618                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
619         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
620             return 'Skipping "%s" because it is age restricted' % video_title
621         if self.in_download_archive(info_dict):
622             return '%s has already been recorded in archive' % video_title
623
624         if not incomplete:
625             match_filter = self.params.get('match_filter')
626             if match_filter is not None:
627                 ret = match_filter(info_dict)
628                 if ret is not None:
629                     return ret
630
631         return None
632
633     @staticmethod
634     def add_extra_info(info_dict, extra_info):
635         '''Set the keys from extra_info in info dict if they are missing'''
636         for key, value in extra_info.items():
637             info_dict.setdefault(key, value)
638
639     def extract_info(self, url, download=True, ie_key=None, extra_info={},
640                      process=True, force_generic_extractor=False):
641         '''
642         Returns a list with a dictionary for each video we find.
643         If 'download', also downloads the videos.
644         extra_info is a dict containing the extra values to add to each result
645         '''
646
647         if not ie_key and force_generic_extractor:
648             ie_key = 'Generic'
649
650         if ie_key:
651             ies = [self.get_info_extractor(ie_key)]
652         else:
653             ies = self._ies
654
655         for ie in ies:
656             if not ie.suitable(url):
657                 continue
658
659             if not ie.working():
660                 self.report_warning('The program functionality for this site has been marked as broken, '
661                                     'and will probably not work.')
662
663             try:
664                 ie_result = ie.extract(url)
665                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
666                     break
667                 if isinstance(ie_result, list):
668                     # Backwards compatibility: old IE result format
669                     ie_result = {
670                         '_type': 'compat_list',
671                         'entries': ie_result,
672                     }
673                 self.add_default_extra_info(ie_result, ie, url)
674                 if process:
675                     return self.process_ie_result(ie_result, download, extra_info)
676                 else:
677                     return ie_result
678             except ExtractorError as e:  # An error we somewhat expected
679                 self.report_error(error_to_str(e), e.format_traceback())
680                 break
681             except MaxDownloadsReached:
682                 raise
683             except Exception as e:
684                 if self.params.get('ignoreerrors', False):
685                     self.report_error(error_to_str(e), tb=compat_str(traceback.format_exc()))
686                     break
687                 else:
688                     raise
689         else:
690             self.report_error('no suitable InfoExtractor for URL %s' % url)
691
692     def add_default_extra_info(self, ie_result, ie, url):
693         self.add_extra_info(ie_result, {
694             'extractor': ie.IE_NAME,
695             'webpage_url': url,
696             'webpage_url_basename': url_basename(url),
697             'extractor_key': ie.ie_key(),
698         })
699
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """

        # '_type' distinguishes plain videos from playlists and URL references.
        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            extract_flat = self.params.get('extract_flat', False)
            # With --flat-playlist, entries that are themselves URL references
            # are not resolved; they are reported as-is.
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                    extract_flat is True):
                if self.params.get('forcejson', False):
                    self.to_stdout(json.dumps(ie_result))
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # Non-None fields from the embedding result take precedence over
            # the extracted ones, except '_type' and 'url', which describe the
            # reference itself rather than the target video.
            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            assert new_result.get('_type') != 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist' or result_type == 'multi_video':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # --playlist-start is 1-based on the command line, 0-based here.
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

            playlistitems_str = self.params.get('playlist_items', None)
            playlistitems = None
            if playlistitems_str is not None:
                def iter_playlistitems(format):
                    # Expand a spec like '1-3,7' into 1-based indices 1, 2, 3, 7.
                    for string_segment in format.split(','):
                        if '-' in string_segment:
                            start, end = string_segment.split('-')
                            for item in range(int(start), int(end) + 1):
                                yield int(item)
                        else:
                            yield int(string_segment)
                playlistitems = iter_playlistitems(playlistitems_str)

            ie_entries = ie_result['entries']
            if isinstance(ie_entries, list):
                n_all_entries = len(ie_entries)
                if playlistitems:
                    # Out-of-range indices are silently dropped; negative
                    # offsets down to -n_all_entries index from the end.
                    entries = [
                        ie_entries[i - 1] for i in playlistitems
                        if -n_all_entries <= i - 1 < n_all_entries]
                else:
                    entries = ie_entries[playliststart:playlistend]
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            elif isinstance(ie_entries, PagedList):
                if playlistitems:
                    entries = []
                    for item in playlistitems:
                        entries.extend(ie_entries.getslice(
                            item - 1, item
                        ))
                else:
                    entries = ie_entries.getslice(
                        playliststart, playlistend)
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))
            else:  # iterable
                if playlistitems:
                    # NOTE(review): unlike the list branch above there is no
                    # bounds check here, so an out-of-range --playlist-items
                    # value raises IndexError.
                    entry_list = list(ie_entries)
                    entries = [entry_list[i - 1] for i in playlistitems]
                else:
                    entries = list(itertools.islice(
                        ie_entries, playliststart, playlistend))
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))

            if self.params.get('playlistreverse', False):
                entries = entries[::-1]

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                # Playlist-level metadata propagated into every entry.
                extra = {
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_id': ie_result.get('id'),
                    'playlist_title': ie_result.get('title'),
                    # NOTE(review): when --playlist-items is used this is the
                    # position in the filtered list, not in the original
                    # playlist — confirm that is the intended semantics.
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

                reason = self._match_entry(entry, incomplete=True)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                    continue

                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            self.to_screen('[download] Finished downloading playlist: %s' % playlist)
            return ie_result
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            def _fixup(r):
                # Propagate the playlist-level metadata into each entry.
                self.add_extra_info(
                    r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    }
                )
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
864
865     def _build_format_filter(self, filter_spec):
866         " Returns a function to filter the formats according to the filter_spec "
867
868         OPERATORS = {
869             '<': operator.lt,
870             '<=': operator.le,
871             '>': operator.gt,
872             '>=': operator.ge,
873             '=': operator.eq,
874             '!=': operator.ne,
875         }
876         operator_rex = re.compile(r'''(?x)\s*
877             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
878             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
879             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
880             $
881             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
882         m = operator_rex.search(filter_spec)
883         if m:
884             try:
885                 comparison_value = int(m.group('value'))
886             except ValueError:
887                 comparison_value = parse_filesize(m.group('value'))
888                 if comparison_value is None:
889                     comparison_value = parse_filesize(m.group('value') + 'B')
890                 if comparison_value is None:
891                     raise ValueError(
892                         'Invalid value %r in format specification %r' % (
893                             m.group('value'), filter_spec))
894             op = OPERATORS[m.group('op')]
895
896         if not m:
897             STR_OPERATORS = {
898                 '=': operator.eq,
899                 '!=': operator.ne,
900             }
901             str_operator_rex = re.compile(r'''(?x)
902                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
903                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
904                 \s*(?P<value>[a-zA-Z0-9_-]+)
905                 \s*$
906                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
907             m = str_operator_rex.search(filter_spec)
908             if m:
909                 comparison_value = m.group('value')
910                 op = STR_OPERATORS[m.group('op')]
911
912         if not m:
913             raise ValueError('Invalid filter specification %r' % filter_spec)
914
915         def _filter(f):
916             actual_value = f.get(m.group('key'))
917             if actual_value is None:
918                 return m.group('none_inclusive')
919             return op(actual_value, comparison_value)
920         return _filter
921
    def build_format_selector(self, format_spec):
        """Compile a format specification string (e.g.
        'bestvideo[height<=720]+bestaudio/best') into a function that maps a
        list of format dicts to the selected format(s).

        Raises SyntaxError (via syntax_error) on an invalid specification.
        """
        def syntax_error(note, start):
            # Build a SyntaxError pointing a caret at column start[1] of the spec.
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        # Selector AST node types:
        PICKFIRST = 'PICKFIRST'  # 'a/b': first alternative that yields formats
        MERGE = 'MERGE'          # 'video+audio': merge two formats into one
        SINGLE = 'SINGLE'        # a plain name such as 'best', 'mp4' or '22'
        GROUP = 'GROUP'          # parenthesized sub-expression
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        def _parse_filter(tokens):
            # Consume tokens up to the closing ']' and rejoin them into the
            # raw filter string (parsed later by _build_format_filter).
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    # Flush any pending joined name before the bracket.
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    # Accumulate adjacent names/numbers/other ops into one
                    # NAME token (e.g. 'mp4-baseline-16x9').
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Recursive-descent parse of a comma-separated list of selectors.
            # The inside_* flags tell us which delimiters end the current
            # nesting level (and must be pushed back for the caller).
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        # A bare filter like '[height<=480]' implicitly
                        # filters 'best'.
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        video_selector = current_selector
                        audio_selector = _parse_format_selection(tokens, inside_merge=True)
                        if not video_selector or not audio_selector:
                            raise syntax_error('"+" must be between two format selectors', start)
                        current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _build_selector_function(selector):
            # Recursively compile the parsed selector tree into a generator
            # function over a list of formats.
            if isinstance(selector, list):
                # A list of selectors: yield from each in turn.
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(formats):
                    for f in fs:
                        for format in f(formats):
                            yield format
                return selector_function
            elif selector.type == GROUP:
                selector_function = _build_selector_function(selector.selector)
            elif selector.type == PICKFIRST:
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(formats):
                    # Return the results of the first alternative that
                    # matches anything.
                    for f in fs:
                        picked_formats = list(f(formats))
                        if picked_formats:
                            return picked_formats
                    return []
            elif selector.type == SINGLE:
                format_spec = selector.selector

                def selector_function(formats):
                    formats = list(formats)
                    if not formats:
                        return
                    if format_spec == 'all':
                        for f in formats:
                            yield f
                    elif format_spec in ['best', 'worst', None]:
                        # Formats are assumed sorted worst-to-best, so 'best'
                        # is the last element.
                        format_idx = 0 if format_spec == 'worst' else -1
                        audiovideo_formats = [
                            f for f in formats
                            if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                        if audiovideo_formats:
                            yield audiovideo_formats[format_idx]
                        # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
                        elif (all(f.get('acodec') != 'none' for f in formats) or
                              all(f.get('vcodec') != 'none' for f in formats)):
                            yield formats[format_idx]
                    elif format_spec == 'bestaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[-1]
                    elif format_spec == 'worstaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[0]
                    elif format_spec == 'bestvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[-1]
                    elif format_spec == 'worstvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[0]
                    else:
                        # Anything else is an extension or an exact format_id.
                        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                        if format_spec in extensions:
                            filter_f = lambda f: f['ext'] == format_spec
                        else:
                            filter_f = lambda f: f['format_id'] == format_spec
                        matches = list(filter(filter_f, formats))
                        if matches:
                            yield matches[-1]
            elif selector.type == MERGE:
                def _merge(formats_info):
                    # Combine a (video, audio) pair into one synthetic format
                    # dict describing the merged download.
                    format_1, format_2 = [f['format_id'] for f in formats_info]
                    # The first format must contain the video and the
                    # second the audio
                    if formats_info[0].get('vcodec') == 'none':
                        self.report_error('The first format must '
                                          'contain the video, try using '
                                          '"-f %s+%s"' % (format_2, format_1))
                        return
                    # Formats must be opposite (video+audio)
                    if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
                        self.report_error(
                            'Both formats %s and %s are video-only, you must specify "-f video+audio"'
                            % (format_1, format_2))
                        return
                    output_ext = (
                        formats_info[0]['ext']
                        if self.params.get('merge_output_format') is None
                        else self.params['merge_output_format'])
                    return {
                        'requested_formats': formats_info,
                        'format': '%s+%s' % (formats_info[0].get('format'),
                                             formats_info[1].get('format')),
                        'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                formats_info[1].get('format_id')),
                        'width': formats_info[0].get('width'),
                        'height': formats_info[0].get('height'),
                        'resolution': formats_info[0].get('resolution'),
                        'fps': formats_info[0].get('fps'),
                        'vcodec': formats_info[0].get('vcodec'),
                        'vbr': formats_info[0].get('vbr'),
                        'stretched_ratio': formats_info[0].get('stretched_ratio'),
                        'acodec': formats_info[1].get('acodec'),
                        'abr': formats_info[1].get('abr'),
                        'ext': output_ext,
                    }
                video_selector, audio_selector = map(_build_selector_function, selector.selector)

                def selector_function(formats):
                    formats = list(formats)
                    # Every video candidate is paired with every audio candidate.
                    for pair in itertools.product(video_selector(formats), audio_selector(formats)):
                        yield _merge(pair)

            # Apply the node's '[...]' filters on top of the selection.
            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(formats):
                for _filter in filters:
                    formats = list(filter(_filter, formats))
                return selector_function(formats)
            return final_selector

        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator(object):
            # Iterator over the token list with one-token "push back" support,
            # needed by _parse_format_selection to hand delimiters back to the
            # enclosing parse level.
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            next = __next__

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
1184
1185     def _calc_headers(self, info_dict):
1186         res = std_headers.copy()
1187
1188         add_headers = info_dict.get('http_headers')
1189         if add_headers:
1190             res.update(add_headers)
1191
1192         cookies = self._calc_cookies(info_dict)
1193         if cookies:
1194             res['Cookie'] = cookies
1195
1196         return res
1197
1198     def _calc_cookies(self, info_dict):
1199         pr = sanitized_Request(info_dict['url'])
1200         self.cookiejar.add_cookie_header(pr)
1201         return pr.get_header('Cookie')
1202
1203     def process_video_result(self, info_dict, download=True):
1204         assert info_dict.get('_type', 'video') == 'video'
1205
1206         if 'id' not in info_dict:
1207             raise ExtractorError('Missing "id" field in extractor result')
1208         if 'title' not in info_dict:
1209             raise ExtractorError('Missing "title" field in extractor result')
1210
1211         if 'playlist' not in info_dict:
1212             # It isn't part of a playlist
1213             info_dict['playlist'] = None
1214             info_dict['playlist_index'] = None
1215
1216         thumbnails = info_dict.get('thumbnails')
1217         if thumbnails is None:
1218             thumbnail = info_dict.get('thumbnail')
1219             if thumbnail:
1220                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1221         if thumbnails:
1222             thumbnails.sort(key=lambda t: (
1223                 t.get('preference'), t.get('width'), t.get('height'),
1224                 t.get('id'), t.get('url')))
1225             for i, t in enumerate(thumbnails):
1226                 if t.get('width') and t.get('height'):
1227                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1228                 if t.get('id') is None:
1229                     t['id'] = '%d' % i
1230
1231         if thumbnails and 'thumbnail' not in info_dict:
1232             info_dict['thumbnail'] = thumbnails[-1]['url']
1233
1234         if 'display_id' not in info_dict and 'id' in info_dict:
1235             info_dict['display_id'] = info_dict['id']
1236
1237         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1238             # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1239             # see http://bugs.python.org/issue1646728)
1240             try:
1241                 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1242                 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1243             except (ValueError, OverflowError, OSError):
1244                 pass
1245
1246         subtitles = info_dict.get('subtitles')
1247         if subtitles:
1248             for _, subtitle in subtitles.items():
1249                 for subtitle_format in subtitle:
1250                     if 'ext' not in subtitle_format:
1251                         subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1252
1253         if self.params.get('listsubtitles', False):
1254             if 'automatic_captions' in info_dict:
1255                 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
1256             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1257             return
1258         info_dict['requested_subtitles'] = self.process_subtitles(
1259             info_dict['id'], subtitles,
1260             info_dict.get('automatic_captions'))
1261
1262         # We now pick which formats have to be downloaded
1263         if info_dict.get('formats') is None:
1264             # There's only one format available
1265             formats = [info_dict]
1266         else:
1267             formats = info_dict['formats']
1268
1269         if not formats:
1270             raise ExtractorError('No video formats found!')
1271
1272         formats_dict = {}
1273
1274         # We check that all the formats have the format and format_id fields
1275         for i, format in enumerate(formats):
1276             if 'url' not in format:
1277                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1278
1279             if format.get('format_id') is None:
1280                 format['format_id'] = compat_str(i)
1281             format_id = format['format_id']
1282             if format_id not in formats_dict:
1283                 formats_dict[format_id] = []
1284             formats_dict[format_id].append(format)
1285
1286         # Make sure all formats have unique format_id
1287         for format_id, ambiguous_formats in formats_dict.items():
1288             if len(ambiguous_formats) > 1:
1289                 for i, format in enumerate(ambiguous_formats):
1290                     format['format_id'] = '%s-%d' % (format_id, i)
1291
1292         for i, format in enumerate(formats):
1293             if format.get('format') is None:
1294                 format['format'] = '{id} - {res}{note}'.format(
1295                     id=format['format_id'],
1296                     res=self.format_resolution(format),
1297                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1298                 )
1299             # Automatically determine file extension if missing
1300             if 'ext' not in format:
1301                 format['ext'] = determine_ext(format['url']).lower()
1302             # Add HTTP headers, so that external programs can use them from the
1303             # json output
1304             full_format_info = info_dict.copy()
1305             full_format_info.update(format)
1306             format['http_headers'] = self._calc_headers(full_format_info)
1307
1308         # TODO Central sorting goes here
1309
1310         if formats[0] is not info_dict:
1311             # only set the 'formats' fields if the original info_dict list them
1312             # otherwise we end up with a circular reference, the first (and unique)
1313             # element in the 'formats' field in info_dict is info_dict itself,
1314             # wich can't be exported to json
1315             info_dict['formats'] = formats
1316         if self.params.get('listformats'):
1317             self.list_formats(info_dict)
1318             return
1319         if self.params.get('list_thumbnails'):
1320             self.list_thumbnails(info_dict)
1321             return
1322
1323         req_format = self.params.get('format')
1324         if req_format is None:
1325             req_format_list = []
1326             if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
1327                     info_dict['extractor'] in ['youtube', 'ted'] and
1328                     not info_dict.get('is_live')):
1329                 merger = FFmpegMergerPP(self)
1330                 if merger.available and merger.can_merge():
1331                     req_format_list.append('bestvideo+bestaudio')
1332             req_format_list.append('best')
1333             req_format = '/'.join(req_format_list)
1334         format_selector = self.build_format_selector(req_format)
1335         formats_to_download = list(format_selector(formats))
1336         if not formats_to_download:
1337             raise ExtractorError('requested format not available',
1338                                  expected=True)
1339
1340         if download:
1341             if len(formats_to_download) > 1:
1342                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1343             for format in formats_to_download:
1344                 new_info = dict(info_dict)
1345                 new_info.update(format)
1346                 self.process_info(new_info)
1347         # We update the info dict with the best quality format (backwards compatibility)
1348         info_dict.update(formats_to_download[-1])
1349         return info_dict
1350
1351     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1352         """Select the requested subtitles and their format"""
1353         available_subs = {}
1354         if normal_subtitles and self.params.get('writesubtitles'):
1355             available_subs.update(normal_subtitles)
1356         if automatic_captions and self.params.get('writeautomaticsub'):
1357             for lang, cap_info in automatic_captions.items():
1358                 if lang not in available_subs:
1359                     available_subs[lang] = cap_info
1360
1361         if (not self.params.get('writesubtitles') and not
1362                 self.params.get('writeautomaticsub') or not
1363                 available_subs):
1364             return None
1365
1366         if self.params.get('allsubtitles', False):
1367             requested_langs = available_subs.keys()
1368         else:
1369             if self.params.get('subtitleslangs', False):
1370                 requested_langs = self.params.get('subtitleslangs')
1371             elif 'en' in available_subs:
1372                 requested_langs = ['en']
1373             else:
1374                 requested_langs = [list(available_subs.keys())[0]]
1375
1376         formats_query = self.params.get('subtitlesformat', 'best')
1377         formats_preference = formats_query.split('/') if formats_query else []
1378         subs = {}
1379         for lang in requested_langs:
1380             formats = available_subs.get(lang)
1381             if formats is None:
1382                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1383                 continue
1384             for ext in formats_preference:
1385                 if ext == 'best':
1386                     f = formats[-1]
1387                     break
1388                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1389                 if matches:
1390                     f = matches[-1]
1391                     break
1392             else:
1393                 f = formats[-1]
1394                 self.report_warning(
1395                     'No subtitle format found matching "%s" for language %s, '
1396                     'using %s' % (formats_query, lang, f['ext']))
1397             subs[lang] = f
1398         return subs
1399
1400     def process_info(self, info_dict):
1401         """Process a single resolved IE result."""
1402
1403         assert info_dict.get('_type', 'video') == 'video'
1404
1405         max_downloads = self.params.get('max_downloads')
1406         if max_downloads is not None:
1407             if self._num_downloads >= int(max_downloads):
1408                 raise MaxDownloadsReached()
1409
1410         info_dict['fulltitle'] = info_dict['title']
1411         if len(info_dict['title']) > 200:
1412             info_dict['title'] = info_dict['title'][:197] + '...'
1413
1414         if 'format' not in info_dict:
1415             info_dict['format'] = info_dict['ext']
1416
1417         reason = self._match_entry(info_dict, incomplete=False)
1418         if reason is not None:
1419             self.to_screen('[download] ' + reason)
1420             return
1421
1422         self._num_downloads += 1
1423
1424         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1425
1426         # Forced printings
1427         if self.params.get('forcetitle', False):
1428             self.to_stdout(info_dict['fulltitle'])
1429         if self.params.get('forceid', False):
1430             self.to_stdout(info_dict['id'])
1431         if self.params.get('forceurl', False):
1432             if info_dict.get('requested_formats') is not None:
1433                 for f in info_dict['requested_formats']:
1434                     self.to_stdout(f['url'] + f.get('play_path', ''))
1435             else:
1436                 # For RTMP URLs, also include the playpath
1437                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1438         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1439             self.to_stdout(info_dict['thumbnail'])
1440         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1441             self.to_stdout(info_dict['description'])
1442         if self.params.get('forcefilename', False) and filename is not None:
1443             self.to_stdout(filename)
1444         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1445             self.to_stdout(formatSeconds(info_dict['duration']))
1446         if self.params.get('forceformat', False):
1447             self.to_stdout(info_dict['format'])
1448         if self.params.get('forcejson', False):
1449             self.to_stdout(json.dumps(info_dict))
1450
1451         # Do nothing else if in simulate mode
1452         if self.params.get('simulate', False):
1453             return
1454
1455         if filename is None:
1456             return
1457
1458         try:
1459             dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1460             if dn and not os.path.exists(dn):
1461                 os.makedirs(dn)
1462         except (OSError, IOError) as err:
1463             self.report_error('unable to create directory ' + error_to_str(err))
1464             return
1465
1466         if self.params.get('writedescription', False):
1467             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1468             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1469                 self.to_screen('[info] Video description is already present')
1470             elif info_dict.get('description') is None:
1471                 self.report_warning('There\'s no description to write.')
1472             else:
1473                 try:
1474                     self.to_screen('[info] Writing video description to: ' + descfn)
1475                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1476                         descfile.write(info_dict['description'])
1477                 except (OSError, IOError):
1478                     self.report_error('Cannot write description file ' + descfn)
1479                     return
1480
1481         if self.params.get('writeannotations', False):
1482             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1483             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1484                 self.to_screen('[info] Video annotations are already present')
1485             else:
1486                 try:
1487                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1488                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1489                         annofile.write(info_dict['annotations'])
1490                 except (KeyError, TypeError):
1491                     self.report_warning('There are no annotations to write.')
1492                 except (OSError, IOError):
1493                     self.report_error('Cannot write annotations file: ' + annofn)
1494                     return
1495
1496         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1497                                        self.params.get('writeautomaticsub')])
1498
1499         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1500             # subtitles download errors are already managed as troubles in relevant IE
1501             # that way it will silently go on when used with unsupporting IE
1502             subtitles = info_dict['requested_subtitles']
1503             ie = self.get_info_extractor(info_dict['extractor_key'])
1504             for sub_lang, sub_info in subtitles.items():
1505                 sub_format = sub_info['ext']
1506                 if sub_info.get('data') is not None:
1507                     sub_data = sub_info['data']
1508                 else:
1509                     try:
1510                         sub_data = ie._download_webpage(
1511                             sub_info['url'], info_dict['id'], note=False)
1512                     except ExtractorError as err:
1513                         self.report_warning('Unable to download subtitle for "%s": %s' %
1514                                             (sub_lang, error_to_str(err.cause)))
1515                         continue
1516                 try:
1517                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1518                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1519                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1520                     else:
1521                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1522                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1523                             subfile.write(sub_data)
1524                 except (OSError, IOError):
1525                     self.report_error('Cannot write subtitles file ' + sub_filename)
1526                     return
1527
1528         if self.params.get('writeinfojson', False):
1529             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1530             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1531                 self.to_screen('[info] Video description metadata is already present')
1532             else:
1533                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1534                 try:
1535                     write_json_file(self.filter_requested_info(info_dict), infofn)
1536                 except (OSError, IOError):
1537                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1538                     return
1539
1540         self._write_thumbnails(info_dict, filename)
1541
1542         if not self.params.get('skip_download', False):
1543             try:
1544                 def dl(name, info):
1545                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1546                     for ph in self._progress_hooks:
1547                         fd.add_progress_hook(ph)
1548                     if self.params.get('verbose'):
1549                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1550                     return fd.download(name, info)
1551
1552                 if info_dict.get('requested_formats') is not None:
1553                     downloaded = []
1554                     success = True
1555                     merger = FFmpegMergerPP(self)
1556                     if not merger.available:
1557                         postprocessors = []
1558                         self.report_warning('You have requested multiple '
1559                                             'formats but ffmpeg or avconv are not installed.'
1560                                             ' The formats won\'t be merged.')
1561                     else:
1562                         postprocessors = [merger]
1563
1564                     def compatible_formats(formats):
1565                         video, audio = formats
1566                         # Check extension
1567                         video_ext, audio_ext = audio.get('ext'), video.get('ext')
1568                         if video_ext and audio_ext:
1569                             COMPATIBLE_EXTS = (
1570                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
1571                                 ('webm')
1572                             )
1573                             for exts in COMPATIBLE_EXTS:
1574                                 if video_ext in exts and audio_ext in exts:
1575                                     return True
1576                         # TODO: Check acodec/vcodec
1577                         return False
1578
1579                     filename_real_ext = os.path.splitext(filename)[1][1:]
1580                     filename_wo_ext = (
1581                         os.path.splitext(filename)[0]
1582                         if filename_real_ext == info_dict['ext']
1583                         else filename)
1584                     requested_formats = info_dict['requested_formats']
1585                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1586                         info_dict['ext'] = 'mkv'
1587                         self.report_warning(
1588                             'Requested formats are incompatible for merge and will be merged into mkv.')
1589                     # Ensure filename always has a correct extension for successful merge
1590                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1591                     if os.path.exists(encodeFilename(filename)):
1592                         self.to_screen(
1593                             '[download] %s has already been downloaded and '
1594                             'merged' % filename)
1595                     else:
1596                         for f in requested_formats:
1597                             new_info = dict(info_dict)
1598                             new_info.update(f)
1599                             fname = self.prepare_filename(new_info)
1600                             fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
1601                             downloaded.append(fname)
1602                             partial_success = dl(fname, new_info)
1603                             success = success and partial_success
1604                         info_dict['__postprocessors'] = postprocessors
1605                         info_dict['__files_to_merge'] = downloaded
1606                 else:
1607                     # Just a single file
1608                     success = dl(filename, info_dict)
1609             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1610                 self.report_error('unable to download video data: %s' % str(err))
1611                 return
1612             except (OSError, IOError) as err:
1613                 raise UnavailableVideoError(err)
1614             except (ContentTooShortError, ) as err:
1615                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1616                 return
1617
1618             if success:
1619                 # Fixup content
1620                 fixup_policy = self.params.get('fixup')
1621                 if fixup_policy is None:
1622                     fixup_policy = 'detect_or_warn'
1623
1624                 stretched_ratio = info_dict.get('stretched_ratio')
1625                 if stretched_ratio is not None and stretched_ratio != 1:
1626                     if fixup_policy == 'warn':
1627                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1628                             info_dict['id'], stretched_ratio))
1629                     elif fixup_policy == 'detect_or_warn':
1630                         stretched_pp = FFmpegFixupStretchedPP(self)
1631                         if stretched_pp.available:
1632                             info_dict.setdefault('__postprocessors', [])
1633                             info_dict['__postprocessors'].append(stretched_pp)
1634                         else:
1635                             self.report_warning(
1636                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1637                                     info_dict['id'], stretched_ratio))
1638                     else:
1639                         assert fixup_policy in ('ignore', 'never')
1640
1641                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1642                     if fixup_policy == 'warn':
1643                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1644                             info_dict['id']))
1645                     elif fixup_policy == 'detect_or_warn':
1646                         fixup_pp = FFmpegFixupM4aPP(self)
1647                         if fixup_pp.available:
1648                             info_dict.setdefault('__postprocessors', [])
1649                             info_dict['__postprocessors'].append(fixup_pp)
1650                         else:
1651                             self.report_warning(
1652                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1653                                     info_dict['id']))
1654                     else:
1655                         assert fixup_policy in ('ignore', 'never')
1656
1657                 try:
1658                     self.post_process(filename, info_dict)
1659                 except (PostProcessingError) as err:
1660                     self.report_error('postprocessing: %s' % str(err))
1661                     return
1662                 self.record_download_archive(info_dict)
1663
1664     def download(self, url_list):
1665         """Download a given list of URLs."""
1666         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1667         if (len(url_list) > 1 and
1668                 '%' not in outtmpl and
1669                 self.params.get('max_downloads') != 1):
1670             raise SameFileError(outtmpl)
1671
1672         for url in url_list:
1673             try:
1674                 # It also downloads the videos
1675                 res = self.extract_info(
1676                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
1677             except UnavailableVideoError:
1678                 self.report_error('unable to download video')
1679             except MaxDownloadsReached:
1680                 self.to_screen('[info] Maximum number of downloaded files reached.')
1681                 raise
1682             else:
1683                 if self.params.get('dump_single_json', False):
1684                     self.to_stdout(json.dumps(res))
1685
1686         return self._download_retcode
1687
1688     def download_with_info_file(self, info_filename):
1689         with contextlib.closing(fileinput.FileInput(
1690                 [info_filename], mode='r',
1691                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1692             # FileInput doesn't have a read method, we can't call json.load
1693             info = self.filter_requested_info(json.loads('\n'.join(f)))
1694         try:
1695             self.process_ie_result(info, download=True)
1696         except DownloadError:
1697             webpage_url = info.get('webpage_url')
1698             if webpage_url is not None:
1699                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1700                 return self.download([webpage_url])
1701             else:
1702                 raise
1703         return self._download_retcode
1704
1705     @staticmethod
1706     def filter_requested_info(info_dict):
1707         return dict(
1708             (k, v) for k, v in info_dict.items()
1709             if k not in ['requested_formats', 'requested_subtitles'])
1710
1711     def post_process(self, filename, ie_info):
1712         """Run all the postprocessors on the given file."""
1713         info = dict(ie_info)
1714         info['filepath'] = filename
1715         pps_chain = []
1716         if ie_info.get('__postprocessors') is not None:
1717             pps_chain.extend(ie_info['__postprocessors'])
1718         pps_chain.extend(self._pps)
1719         for pp in pps_chain:
1720             files_to_delete = []
1721             try:
1722                 files_to_delete, info = pp.run(info)
1723             except PostProcessingError as e:
1724                 self.report_error(e.msg)
1725             if files_to_delete and not self.params.get('keepvideo', False):
1726                 for old_filename in files_to_delete:
1727                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1728                     try:
1729                         os.remove(encodeFilename(old_filename))
1730                     except (IOError, OSError):
1731                         self.report_warning('Unable to remove downloaded original file')
1732
1733     def _make_archive_id(self, info_dict):
1734         # Future-proof against any change in case
1735         # and backwards compatibility with prior versions
1736         extractor = info_dict.get('extractor_key')
1737         if extractor is None:
1738             if 'id' in info_dict:
1739                 extractor = info_dict.get('ie_key')  # key in a playlist
1740         if extractor is None:
1741             return None  # Incomplete video information
1742         return extractor.lower() + ' ' + info_dict['id']
1743
1744     def in_download_archive(self, info_dict):
1745         fn = self.params.get('download_archive')
1746         if fn is None:
1747             return False
1748
1749         vid_id = self._make_archive_id(info_dict)
1750         if vid_id is None:
1751             return False  # Incomplete video information
1752
1753         try:
1754             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1755                 for line in archive_file:
1756                     if line.strip() == vid_id:
1757                         return True
1758         except IOError as ioe:
1759             if ioe.errno != errno.ENOENT:
1760                 raise
1761         return False
1762
1763     def record_download_archive(self, info_dict):
1764         fn = self.params.get('download_archive')
1765         if fn is None:
1766             return
1767         vid_id = self._make_archive_id(info_dict)
1768         assert vid_id
1769         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1770             archive_file.write(vid_id + '\n')
1771
1772     @staticmethod
1773     def format_resolution(format, default='unknown'):
1774         if format.get('vcodec') == 'none':
1775             return 'audio only'
1776         if format.get('resolution') is not None:
1777             return format['resolution']
1778         if format.get('height') is not None:
1779             if format.get('width') is not None:
1780                 res = '%sx%s' % (format['width'], format['height'])
1781             else:
1782                 res = '%sp' % format['height']
1783         elif format.get('width') is not None:
1784             res = '?x%d' % format['width']
1785         else:
1786             res = default
1787         return res
1788
    def _format_note(self, fdict):
        """Build the free-form 'note' column shown by list_formats for one
        format dict: bitrates, codecs, fps, sample rate and filesize.

        The string is assembled incrementally; the 'if res: res += ", "'
        pattern inserts a comma separator only between non-empty segments,
        so the exact order of the checks below is significant.
        """
        res = ''
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            # total bitrate, right-aligned in 4 columns
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            if res:
                res += ', '
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            if res:
                res += ', '
            res += fdict['vcodec']
            if fdict.get('vbr') is not None:
                # '@' glues the codec name to the vbr value appended below
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            # vbr known but codec unknown: generic 'video@<rate>' label
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            res += ', %sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            if fdict['acodec'] == 'none':
                res += 'video only'
            else:
                # left-aligned in 5 columns so the '@<abr>' lines up
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            # audio sample rate
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            if res:
                res += ', '
            res += '~' + format_bytes(fdict['filesize_approx'])
        return res
1838
1839     def list_formats(self, info_dict):
1840         formats = info_dict.get('formats', [info_dict])
1841         table = [
1842             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1843             for f in formats
1844             if f.get('preference') is None or f['preference'] >= -1000]
1845         if len(formats) > 1:
1846             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1847
1848         header_line = ['format code', 'extension', 'resolution', 'note']
1849         self.to_screen(
1850             '[info] Available formats for %s:\n%s' %
1851             (info_dict['id'], render_table(header_line, table)))
1852
1853     def list_thumbnails(self, info_dict):
1854         thumbnails = info_dict.get('thumbnails')
1855         if not thumbnails:
1856             tn_url = info_dict.get('thumbnail')
1857             if tn_url:
1858                 thumbnails = [{'id': '0', 'url': tn_url}]
1859             else:
1860                 self.to_screen(
1861                     '[info] No thumbnails present for %s' % info_dict['id'])
1862                 return
1863
1864         self.to_screen(
1865             '[info] Thumbnails for %s:' % info_dict['id'])
1866         self.to_screen(render_table(
1867             ['ID', 'width', 'height', 'URL'],
1868             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1869
1870     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1871         if not subtitles:
1872             self.to_screen('%s has no %s' % (video_id, name))
1873             return
1874         self.to_screen(
1875             'Available %s for %s:' % (name, video_id))
1876         self.to_screen(render_table(
1877             ['Language', 'formats'],
1878             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1879                 for lang, formats in subtitles.items()]))
1880
1881     def urlopen(self, req):
1882         """ Start an HTTP download """
1883         if isinstance(req, compat_basestring):
1884             req = sanitized_Request(req)
1885         return self._opener.open(req, timeout=self._socket_timeout)
1886
1887     def print_debug_header(self):
1888         if not self.params.get('verbose'):
1889             return
1890
1891         if type('') is not compat_str:
1892             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1893             self.report_warning(
1894                 'Your Python is broken! Update to a newer and supported version')
1895
1896         stdout_encoding = getattr(
1897             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1898         encoding_str = (
1899             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1900                 locale.getpreferredencoding(),
1901                 sys.getfilesystemencoding(),
1902                 stdout_encoding,
1903                 self.get_encoding()))
1904         write_string(encoding_str, encoding=None)
1905
1906         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1907         try:
1908             sp = subprocess.Popen(
1909                 ['git', 'rev-parse', '--short', 'HEAD'],
1910                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1911                 cwd=os.path.dirname(os.path.abspath(__file__)))
1912             out, err = sp.communicate()
1913             out = out.decode().strip()
1914             if re.match('[0-9a-f]+', out):
1915                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1916         except Exception:
1917             try:
1918                 sys.exc_clear()
1919             except Exception:
1920                 pass
1921         self._write_string('[debug] Python version %s - %s\n' % (
1922             platform.python_version(), platform_name()))
1923
1924         exe_versions = FFmpegPostProcessor.get_versions(self)
1925         exe_versions['rtmpdump'] = rtmpdump_version()
1926         exe_str = ', '.join(
1927             '%s %s' % (exe, v)
1928             for exe, v in sorted(exe_versions.items())
1929             if v
1930         )
1931         if not exe_str:
1932             exe_str = 'none'
1933         self._write_string('[debug] exe versions: %s\n' % exe_str)
1934
1935         proxy_map = {}
1936         for handler in self._opener.handlers:
1937             if hasattr(handler, 'proxies'):
1938                 proxy_map.update(handler.proxies)
1939         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1940
1941         if self.params.get('call_home', False):
1942             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1943             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1944             latest_version = self.urlopen(
1945                 'https://yt-dl.org/latest/version').read().decode('utf-8')
1946             if version_tuple(latest_version) > version_tuple(__version__):
1947                 self.report_warning(
1948                     'You are using an outdated version (newest version: %s)! '
1949                     'See https://yt-dl.org/update if you need help updating.' %
1950                     latest_version)
1951
1952     def _setup_opener(self):
1953         timeout_val = self.params.get('socket_timeout')
1954         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1955
1956         opts_cookiefile = self.params.get('cookiefile')
1957         opts_proxy = self.params.get('proxy')
1958
1959         if opts_cookiefile is None:
1960             self.cookiejar = compat_cookiejar.CookieJar()
1961         else:
1962             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1963                 opts_cookiefile)
1964             if os.access(opts_cookiefile, os.R_OK):
1965                 self.cookiejar.load()
1966
1967         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
1968         if opts_proxy is not None:
1969             if opts_proxy == '':
1970                 proxies = {}
1971             else:
1972                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1973         else:
1974             proxies = compat_urllib_request.getproxies()
1975             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1976             if 'http' in proxies and 'https' not in proxies:
1977                 proxies['https'] = proxies['http']
1978         proxy_handler = PerRequestProxyHandler(proxies)
1979
1980         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1981         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1982         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1983         data_handler = compat_urllib_request_DataHandler()
1984         opener = compat_urllib_request.build_opener(
1985             proxy_handler, https_handler, cookie_processor, ydlh, data_handler)
1986
1987         # Delete the default user-agent header, which would otherwise apply in
1988         # cases where our custom HTTP handler doesn't come into play
1989         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1990         opener.addheaders = []
1991         self._opener = opener
1992
1993     def encode(self, s):
1994         if isinstance(s, bytes):
1995             return s  # Already encoded
1996
1997         try:
1998             return s.encode(self.get_encoding())
1999         except UnicodeEncodeError as err:
2000             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2001             raise
2002
2003     def get_encoding(self):
2004         encoding = self.params.get('encoding')
2005         if encoding is None:
2006             encoding = preferredencoding()
2007         return encoding
2008
2009     def _write_thumbnails(self, info_dict, filename):
2010         if self.params.get('writethumbnail', False):
2011             thumbnails = info_dict.get('thumbnails')
2012             if thumbnails:
2013                 thumbnails = [thumbnails[-1]]
2014         elif self.params.get('write_all_thumbnails', False):
2015             thumbnails = info_dict.get('thumbnails')
2016         else:
2017             return
2018
2019         if not thumbnails:
2020             # No thumbnails present, so return immediately
2021             return
2022
2023         for t in thumbnails:
2024             thumb_ext = determine_ext(t['url'], 'jpg')
2025             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2026             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2027             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
2028
2029             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2030                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2031                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2032             else:
2033                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2034                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2035                 try:
2036                     uf = self.urlopen(t['url'])
2037                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2038                         shutil.copyfileobj(uf, thumbf)
2039                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2040                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2041                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2042                     self.report_warning('Unable to download thumbnail "%s": %s' %
2043                                         (t['url'], error_to_str(err)))