[YoutubeDL] Check formats for merge to be opposite (#7786)
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import datetime
9 import errno
10 import fileinput
11 import io
12 import itertools
13 import json
14 import locale
15 import operator
16 import os
17 import platform
18 import re
19 import shutil
20 import subprocess
21 import socket
22 import sys
23 import time
24 import tokenize
25 import traceback
26
27 if os.name == 'nt':
28     import ctypes
29
30 from .compat import (
31     compat_basestring,
32     compat_cookiejar,
33     compat_expanduser,
34     compat_get_terminal_size,
35     compat_http_client,
36     compat_kwargs,
37     compat_str,
38     compat_tokenize_tokenize,
39     compat_urllib_error,
40     compat_urllib_request,
41     compat_urllib_request_DataHandler,
42 )
43 from .utils import (
44     ContentTooShortError,
45     date_from_str,
46     DateRange,
47     DEFAULT_OUTTMPL,
48     determine_ext,
49     DownloadError,
50     encodeFilename,
51     ExtractorError,
52     format_bytes,
53     formatSeconds,
54     locked_file,
55     make_HTTPS_handler,
56     MaxDownloadsReached,
57     PagedList,
58     parse_filesize,
59     PerRequestProxyHandler,
60     PostProcessingError,
61     platform_name,
62     preferredencoding,
63     render_table,
64     SameFileError,
65     sanitize_filename,
66     sanitize_path,
67     sanitized_Request,
68     std_headers,
69     subtitles_filename,
70     UnavailableVideoError,
71     url_basename,
72     version_tuple,
73     write_json_file,
74     write_string,
75     YoutubeDLCookieProcessor,
76     YoutubeDLHandler,
77     prepend_extension,
78     replace_extension,
79     args_to_str,
80     age_restricted,
81 )
82 from .cache import Cache
83 from .extractor import get_info_extractor, gen_extractors
84 from .downloader import get_suitable_downloader
85 from .downloader.rtmp import rtmpdump_version
86 from .postprocessor import (
87     FFmpegFixupM4aPP,
88     FFmpegFixupStretchedPP,
89     FFmpegMergerPP,
90     FFmpegPostProcessor,
91     get_postprocessor,
92 )
93 from .version import __version__
94
95
96 class YoutubeDL(object):
97     """YoutubeDL class.
98
99     YoutubeDL objects are the ones responsible of downloading the
100     actual video file and writing it to disk if the user has requested
101     it, among some other tasks. In most cases there should be one per
102     program. As, given a video URL, the downloader doesn't know how to
103     extract all the needed information, task that InfoExtractors do, it
104     has to pass the URL to one of them.
105
106     For this, YoutubeDL objects have a method that allows
107     InfoExtractors to be registered in a given order. When it is passed
108     a URL, the YoutubeDL object handles it to the first InfoExtractor it
109     finds that reports being able to handle it. The InfoExtractor extracts
110     all the information about the video or videos the URL refers to, and
111     YoutubeDL process the extracted information, possibly using a File
112     Downloader to download the video.
113
114     YoutubeDL objects accept a lot of parameters. In order not to saturate
115     the object constructor with arguments, it receives a dictionary of
116     options instead. These options are available through the params
117     attribute for the InfoExtractors to use. The YoutubeDL also
118     registers itself as the downloader in charge for the InfoExtractors
119     that are added to it, so this is a "mutual registration".
120
121     Available options:
122
123     username:          Username for authentication purposes.
124     password:          Password for authentication purposes.
125     videopassword:     Password for accessing a video.
126     usenetrc:          Use netrc for authentication instead.
127     verbose:           Print additional info to stdout.
128     quiet:             Do not print messages to stdout.
129     no_warnings:       Do not print out anything for warnings.
130     forceurl:          Force printing final URL.
131     forcetitle:        Force printing title.
132     forceid:           Force printing ID.
133     forcethumbnail:    Force printing thumbnail URL.
134     forcedescription:  Force printing description.
135     forcefilename:     Force printing final filename.
136     forceduration:     Force printing duration.
137     forcejson:         Force printing info_dict as JSON.
138     dump_single_json:  Force printing the info_dict of the whole playlist
139                        (or video) as a single JSON line.
140     simulate:          Do not download the video files.
141     format:            Video format code. See options.py for more information.
142     outtmpl:           Template for output names.
143     restrictfilenames: Do not allow "&" and spaces in file names
144     ignoreerrors:      Do not stop on download errors.
145     force_generic_extractor: Force downloader to use the generic extractor
146     nooverwrites:      Prevent overwriting files.
147     playliststart:     Playlist item to start at.
148     playlistend:       Playlist item to end at.
149     playlist_items:    Specific indices of playlist to download.
150     playlistreverse:   Download playlist items in reverse order.
151     matchtitle:        Download only matching titles.
152     rejecttitle:       Reject downloads for matching titles.
153     logger:            Log messages to a logging.Logger instance.
154     logtostderr:       Log messages to stderr instead of stdout.
155     writedescription:  Write the video description to a .description file
156     writeinfojson:     Write the video description to a .info.json file
157     writeannotations:  Write the video annotations to a .annotations.xml file
158     writethumbnail:    Write the thumbnail image to a file
159     write_all_thumbnails:  Write all thumbnail formats to files
160     writesubtitles:    Write the video subtitles to a file
161     writeautomaticsub: Write the automatically generated subtitles to a file
162     allsubtitles:      Downloads all the subtitles of the video
163                        (requires writesubtitles or writeautomaticsub)
164     listsubtitles:     Lists all available subtitles for the video
165     subtitlesformat:   The format code for subtitles
166     subtitleslangs:    List of languages of the subtitles to download
167     keepvideo:         Keep the video file after post-processing
168     daterange:         A DateRange object, download only if the upload_date is in the range.
169     skip_download:     Skip the actual download of the video file
170     cachedir:          Location of the cache files in the filesystem.
171                        False to disable filesystem cache.
172     noplaylist:        Download single video instead of a playlist if in doubt.
173     age_limit:         An integer representing the user's age in years.
174                        Unsuitable videos for the given age are skipped.
175     min_views:         An integer representing the minimum view count the video
176                        must have in order to not be skipped.
177                        Videos without view count information are always
178                        downloaded. None for no limit.
179     max_views:         An integer representing the maximum view count.
180                        Videos that are more popular than that are not
181                        downloaded.
182                        Videos without view count information are always
183                        downloaded. None for no limit.
184     download_archive:  File name of a file where all downloads are recorded.
185                        Videos already present in the file are not downloaded
186                        again.
187     cookiefile:        File name where cookies should be read from and dumped to.
188     nocheckcertificate:Do not verify SSL certificates
189     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
190                        At the moment, this is only supported by YouTube.
191     proxy:             URL of the proxy server to use
192     cn_verification_proxy:  URL of the proxy to use for IP address verification
193                        on Chinese sites. (Experimental)
194     socket_timeout:    Time to wait for unresponsive hosts, in seconds
195     bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
197     debug_printtraffic:Print out sent and received HTTP traffic
198     include_ads:       Download ads as well
199     default_search:    Prepend this string if an input url is not valid.
200                        'auto' for elaborate guessing
201     encoding:          Use this encoding instead of the system-specified.
202     extract_flat:      Do not resolve URLs, return the immediate result.
203                        Pass in 'in_playlist' to only show this behavior for
204                        playlist items.
205     postprocessors:    A list of dictionaries, each with an entry
206                        * key:  The name of the postprocessor. See
207                                youtube_dl/postprocessor/__init__.py for a list.
208                        as well as any further keyword arguments for the
209                        postprocessor.
210     progress_hooks:    A list of functions that get called on download
211                        progress, with a dictionary with the entries
212                        * status: One of "downloading", "error", or "finished".
213                                  Check this first and ignore unknown values.
214
215                        If status is one of "downloading", or "finished", the
216                        following properties may also be present:
217                        * filename: The final filename (always present)
218                        * tmpfilename: The filename we're currently writing to
219                        * downloaded_bytes: Bytes on disk
220                        * total_bytes: Size of the whole file, None if unknown
221                        * total_bytes_estimate: Guess of the eventual file size,
222                                                None if unavailable.
223                        * elapsed: The number of seconds since download started.
224                        * eta: The estimated time in seconds, None if unknown
225                        * speed: The download speed in bytes/second, None if
226                                 unknown
227                        * fragment_index: The counter of the currently
228                                          downloaded video fragment.
229                        * fragment_count: The number of fragments (= individual
230                                          files that will be merged)
231
232                        Progress hooks are guaranteed to be called at least once
233                        (with status "finished") if the download is successful.
234     merge_output_format: Extension to use when merging formats.
235     fixup:             Automatically correct known faults of the file.
236                        One of:
237                        - "never": do nothing
238                        - "warn": only emit a warning
239                        - "detect_or_warn": check whether we can do anything
240                                            about it, warn otherwise (default)
241     source_address:    (Experimental) Client-side IP address to bind to.
242     call_home:         Boolean, true iff we are allowed to contact the
243                        youtube-dl servers for debugging.
244     sleep_interval:    Number of seconds to sleep before each download.
245     listformats:       Print an overview of available video formats and exit.
246     list_thumbnails:   Print a table of all thumbnails and exit.
247     match_filter:      A function that gets called with the info_dict of
248                        every video.
249                        If it returns a message, the video is ignored.
250                        If it returns None, the video is downloaded.
251                        match_filter_func in utils.py is one example for this.
252     no_color:          Do not emit color codes in output.
253
254     The following options determine which downloader is picked:
255     external_downloader: Executable of the external downloader to call.
256                        None or unset for standard (built-in) downloader.
257     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
258
259     The following parameters are not used by YoutubeDL itself, they are used by
260     the downloader (see youtube_dl/downloader/common.py):
261     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
262     noresizebuffer, retries, continuedl, noprogress, consoletitle,
263     xattr_set_filesize, external_downloader_args.
264
265     The following options are used by the post processors:
266     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
267                        otherwise prefer avconv.
268     postprocessor_args: A list of additional command-line arguments for the
269                         postprocessor.
270     """
271
272     params = None
273     _ies = []
274     _pps = []
275     _download_retcode = None
276     _num_downloads = None
277     _screen_file = None
278
279     def __init__(self, params=None, auto_init=True):
280         """Create a FileDownloader object with the given options."""
281         if params is None:
282             params = {}
283         self._ies = []
284         self._ies_instances = {}
285         self._pps = []
286         self._progress_hooks = []
287         self._download_retcode = 0
288         self._num_downloads = 0
289         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
290         self._err_file = sys.stderr
291         self.params = {
292             # Default parameters
293             'nocheckcertificate': False,
294         }
295         self.params.update(params)
296         self.cache = Cache(self)
297
298         if params.get('bidi_workaround', False):
299             try:
300                 import pty
301                 master, slave = pty.openpty()
302                 width = compat_get_terminal_size().columns
303                 if width is None:
304                     width_args = []
305                 else:
306                     width_args = ['-w', str(width)]
307                 sp_kwargs = dict(
308                     stdin=subprocess.PIPE,
309                     stdout=slave,
310                     stderr=self._err_file)
311                 try:
312                     self._output_process = subprocess.Popen(
313                         ['bidiv'] + width_args, **sp_kwargs
314                     )
315                 except OSError:
316                     self._output_process = subprocess.Popen(
317                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
318                 self._output_channel = os.fdopen(master, 'rb')
319             except OSError as ose:
320                 if ose.errno == 2:
321                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
322                 else:
323                     raise
324
325         if (sys.version_info >= (3,) and sys.platform != 'win32' and
326                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
327                 not params.get('restrictfilenames', False)):
328             # On Python 3, the Unicode filesystem API will throw errors (#1474)
329             self.report_warning(
330                 'Assuming --restrict-filenames since file system encoding '
331                 'cannot encode all characters. '
332                 'Set the LC_ALL environment variable to fix this.')
333             self.params['restrictfilenames'] = True
334
335         if isinstance(params.get('outtmpl'), bytes):
336             self.report_warning(
337                 'Parameter outtmpl is bytes, but should be a unicode string. '
338                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
339
340         self._setup_opener()
341
342         if auto_init:
343             self.print_debug_header()
344             self.add_default_info_extractors()
345
346         for pp_def_raw in self.params.get('postprocessors', []):
347             pp_class = get_postprocessor(pp_def_raw['key'])
348             pp_def = dict(pp_def_raw)
349             del pp_def['key']
350             pp = pp_class(self, **compat_kwargs(pp_def))
351             self.add_post_processor(pp)
352
353         for ph in self.params.get('progress_hooks', []):
354             self.add_progress_hook(ph)
355
356     def warn_if_short_id(self, argv):
357         # short YouTube ID starting with dash?
358         idxs = [
359             i for i, a in enumerate(argv)
360             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
361         if idxs:
362             correct_argv = (
363                 ['youtube-dl'] +
364                 [a for i, a in enumerate(argv) if i not in idxs] +
365                 ['--'] + [argv[i] for i in idxs]
366             )
367             self.report_warning(
368                 'Long argument string detected. '
369                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
370                 args_to_str(correct_argv))
371
372     def add_info_extractor(self, ie):
373         """Add an InfoExtractor object to the end of the list."""
374         self._ies.append(ie)
375         self._ies_instances[ie.ie_key()] = ie
376         ie.set_downloader(self)
377
378     def get_info_extractor(self, ie_key):
379         """
380         Get an instance of an IE with name ie_key, it will try to get one from
381         the _ies list, if there's no instance it will create a new one and add
382         it to the extractor list.
383         """
384         ie = self._ies_instances.get(ie_key)
385         if ie is None:
386             ie = get_info_extractor(ie_key)()
387             self.add_info_extractor(ie)
388         return ie
389
390     def add_default_info_extractors(self):
391         """
392         Add the InfoExtractors returned by gen_extractors to the end of the list
393         """
394         for ie in gen_extractors():
395             self.add_info_extractor(ie)
396
397     def add_post_processor(self, pp):
398         """Add a PostProcessor object to the end of the chain."""
399         self._pps.append(pp)
400         pp.set_downloader(self)
401
402     def add_progress_hook(self, ph):
403         """Add the progress hook (currently only for the file downloader)"""
404         self._progress_hooks.append(ph)
405
406     def _bidi_workaround(self, message):
407         if not hasattr(self, '_output_channel'):
408             return message
409
410         assert hasattr(self, '_output_process')
411         assert isinstance(message, compat_str)
412         line_count = message.count('\n') + 1
413         self._output_process.stdin.write((message + '\n').encode('utf-8'))
414         self._output_process.stdin.flush()
415         res = ''.join(self._output_channel.readline().decode('utf-8')
416                       for _ in range(line_count))
417         return res[:-len('\n')]
418
419     def to_screen(self, message, skip_eol=False):
420         """Print message to stdout if not in quiet mode."""
421         return self.to_stdout(message, skip_eol, check_quiet=True)
422
423     def _write_string(self, s, out=None):
424         write_string(s, out=out, encoding=self.params.get('encoding'))
425
426     def to_stdout(self, message, skip_eol=False, check_quiet=False):
427         """Print message to stdout if not in quiet mode."""
428         if self.params.get('logger'):
429             self.params['logger'].debug(message)
430         elif not check_quiet or not self.params.get('quiet', False):
431             message = self._bidi_workaround(message)
432             terminator = ['\n', ''][skip_eol]
433             output = message + terminator
434
435             self._write_string(output, self._screen_file)
436
437     def to_stderr(self, message):
438         """Print message to stderr."""
439         assert isinstance(message, compat_str)
440         if self.params.get('logger'):
441             self.params['logger'].error(message)
442         else:
443             message = self._bidi_workaround(message)
444             output = message + '\n'
445             self._write_string(output, self._err_file)
446
447     def to_console_title(self, message):
448         if not self.params.get('consoletitle', False):
449             return
450         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
451             # c_wchar_p() might not be necessary if `message` is
452             # already of type unicode()
453             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
454         elif 'TERM' in os.environ:
455             self._write_string('\033]0;%s\007' % message, self._screen_file)
456
457     def save_console_title(self):
458         if not self.params.get('consoletitle', False):
459             return
460         if 'TERM' in os.environ:
461             # Save the title on stack
462             self._write_string('\033[22;0t', self._screen_file)
463
464     def restore_console_title(self):
465         if not self.params.get('consoletitle', False):
466             return
467         if 'TERM' in os.environ:
468             # Restore the title from stack
469             self._write_string('\033[23;0t', self._screen_file)
470
471     def __enter__(self):
472         self.save_console_title()
473         return self
474
475     def __exit__(self, *args):
476         self.restore_console_title()
477
478         if self.params.get('cookiefile') is not None:
479             self.cookiejar.save()
480
481     def trouble(self, message=None, tb=None):
482         """Determine action to take when a download problem appears.
483
484         Depending on if the downloader has been configured to ignore
485         download errors or not, this method may throw an exception or
486         not when errors are found, after printing the message.
487
488         tb, if given, is additional traceback information.
489         """
490         if message is not None:
491             self.to_stderr(message)
492         if self.params.get('verbose'):
493             if tb is None:
494                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
495                     tb = ''
496                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
497                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
498                     tb += compat_str(traceback.format_exc())
499                 else:
500                     tb_data = traceback.format_list(traceback.extract_stack())
501                     tb = ''.join(tb_data)
502             self.to_stderr(tb)
503         if not self.params.get('ignoreerrors', False):
504             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
505                 exc_info = sys.exc_info()[1].exc_info
506             else:
507                 exc_info = sys.exc_info()
508             raise DownloadError(message, exc_info)
509         self._download_retcode = 1
510
511     def report_warning(self, message):
512         '''
513         Print the message to stderr, it will be prefixed with 'WARNING:'
514         If stderr is a tty file the 'WARNING:' will be colored
515         '''
516         if self.params.get('logger') is not None:
517             self.params['logger'].warning(message)
518         else:
519             if self.params.get('no_warnings'):
520                 return
521             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
522                 _msg_header = '\033[0;33mWARNING:\033[0m'
523             else:
524                 _msg_header = 'WARNING:'
525             warning_message = '%s %s' % (_msg_header, message)
526             self.to_stderr(warning_message)
527
528     def report_error(self, message, tb=None):
529         '''
530         Do the same as trouble, but prefixes the message with 'ERROR:', colored
531         in red if stderr is a tty file.
532         '''
533         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
534             _msg_header = '\033[0;31mERROR:\033[0m'
535         else:
536             _msg_header = 'ERROR:'
537         error_message = '%s %s' % (_msg_header, message)
538         self.trouble(error_message, tb)
539
540     def report_file_already_downloaded(self, file_name):
541         """Report file has already been fully downloaded."""
542         try:
543             self.to_screen('[download] %s has already been downloaded' % file_name)
544         except UnicodeEncodeError:
545             self.to_screen('[download] The file has already been downloaded')
546
547     def prepare_filename(self, info_dict):
548         """Generate the output filename."""
549         try:
550             template_dict = dict(info_dict)
551
552             template_dict['epoch'] = int(time.time())
553             autonumber_size = self.params.get('autonumber_size')
554             if autonumber_size is None:
555                 autonumber_size = 5
556             autonumber_templ = '%0' + str(autonumber_size) + 'd'
557             template_dict['autonumber'] = autonumber_templ % self._num_downloads
558             if template_dict.get('playlist_index') is not None:
559                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
560             if template_dict.get('resolution') is None:
561                 if template_dict.get('width') and template_dict.get('height'):
562                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
563                 elif template_dict.get('height'):
564                     template_dict['resolution'] = '%sp' % template_dict['height']
565                 elif template_dict.get('width'):
566                     template_dict['resolution'] = '?x%d' % template_dict['width']
567
568             sanitize = lambda k, v: sanitize_filename(
569                 compat_str(v),
570                 restricted=self.params.get('restrictfilenames'),
571                 is_id=(k == 'id'))
572             template_dict = dict((k, sanitize(k, v))
573                                  for k, v in template_dict.items()
574                                  if v is not None)
575             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
576
577             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
578             tmpl = compat_expanduser(outtmpl)
579             filename = tmpl % template_dict
580             # Temporary fix for #4787
581             # 'Treat' all problem characters by passing filename through preferredencoding
582             # to workaround encoding issues with subprocess on python2 @ Windows
583             if sys.version_info < (3, 0) and sys.platform == 'win32':
584                 filename = encodeFilename(filename, True).decode(preferredencoding())
585             return sanitize_path(filename)
586         except ValueError as err:
587             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
588             return None
589
590     def _match_entry(self, info_dict, incomplete):
591         """ Returns None iff the file should be downloaded """
592
593         video_title = info_dict.get('title', info_dict.get('id', 'video'))
594         if 'title' in info_dict:
595             # This can happen when we're just evaluating the playlist
596             title = info_dict['title']
597             matchtitle = self.params.get('matchtitle', False)
598             if matchtitle:
599                 if not re.search(matchtitle, title, re.IGNORECASE):
600                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
601             rejecttitle = self.params.get('rejecttitle', False)
602             if rejecttitle:
603                 if re.search(rejecttitle, title, re.IGNORECASE):
604                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
605         date = info_dict.get('upload_date', None)
606         if date is not None:
607             dateRange = self.params.get('daterange', DateRange())
608             if date not in dateRange:
609                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
610         view_count = info_dict.get('view_count', None)
611         if view_count is not None:
612             min_views = self.params.get('min_views')
613             if min_views is not None and view_count < min_views:
614                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
615             max_views = self.params.get('max_views')
616             if max_views is not None and view_count > max_views:
617                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
618         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
619             return 'Skipping "%s" because it is age restricted' % video_title
620         if self.in_download_archive(info_dict):
621             return '%s has already been recorded in archive' % video_title
622
623         if not incomplete:
624             match_filter = self.params.get('match_filter')
625             if match_filter is not None:
626                 ret = match_filter(info_dict)
627                 if ret is not None:
628                     return ret
629
630         return None
631
632     @staticmethod
633     def add_extra_info(info_dict, extra_info):
634         '''Set the keys from extra_info in info dict if they are missing'''
635         for key, value in extra_info.items():
636             info_dict.setdefault(key, value)
637
638     def extract_info(self, url, download=True, ie_key=None, extra_info={},
639                      process=True, force_generic_extractor=False):
640         '''
641         Returns a list with a dictionary for each video we find.
642         If 'download', also downloads the videos.
643         extra_info is a dict containing the extra values to add to each result
644         '''
645
646         if not ie_key and force_generic_extractor:
647             ie_key = 'Generic'
648
649         if ie_key:
650             ies = [self.get_info_extractor(ie_key)]
651         else:
652             ies = self._ies
653
654         for ie in ies:
655             if not ie.suitable(url):
656                 continue
657
658             if not ie.working():
659                 self.report_warning('The program functionality for this site has been marked as broken, '
660                                     'and will probably not work.')
661
662             try:
663                 ie_result = ie.extract(url)
664                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
665                     break
666                 if isinstance(ie_result, list):
667                     # Backwards compatibility: old IE result format
668                     ie_result = {
669                         '_type': 'compat_list',
670                         'entries': ie_result,
671                     }
672                 self.add_default_extra_info(ie_result, ie, url)
673                 if process:
674                     return self.process_ie_result(ie_result, download, extra_info)
675                 else:
676                     return ie_result
677             except ExtractorError as de:  # An error we somewhat expected
678                 self.report_error(compat_str(de), de.format_traceback())
679                 break
680             except MaxDownloadsReached:
681                 raise
682             except Exception as e:
683                 if self.params.get('ignoreerrors', False):
684                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
685                     break
686                 else:
687                     raise
688         else:
689             self.report_error('no suitable InfoExtractor for URL %s' % url)
690
691     def add_default_extra_info(self, ie_result, ie, url):
692         self.add_extra_info(ie_result, {
693             'extractor': ie.IE_NAME,
694             'webpage_url': url,
695             'webpage_url_basename': url_basename(url),
696             'extractor_key': ie.ie_key(),
697         })
698
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """
        # NOTE(review): extra_info={} is a shared mutable default; it is only
        # read here (never mutated), but keep that in mind when changing it.

        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            # With extract_flat (--flat-playlist), url results that belong to a
            # playlist are returned as-is instead of being resolved.
            extract_flat = self.params.get('extract_flat', False)
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                    extract_flat is True):
                if self.params.get('forcejson', False):
                    self.to_stdout(json.dumps(ie_result))
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # Non-None fields of the embedding result override the extracted
            # ones, except '_type' and 'url' which must come from the target.
            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            assert new_result.get('_type') != 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist' or result_type == 'multi_video':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # --playlist-start is 1-based on the command line, 0-based here
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

            playlistitems_str = self.params.get('playlist_items', None)
            playlistitems = None
            if playlistitems_str is not None:
                def iter_playlistitems(format):
                    # Expand e.g. "1-3,7" into the 1-based indices 1, 2, 3, 7
                    for string_segment in format.split(','):
                        if '-' in string_segment:
                            start, end = string_segment.split('-')
                            for item in range(int(start), int(end) + 1):
                                yield int(item)
                        else:
                            yield int(string_segment)
                playlistitems = iter_playlistitems(playlistitems_str)

            ie_entries = ie_result['entries']
            if isinstance(ie_entries, list):
                n_all_entries = len(ie_entries)
                if playlistitems:
                    # Silently drop 1-based indices that are out of range
                    entries = [
                        ie_entries[i - 1] for i in playlistitems
                        if -n_all_entries <= i - 1 < n_all_entries]
                else:
                    entries = ie_entries[playliststart:playlistend]
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            elif isinstance(ie_entries, PagedList):
                # PagedList is sliced lazily, so the full list is never built
                if playlistitems:
                    entries = []
                    for item in playlistitems:
                        entries.extend(ie_entries.getslice(
                            item - 1, item
                        ))
                else:
                    entries = ie_entries.getslice(
                        playliststart, playlistend)
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))
            else:  # iterable
                if playlistitems:
                    entry_list = list(ie_entries)
                    entries = [entry_list[i - 1] for i in playlistitems]
                else:
                    entries = list(itertools.islice(
                        ie_entries, playliststart, playlistend))
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))

            if self.params.get('playlistreverse', False):
                entries = entries[::-1]

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                # NOTE(review): playlist_index assumes a contiguous
                # playliststart slice; with --playlist-items it may not match
                # the entry's real position in the playlist — confirm.
                extra = {
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_id': ie_result.get('id'),
                    'playlist_title': ie_result.get('title'),
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

                # incomplete=True: entry metadata may be partial at this point
                reason = self._match_entry(entry, incomplete=True)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                    continue

                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            self.to_screen('[download] Finished downloading playlist: %s' % playlist)
            return ie_result
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            def _fixup(r):
                # Propagate the parent result's extractor/webpage fields
                self.add_extra_info(
                    r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    }
                )
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
863
864     def _build_format_filter(self, filter_spec):
865         " Returns a function to filter the formats according to the filter_spec "
866
867         OPERATORS = {
868             '<': operator.lt,
869             '<=': operator.le,
870             '>': operator.gt,
871             '>=': operator.ge,
872             '=': operator.eq,
873             '!=': operator.ne,
874         }
875         operator_rex = re.compile(r'''(?x)\s*
876             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
877             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
878             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
879             $
880             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
881         m = operator_rex.search(filter_spec)
882         if m:
883             try:
884                 comparison_value = int(m.group('value'))
885             except ValueError:
886                 comparison_value = parse_filesize(m.group('value'))
887                 if comparison_value is None:
888                     comparison_value = parse_filesize(m.group('value') + 'B')
889                 if comparison_value is None:
890                     raise ValueError(
891                         'Invalid value %r in format specification %r' % (
892                             m.group('value'), filter_spec))
893             op = OPERATORS[m.group('op')]
894
895         if not m:
896             STR_OPERATORS = {
897                 '=': operator.eq,
898                 '!=': operator.ne,
899             }
900             str_operator_rex = re.compile(r'''(?x)
901                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
902                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
903                 \s*(?P<value>[a-zA-Z0-9_-]+)
904                 \s*$
905                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
906             m = str_operator_rex.search(filter_spec)
907             if m:
908                 comparison_value = m.group('value')
909                 op = STR_OPERATORS[m.group('op')]
910
911         if not m:
912             raise ValueError('Invalid filter specification %r' % filter_spec)
913
914         def _filter(f):
915             actual_value = f.get(m.group('key'))
916             if actual_value is None:
917                 return m.group('none_inclusive')
918             return op(actual_value, comparison_value)
919         return _filter
920
    def build_format_selector(self, format_spec):
        """Compile format_spec (e.g. 'bestvideo+bestaudio/best') into a
        selector function that takes a list of format dicts and yields the
        selected format dict(s).

        The spec is tokenized with the stdlib tokenizer, parsed into a tree of
        FormatSelector nodes (SINGLE names, '(...)' GROUPs, '/' PICKFIRST
        alternatives, '+' MERGE pairs, plus '[...]' filters) and then compiled
        bottom-up into nested selector functions.
        """
        def syntax_error(note, start):
            # Build (not raise) a SyntaxError pointing at column start[1]
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        # Selector node types
        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        def _parse_filter(tokens):
            # Consume tokens up to the closing ']' and return the raw filter
            # string (parsed later by _build_format_filter)
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    # Glue adjacent name/number/op tokens into one NAME token
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Recursive-descent parser; the inside_* flags say which delimiter
            # characters belong to an enclosing construct and must be pushed
            # back via restore_last_token().
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        # Bare '[...]' filters the default 'best' selector
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        video_selector = current_selector
                        audio_selector = _parse_format_selection(tokens, inside_merge=True)
                        if not video_selector or not audio_selector:
                            raise syntax_error('"+" must be between two format selectors', start)
                        current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _build_selector_function(selector):
            if isinstance(selector, list):
                # ','-separated list: yield from each sub-selector in turn
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(formats):
                    for f in fs:
                        for format in f(formats):
                            yield format
                return selector_function
            elif selector.type == GROUP:
                selector_function = _build_selector_function(selector.selector)
            elif selector.type == PICKFIRST:
                # '/' alternatives: first sub-selector yielding anything wins
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(formats):
                    for f in fs:
                        picked_formats = list(f(formats))
                        if picked_formats:
                            return picked_formats
                    return []
            elif selector.type == SINGLE:
                format_spec = selector.selector

                def selector_function(formats):
                    # formats is expected ordered worst-to-best: index 0 is
                    # 'worst', index -1 is 'best'
                    formats = list(formats)
                    if not formats:
                        return
                    if format_spec == 'all':
                        for f in formats:
                            yield f
                    elif format_spec in ['best', 'worst', None]:
                        format_idx = 0 if format_spec == 'worst' else -1
                        audiovideo_formats = [
                            f for f in formats
                            if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                        if audiovideo_formats:
                            yield audiovideo_formats[format_idx]
                        # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
                        elif (all(f.get('acodec') != 'none' for f in formats) or
                              all(f.get('vcodec') != 'none' for f in formats)):
                            yield formats[format_idx]
                    elif format_spec == 'bestaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[-1]
                    elif format_spec == 'worstaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[0]
                    elif format_spec == 'bestvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[-1]
                    elif format_spec == 'worstvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[0]
                    else:
                        # Otherwise: a known extension, or a literal format_id
                        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                        if format_spec in extensions:
                            filter_f = lambda f: f['ext'] == format_spec
                        else:
                            filter_f = lambda f: f['format_id'] == format_spec
                        matches = list(filter(filter_f, formats))
                        if matches:
                            yield matches[-1]
            elif selector.type == MERGE:
                def _merge(formats_info):
                    format_1, format_2 = [f['format_id'] for f in formats_info]
                    # The first format must contain the video and the
                    # second the audio
                    if formats_info[0].get('vcodec') == 'none':
                        self.report_error('The first format must '
                                          'contain the video, try using '
                                          '"-f %s+%s"' % (format_2, format_1))
                        return
                    # Formats must be opposite (video+audio)
                    if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
                        self.report_error(
                            'Both formats %s and %s are video-only, you must specify "-f video+audio"'
                            % (format_1, format_2))
                        return
                    output_ext = (
                        formats_info[0]['ext']
                        if self.params.get('merge_output_format') is None
                        else self.params['merge_output_format'])
                    # Merged entry: video attributes come from the first
                    # format, audio attributes from the second
                    return {
                        'requested_formats': formats_info,
                        'format': '%s+%s' % (formats_info[0].get('format'),
                                             formats_info[1].get('format')),
                        'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                formats_info[1].get('format_id')),
                        'width': formats_info[0].get('width'),
                        'height': formats_info[0].get('height'),
                        'resolution': formats_info[0].get('resolution'),
                        'fps': formats_info[0].get('fps'),
                        'vcodec': formats_info[0].get('vcodec'),
                        'vbr': formats_info[0].get('vbr'),
                        'stretched_ratio': formats_info[0].get('stretched_ratio'),
                        'acodec': formats_info[1].get('acodec'),
                        'abr': formats_info[1].get('abr'),
                        'ext': output_ext,
                    }
                video_selector, audio_selector = map(_build_selector_function, selector.selector)

                def selector_function(formats):
                    formats = list(formats)
                    for pair in itertools.product(video_selector(formats), audio_selector(formats)):
                        yield _merge(pair)

            # '[...]' filters are applied before the selector itself runs
            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(formats):
                for _filter in filters:
                    formats = list(filter(_filter, formats))
                return selector_function(formats)
            return final_selector

        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator(object):
            # Token iterator with one-step pushback for the parser
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            next = __next__

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
1183
1184     def _calc_headers(self, info_dict):
1185         res = std_headers.copy()
1186
1187         add_headers = info_dict.get('http_headers')
1188         if add_headers:
1189             res.update(add_headers)
1190
1191         cookies = self._calc_cookies(info_dict)
1192         if cookies:
1193             res['Cookie'] = cookies
1194
1195         return res
1196
1197     def _calc_cookies(self, info_dict):
1198         pr = sanitized_Request(info_dict['url'])
1199         self.cookiejar.add_cookie_header(pr)
1200         return pr.get_header('Cookie')
1201
1202     def process_video_result(self, info_dict, download=True):
1203         assert info_dict.get('_type', 'video') == 'video'
1204
1205         if 'id' not in info_dict:
1206             raise ExtractorError('Missing "id" field in extractor result')
1207         if 'title' not in info_dict:
1208             raise ExtractorError('Missing "title" field in extractor result')
1209
1210         if 'playlist' not in info_dict:
1211             # It isn't part of a playlist
1212             info_dict['playlist'] = None
1213             info_dict['playlist_index'] = None
1214
1215         thumbnails = info_dict.get('thumbnails')
1216         if thumbnails is None:
1217             thumbnail = info_dict.get('thumbnail')
1218             if thumbnail:
1219                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1220         if thumbnails:
1221             thumbnails.sort(key=lambda t: (
1222                 t.get('preference'), t.get('width'), t.get('height'),
1223                 t.get('id'), t.get('url')))
1224             for i, t in enumerate(thumbnails):
1225                 if t.get('width') and t.get('height'):
1226                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1227                 if t.get('id') is None:
1228                     t['id'] = '%d' % i
1229
1230         if thumbnails and 'thumbnail' not in info_dict:
1231             info_dict['thumbnail'] = thumbnails[-1]['url']
1232
1233         if 'display_id' not in info_dict and 'id' in info_dict:
1234             info_dict['display_id'] = info_dict['id']
1235
1236         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1237             # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1238             # see http://bugs.python.org/issue1646728)
1239             try:
1240                 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1241                 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1242             except (ValueError, OverflowError, OSError):
1243                 pass
1244
1245         subtitles = info_dict.get('subtitles')
1246         if subtitles:
1247             for _, subtitle in subtitles.items():
1248                 for subtitle_format in subtitle:
1249                     if 'ext' not in subtitle_format:
1250                         subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1251
1252         if self.params.get('listsubtitles', False):
1253             if 'automatic_captions' in info_dict:
1254                 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
1255             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1256             return
1257         info_dict['requested_subtitles'] = self.process_subtitles(
1258             info_dict['id'], subtitles,
1259             info_dict.get('automatic_captions'))
1260
1261         # We now pick which formats have to be downloaded
1262         if info_dict.get('formats') is None:
1263             # There's only one format available
1264             formats = [info_dict]
1265         else:
1266             formats = info_dict['formats']
1267
1268         if not formats:
1269             raise ExtractorError('No video formats found!')
1270
1271         formats_dict = {}
1272
1273         # We check that all the formats have the format and format_id fields
1274         for i, format in enumerate(formats):
1275             if 'url' not in format:
1276                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1277
1278             if format.get('format_id') is None:
1279                 format['format_id'] = compat_str(i)
1280             format_id = format['format_id']
1281             if format_id not in formats_dict:
1282                 formats_dict[format_id] = []
1283             formats_dict[format_id].append(format)
1284
1285         # Make sure all formats have unique format_id
1286         for format_id, ambiguous_formats in formats_dict.items():
1287             if len(ambiguous_formats) > 1:
1288                 for i, format in enumerate(ambiguous_formats):
1289                     format['format_id'] = '%s-%d' % (format_id, i)
1290
1291         for i, format in enumerate(formats):
1292             if format.get('format') is None:
1293                 format['format'] = '{id} - {res}{note}'.format(
1294                     id=format['format_id'],
1295                     res=self.format_resolution(format),
1296                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1297                 )
1298             # Automatically determine file extension if missing
1299             if 'ext' not in format:
1300                 format['ext'] = determine_ext(format['url']).lower()
1301             # Add HTTP headers, so that external programs can use them from the
1302             # json output
1303             full_format_info = info_dict.copy()
1304             full_format_info.update(format)
1305             format['http_headers'] = self._calc_headers(full_format_info)
1306
1307         # TODO Central sorting goes here
1308
1309         if formats[0] is not info_dict:
1310             # only set the 'formats' fields if the original info_dict list them
1311             # otherwise we end up with a circular reference, the first (and unique)
1312             # element in the 'formats' field in info_dict is info_dict itself,
1313             # wich can't be exported to json
1314             info_dict['formats'] = formats
1315         if self.params.get('listformats'):
1316             self.list_formats(info_dict)
1317             return
1318         if self.params.get('list_thumbnails'):
1319             self.list_thumbnails(info_dict)
1320             return
1321
1322         req_format = self.params.get('format')
1323         if req_format is None:
1324             req_format_list = []
1325             if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
1326                     info_dict['extractor'] in ['youtube', 'ted'] and
1327                     not info_dict.get('is_live')):
1328                 merger = FFmpegMergerPP(self)
1329                 if merger.available and merger.can_merge():
1330                     req_format_list.append('bestvideo+bestaudio')
1331             req_format_list.append('best')
1332             req_format = '/'.join(req_format_list)
1333         format_selector = self.build_format_selector(req_format)
1334         formats_to_download = list(format_selector(formats))
1335         if not formats_to_download:
1336             raise ExtractorError('requested format not available',
1337                                  expected=True)
1338
1339         if download:
1340             if len(formats_to_download) > 1:
1341                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1342             for format in formats_to_download:
1343                 new_info = dict(info_dict)
1344                 new_info.update(format)
1345                 self.process_info(new_info)
1346         # We update the info dict with the best quality format (backwards compatibility)
1347         info_dict.update(formats_to_download[-1])
1348         return info_dict
1349
1350     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1351         """Select the requested subtitles and their format"""
1352         available_subs = {}
1353         if normal_subtitles and self.params.get('writesubtitles'):
1354             available_subs.update(normal_subtitles)
1355         if automatic_captions and self.params.get('writeautomaticsub'):
1356             for lang, cap_info in automatic_captions.items():
1357                 if lang not in available_subs:
1358                     available_subs[lang] = cap_info
1359
1360         if (not self.params.get('writesubtitles') and not
1361                 self.params.get('writeautomaticsub') or not
1362                 available_subs):
1363             return None
1364
1365         if self.params.get('allsubtitles', False):
1366             requested_langs = available_subs.keys()
1367         else:
1368             if self.params.get('subtitleslangs', False):
1369                 requested_langs = self.params.get('subtitleslangs')
1370             elif 'en' in available_subs:
1371                 requested_langs = ['en']
1372             else:
1373                 requested_langs = [list(available_subs.keys())[0]]
1374
1375         formats_query = self.params.get('subtitlesformat', 'best')
1376         formats_preference = formats_query.split('/') if formats_query else []
1377         subs = {}
1378         for lang in requested_langs:
1379             formats = available_subs.get(lang)
1380             if formats is None:
1381                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1382                 continue
1383             for ext in formats_preference:
1384                 if ext == 'best':
1385                     f = formats[-1]
1386                     break
1387                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1388                 if matches:
1389                     f = matches[-1]
1390                     break
1391             else:
1392                 f = formats[-1]
1393                 self.report_warning(
1394                     'No subtitle format found matching "%s" for language %s, '
1395                     'using %s' % (formats_query, lang, f['ext']))
1396             subs[lang] = f
1397         return subs
1398
1399     def process_info(self, info_dict):
1400         """Process a single resolved IE result."""
1401
1402         assert info_dict.get('_type', 'video') == 'video'
1403
1404         max_downloads = self.params.get('max_downloads')
1405         if max_downloads is not None:
1406             if self._num_downloads >= int(max_downloads):
1407                 raise MaxDownloadsReached()
1408
1409         info_dict['fulltitle'] = info_dict['title']
1410         if len(info_dict['title']) > 200:
1411             info_dict['title'] = info_dict['title'][:197] + '...'
1412
1413         if 'format' not in info_dict:
1414             info_dict['format'] = info_dict['ext']
1415
1416         reason = self._match_entry(info_dict, incomplete=False)
1417         if reason is not None:
1418             self.to_screen('[download] ' + reason)
1419             return
1420
1421         self._num_downloads += 1
1422
1423         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1424
1425         # Forced printings
1426         if self.params.get('forcetitle', False):
1427             self.to_stdout(info_dict['fulltitle'])
1428         if self.params.get('forceid', False):
1429             self.to_stdout(info_dict['id'])
1430         if self.params.get('forceurl', False):
1431             if info_dict.get('requested_formats') is not None:
1432                 for f in info_dict['requested_formats']:
1433                     self.to_stdout(f['url'] + f.get('play_path', ''))
1434             else:
1435                 # For RTMP URLs, also include the playpath
1436                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1437         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1438             self.to_stdout(info_dict['thumbnail'])
1439         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1440             self.to_stdout(info_dict['description'])
1441         if self.params.get('forcefilename', False) and filename is not None:
1442             self.to_stdout(filename)
1443         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1444             self.to_stdout(formatSeconds(info_dict['duration']))
1445         if self.params.get('forceformat', False):
1446             self.to_stdout(info_dict['format'])
1447         if self.params.get('forcejson', False):
1448             self.to_stdout(json.dumps(info_dict))
1449
1450         # Do nothing else if in simulate mode
1451         if self.params.get('simulate', False):
1452             return
1453
1454         if filename is None:
1455             return
1456
1457         try:
1458             dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1459             if dn and not os.path.exists(dn):
1460                 os.makedirs(dn)
1461         except (OSError, IOError) as err:
1462             self.report_error('unable to create directory ' + compat_str(err))
1463             return
1464
1465         if self.params.get('writedescription', False):
1466             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1467             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1468                 self.to_screen('[info] Video description is already present')
1469             elif info_dict.get('description') is None:
1470                 self.report_warning('There\'s no description to write.')
1471             else:
1472                 try:
1473                     self.to_screen('[info] Writing video description to: ' + descfn)
1474                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1475                         descfile.write(info_dict['description'])
1476                 except (OSError, IOError):
1477                     self.report_error('Cannot write description file ' + descfn)
1478                     return
1479
1480         if self.params.get('writeannotations', False):
1481             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1482             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1483                 self.to_screen('[info] Video annotations are already present')
1484             else:
1485                 try:
1486                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1487                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1488                         annofile.write(info_dict['annotations'])
1489                 except (KeyError, TypeError):
1490                     self.report_warning('There are no annotations to write.')
1491                 except (OSError, IOError):
1492                     self.report_error('Cannot write annotations file: ' + annofn)
1493                     return
1494
1495         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1496                                        self.params.get('writeautomaticsub')])
1497
1498         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1499             # subtitles download errors are already managed as troubles in relevant IE
1500             # that way it will silently go on when used with unsupporting IE
1501             subtitles = info_dict['requested_subtitles']
1502             ie = self.get_info_extractor(info_dict['extractor_key'])
1503             for sub_lang, sub_info in subtitles.items():
1504                 sub_format = sub_info['ext']
1505                 if sub_info.get('data') is not None:
1506                     sub_data = sub_info['data']
1507                 else:
1508                     try:
1509                         sub_data = ie._download_webpage(
1510                             sub_info['url'], info_dict['id'], note=False)
1511                     except ExtractorError as err:
1512                         self.report_warning('Unable to download subtitle for "%s": %s' %
1513                                             (sub_lang, compat_str(err.cause)))
1514                         continue
1515                 try:
1516                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1517                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1518                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1519                     else:
1520                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1521                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1522                             subfile.write(sub_data)
1523                 except (OSError, IOError):
1524                     self.report_error('Cannot write subtitles file ' + sub_filename)
1525                     return
1526
1527         if self.params.get('writeinfojson', False):
1528             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1529             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1530                 self.to_screen('[info] Video description metadata is already present')
1531             else:
1532                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1533                 try:
1534                     write_json_file(self.filter_requested_info(info_dict), infofn)
1535                 except (OSError, IOError):
1536                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1537                     return
1538
1539         self._write_thumbnails(info_dict, filename)
1540
1541         if not self.params.get('skip_download', False):
1542             try:
1543                 def dl(name, info):
1544                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1545                     for ph in self._progress_hooks:
1546                         fd.add_progress_hook(ph)
1547                     if self.params.get('verbose'):
1548                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1549                     return fd.download(name, info)
1550
1551                 if info_dict.get('requested_formats') is not None:
1552                     downloaded = []
1553                     success = True
1554                     merger = FFmpegMergerPP(self)
1555                     if not merger.available:
1556                         postprocessors = []
1557                         self.report_warning('You have requested multiple '
1558                                             'formats but ffmpeg or avconv are not installed.'
1559                                             ' The formats won\'t be merged.')
1560                     else:
1561                         postprocessors = [merger]
1562
1563                     def compatible_formats(formats):
1564                         video, audio = formats
1565                         # Check extension
1566                         video_ext, audio_ext = audio.get('ext'), video.get('ext')
1567                         if video_ext and audio_ext:
1568                             COMPATIBLE_EXTS = (
1569                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
1570                                 ('webm')
1571                             )
1572                             for exts in COMPATIBLE_EXTS:
1573                                 if video_ext in exts and audio_ext in exts:
1574                                     return True
1575                         # TODO: Check acodec/vcodec
1576                         return False
1577
1578                     filename_real_ext = os.path.splitext(filename)[1][1:]
1579                     filename_wo_ext = (
1580                         os.path.splitext(filename)[0]
1581                         if filename_real_ext == info_dict['ext']
1582                         else filename)
1583                     requested_formats = info_dict['requested_formats']
1584                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1585                         info_dict['ext'] = 'mkv'
1586                         self.report_warning(
1587                             'Requested formats are incompatible for merge and will be merged into mkv.')
1588                     # Ensure filename always has a correct extension for successful merge
1589                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1590                     if os.path.exists(encodeFilename(filename)):
1591                         self.to_screen(
1592                             '[download] %s has already been downloaded and '
1593                             'merged' % filename)
1594                     else:
1595                         for f in requested_formats:
1596                             new_info = dict(info_dict)
1597                             new_info.update(f)
1598                             fname = self.prepare_filename(new_info)
1599                             fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
1600                             downloaded.append(fname)
1601                             partial_success = dl(fname, new_info)
1602                             success = success and partial_success
1603                         info_dict['__postprocessors'] = postprocessors
1604                         info_dict['__files_to_merge'] = downloaded
1605                 else:
1606                     # Just a single file
1607                     success = dl(filename, info_dict)
1608             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1609                 self.report_error('unable to download video data: %s' % str(err))
1610                 return
1611             except (OSError, IOError) as err:
1612                 raise UnavailableVideoError(err)
1613             except (ContentTooShortError, ) as err:
1614                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1615                 return
1616
1617             if success:
1618                 # Fixup content
1619                 fixup_policy = self.params.get('fixup')
1620                 if fixup_policy is None:
1621                     fixup_policy = 'detect_or_warn'
1622
1623                 stretched_ratio = info_dict.get('stretched_ratio')
1624                 if stretched_ratio is not None and stretched_ratio != 1:
1625                     if fixup_policy == 'warn':
1626                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1627                             info_dict['id'], stretched_ratio))
1628                     elif fixup_policy == 'detect_or_warn':
1629                         stretched_pp = FFmpegFixupStretchedPP(self)
1630                         if stretched_pp.available:
1631                             info_dict.setdefault('__postprocessors', [])
1632                             info_dict['__postprocessors'].append(stretched_pp)
1633                         else:
1634                             self.report_warning(
1635                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1636                                     info_dict['id'], stretched_ratio))
1637                     else:
1638                         assert fixup_policy in ('ignore', 'never')
1639
1640                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1641                     if fixup_policy == 'warn':
1642                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1643                             info_dict['id']))
1644                     elif fixup_policy == 'detect_or_warn':
1645                         fixup_pp = FFmpegFixupM4aPP(self)
1646                         if fixup_pp.available:
1647                             info_dict.setdefault('__postprocessors', [])
1648                             info_dict['__postprocessors'].append(fixup_pp)
1649                         else:
1650                             self.report_warning(
1651                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1652                                     info_dict['id']))
1653                     else:
1654                         assert fixup_policy in ('ignore', 'never')
1655
1656                 try:
1657                     self.post_process(filename, info_dict)
1658                 except (PostProcessingError) as err:
1659                     self.report_error('postprocessing: %s' % str(err))
1660                     return
1661                 self.record_download_archive(info_dict)
1662
1663     def download(self, url_list):
1664         """Download a given list of URLs."""
1665         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1666         if (len(url_list) > 1 and
1667                 '%' not in outtmpl and
1668                 self.params.get('max_downloads') != 1):
1669             raise SameFileError(outtmpl)
1670
1671         for url in url_list:
1672             try:
1673                 # It also downloads the videos
1674                 res = self.extract_info(
1675                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
1676             except UnavailableVideoError:
1677                 self.report_error('unable to download video')
1678             except MaxDownloadsReached:
1679                 self.to_screen('[info] Maximum number of downloaded files reached.')
1680                 raise
1681             else:
1682                 if self.params.get('dump_single_json', False):
1683                     self.to_stdout(json.dumps(res))
1684
1685         return self._download_retcode
1686
1687     def download_with_info_file(self, info_filename):
1688         with contextlib.closing(fileinput.FileInput(
1689                 [info_filename], mode='r',
1690                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1691             # FileInput doesn't have a read method, we can't call json.load
1692             info = self.filter_requested_info(json.loads('\n'.join(f)))
1693         try:
1694             self.process_ie_result(info, download=True)
1695         except DownloadError:
1696             webpage_url = info.get('webpage_url')
1697             if webpage_url is not None:
1698                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1699                 return self.download([webpage_url])
1700             else:
1701                 raise
1702         return self._download_retcode
1703
1704     @staticmethod
1705     def filter_requested_info(info_dict):
1706         return dict(
1707             (k, v) for k, v in info_dict.items()
1708             if k not in ['requested_formats', 'requested_subtitles'])
1709
1710     def post_process(self, filename, ie_info):
1711         """Run all the postprocessors on the given file."""
1712         info = dict(ie_info)
1713         info['filepath'] = filename
1714         pps_chain = []
1715         if ie_info.get('__postprocessors') is not None:
1716             pps_chain.extend(ie_info['__postprocessors'])
1717         pps_chain.extend(self._pps)
1718         for pp in pps_chain:
1719             files_to_delete = []
1720             try:
1721                 files_to_delete, info = pp.run(info)
1722             except PostProcessingError as e:
1723                 self.report_error(e.msg)
1724             if files_to_delete and not self.params.get('keepvideo', False):
1725                 for old_filename in files_to_delete:
1726                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1727                     try:
1728                         os.remove(encodeFilename(old_filename))
1729                     except (IOError, OSError):
1730                         self.report_warning('Unable to remove downloaded original file')
1731
1732     def _make_archive_id(self, info_dict):
1733         # Future-proof against any change in case
1734         # and backwards compatibility with prior versions
1735         extractor = info_dict.get('extractor_key')
1736         if extractor is None:
1737             if 'id' in info_dict:
1738                 extractor = info_dict.get('ie_key')  # key in a playlist
1739         if extractor is None:
1740             return None  # Incomplete video information
1741         return extractor.lower() + ' ' + info_dict['id']
1742
1743     def in_download_archive(self, info_dict):
1744         fn = self.params.get('download_archive')
1745         if fn is None:
1746             return False
1747
1748         vid_id = self._make_archive_id(info_dict)
1749         if vid_id is None:
1750             return False  # Incomplete video information
1751
1752         try:
1753             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1754                 for line in archive_file:
1755                     if line.strip() == vid_id:
1756                         return True
1757         except IOError as ioe:
1758             if ioe.errno != errno.ENOENT:
1759                 raise
1760         return False
1761
1762     def record_download_archive(self, info_dict):
1763         fn = self.params.get('download_archive')
1764         if fn is None:
1765             return
1766         vid_id = self._make_archive_id(info_dict)
1767         assert vid_id
1768         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1769             archive_file.write(vid_id + '\n')
1770
1771     @staticmethod
1772     def format_resolution(format, default='unknown'):
1773         if format.get('vcodec') == 'none':
1774             return 'audio only'
1775         if format.get('resolution') is not None:
1776             return format['resolution']
1777         if format.get('height') is not None:
1778             if format.get('width') is not None:
1779                 res = '%sx%s' % (format['width'], format['height'])
1780             else:
1781                 res = '%sp' % format['height']
1782         elif format.get('width') is not None:
1783             res = '?x%d' % format['width']
1784         else:
1785             res = default
1786         return res
1787
    def _format_note(self, fdict):
        """Build the free-form 'note' column for the --list-formats table:
        a comma-separated summary of codecs, bitrates, fps and size."""
        res = ''
        if fdict.get('ext') in ['f4f', 'f4m']:
            # f4f/f4m (Adobe HDS) segments are not downloadable here
            res += '(unsupported) '
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            # total bitrate, right-aligned to 4 columns
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            if res:
                res += ', '
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            if res:
                res += ', '
            res += fdict['vcodec']
            if fdict.get('vbr') is not None:
                # '@' glues the codec name to the bitrate appended below
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            # Bitrates known but video codec unknown
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            res += ', %sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            if fdict['acodec'] == 'none':
                res += 'video only'
            else:
                # audio codec name, left-aligned to 5 columns
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            # audio bitrate
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            # audio sampling rate
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            if res:
                res += ', '
            res += '~' + format_bytes(fdict['filesize_approx'])
        return res
1837
1838     def list_formats(self, info_dict):
1839         formats = info_dict.get('formats', [info_dict])
1840         table = [
1841             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1842             for f in formats
1843             if f.get('preference') is None or f['preference'] >= -1000]
1844         if len(formats) > 1:
1845             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1846
1847         header_line = ['format code', 'extension', 'resolution', 'note']
1848         self.to_screen(
1849             '[info] Available formats for %s:\n%s' %
1850             (info_dict['id'], render_table(header_line, table)))
1851
1852     def list_thumbnails(self, info_dict):
1853         thumbnails = info_dict.get('thumbnails')
1854         if not thumbnails:
1855             tn_url = info_dict.get('thumbnail')
1856             if tn_url:
1857                 thumbnails = [{'id': '0', 'url': tn_url}]
1858             else:
1859                 self.to_screen(
1860                     '[info] No thumbnails present for %s' % info_dict['id'])
1861                 return
1862
1863         self.to_screen(
1864             '[info] Thumbnails for %s:' % info_dict['id'])
1865         self.to_screen(render_table(
1866             ['ID', 'width', 'height', 'URL'],
1867             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1868
1869     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1870         if not subtitles:
1871             self.to_screen('%s has no %s' % (video_id, name))
1872             return
1873         self.to_screen(
1874             'Available %s for %s:' % (name, video_id))
1875         self.to_screen(render_table(
1876             ['Language', 'formats'],
1877             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1878                 for lang, formats in subtitles.items()]))
1879
1880     def urlopen(self, req):
1881         """ Start an HTTP download """
1882         if isinstance(req, compat_basestring):
1883             req = sanitized_Request(req)
1884         return self._opener.open(req, timeout=self._socket_timeout)
1885
    def print_debug_header(self):
        """Write version/encoding/environment diagnostics when --verbose is set."""
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
        # Best-effort: report the git revision when running from a checkout
        try:
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        except Exception:
            try:
                # Python 2 only: clear the pending exception state
                sys.exc_clear()
            except Exception:
                pass
        self._write_string('[debug] Python version %s - %s\n' % (
            platform.python_version(), platform_name()))

        # Versions of external helper programs (ffmpeg/avconv, rtmpdump)
        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            # Opt-in: contact yt-dl.org to echo the public IP and compare
            # the running version against the latest release
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
1950
1951     def _setup_opener(self):
1952         timeout_val = self.params.get('socket_timeout')
1953         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1954
1955         opts_cookiefile = self.params.get('cookiefile')
1956         opts_proxy = self.params.get('proxy')
1957
1958         if opts_cookiefile is None:
1959             self.cookiejar = compat_cookiejar.CookieJar()
1960         else:
1961             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1962                 opts_cookiefile)
1963             if os.access(opts_cookiefile, os.R_OK):
1964                 self.cookiejar.load()
1965
1966         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
1967         if opts_proxy is not None:
1968             if opts_proxy == '':
1969                 proxies = {}
1970             else:
1971                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1972         else:
1973             proxies = compat_urllib_request.getproxies()
1974             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1975             if 'http' in proxies and 'https' not in proxies:
1976                 proxies['https'] = proxies['http']
1977         proxy_handler = PerRequestProxyHandler(proxies)
1978
1979         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1980         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1981         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1982         data_handler = compat_urllib_request_DataHandler()
1983         opener = compat_urllib_request.build_opener(
1984             proxy_handler, https_handler, cookie_processor, ydlh, data_handler)
1985
1986         # Delete the default user-agent header, which would otherwise apply in
1987         # cases where our custom HTTP handler doesn't come into play
1988         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1989         opener.addheaders = []
1990         self._opener = opener
1991
1992     def encode(self, s):
1993         if isinstance(s, bytes):
1994             return s  # Already encoded
1995
1996         try:
1997             return s.encode(self.get_encoding())
1998         except UnicodeEncodeError as err:
1999             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2000             raise
2001
2002     def get_encoding(self):
2003         encoding = self.params.get('encoding')
2004         if encoding is None:
2005             encoding = preferredencoding()
2006         return encoding
2007
2008     def _write_thumbnails(self, info_dict, filename):
2009         if self.params.get('writethumbnail', False):
2010             thumbnails = info_dict.get('thumbnails')
2011             if thumbnails:
2012                 thumbnails = [thumbnails[-1]]
2013         elif self.params.get('write_all_thumbnails', False):
2014             thumbnails = info_dict.get('thumbnails')
2015         else:
2016             return
2017
2018         if not thumbnails:
2019             # No thumbnails present, so return immediately
2020             return
2021
2022         for t in thumbnails:
2023             thumb_ext = determine_ext(t['url'], 'jpg')
2024             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2025             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2026             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
2027
2028             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2029                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2030                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2031             else:
2032                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2033                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2034                 try:
2035                     uf = self.urlopen(t['url'])
2036                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2037                         shutil.copyfileobj(uf, thumbf)
2038                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2039                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2040                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2041                     self.report_warning('Unable to download thumbnail "%s": %s' %
2042                                         (t['url'], compat_str(err)))