[YoutubeDL] check for --list-thumbnails immediately after processing them
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import datetime
9 import errno
10 import fileinput
11 import io
12 import itertools
13 import json
14 import locale
15 import operator
16 import os
17 import platform
18 import re
19 import shutil
20 import subprocess
21 import socket
22 import sys
23 import time
24 import tokenize
25 import traceback
26
27 from .compat import (
28     compat_basestring,
29     compat_cookiejar,
30     compat_expanduser,
31     compat_get_terminal_size,
32     compat_http_client,
33     compat_kwargs,
34     compat_os_name,
35     compat_str,
36     compat_tokenize_tokenize,
37     compat_urllib_error,
38     compat_urllib_request,
39     compat_urllib_request_DataHandler,
40 )
41 from .utils import (
42     ContentTooShortError,
43     date_from_str,
44     DateRange,
45     DEFAULT_OUTTMPL,
46     determine_ext,
47     determine_protocol,
48     DownloadError,
49     encode_compat_str,
50     encodeFilename,
51     error_to_compat_str,
52     ExtractorError,
53     format_bytes,
54     formatSeconds,
55     locked_file,
56     make_HTTPS_handler,
57     MaxDownloadsReached,
58     PagedList,
59     parse_filesize,
60     PerRequestProxyHandler,
61     PostProcessingError,
62     platform_name,
63     preferredencoding,
64     render_table,
65     SameFileError,
66     sanitize_filename,
67     sanitize_path,
68     sanitized_Request,
69     std_headers,
70     subtitles_filename,
71     UnavailableVideoError,
72     url_basename,
73     version_tuple,
74     write_json_file,
75     write_string,
76     YoutubeDLCookieProcessor,
77     YoutubeDLHandler,
78     prepend_extension,
79     replace_extension,
80     args_to_str,
81     age_restricted,
82 )
83 from .cache import Cache
84 from .extractor import get_info_extractor, gen_extractors
85 from .downloader import get_suitable_downloader
86 from .downloader.rtmp import rtmpdump_version
87 from .postprocessor import (
88     FFmpegFixupM3u8PP,
89     FFmpegFixupM4aPP,
90     FFmpegFixupStretchedPP,
91     FFmpegMergerPP,
92     FFmpegPostProcessor,
93     get_postprocessor,
94 )
95 from .version import __version__
96
97 if compat_os_name == 'nt':
98     import ctypes
99
100
101 class YoutubeDL(object):
102     """YoutubeDL class.
103
104     YoutubeDL objects are the ones responsible of downloading the
105     actual video file and writing it to disk if the user has requested
106     it, among some other tasks. In most cases there should be one per
107     program. As, given a video URL, the downloader doesn't know how to
108     extract all the needed information, task that InfoExtractors do, it
109     has to pass the URL to one of them.
110
111     For this, YoutubeDL objects have a method that allows
112     InfoExtractors to be registered in a given order. When it is passed
113     a URL, the YoutubeDL object hands it to the first InfoExtractor it
114     finds that reports being able to handle it. The InfoExtractor extracts
115     all the information about the video or videos the URL refers to, and
116     YoutubeDL processes the extracted information, possibly using a File
117     Downloader to download the video.
118
119     YoutubeDL objects accept a lot of parameters. In order not to saturate
120     the object constructor with arguments, it receives a dictionary of
121     options instead. These options are available through the params
122     attribute for the InfoExtractors to use. The YoutubeDL also
123     registers itself as the downloader in charge for the InfoExtractors
124     that are added to it, so this is a "mutual registration".
125
126     Available options:
127
128     username:          Username for authentication purposes.
129     password:          Password for authentication purposes.
130     videopassword:     Password for accessing a video.
131     usenetrc:          Use netrc for authentication instead.
132     verbose:           Print additional info to stdout.
133     quiet:             Do not print messages to stdout.
134     no_warnings:       Do not print out anything for warnings.
135     forceurl:          Force printing final URL.
136     forcetitle:        Force printing title.
137     forceid:           Force printing ID.
138     forcethumbnail:    Force printing thumbnail URL.
139     forcedescription:  Force printing description.
140     forcefilename:     Force printing final filename.
141     forceduration:     Force printing duration.
142     forcejson:         Force printing info_dict as JSON.
143     dump_single_json:  Force printing the info_dict of the whole playlist
144                        (or video) as a single JSON line.
145     simulate:          Do not download the video files.
146     format:            Video format code. See options.py for more information.
147     outtmpl:           Template for output names.
148     restrictfilenames: Do not allow "&" and spaces in file names
149     ignoreerrors:      Do not stop on download errors.
150     force_generic_extractor: Force downloader to use the generic extractor
151     nooverwrites:      Prevent overwriting files.
152     playliststart:     Playlist item to start at.
153     playlistend:       Playlist item to end at.
154     playlist_items:    Specific indices of playlist to download.
155     playlistreverse:   Download playlist items in reverse order.
156     matchtitle:        Download only matching titles.
157     rejecttitle:       Reject downloads for matching titles.
158     logger:            Log messages to a logging.Logger instance.
159     logtostderr:       Log messages to stderr instead of stdout.
160     writedescription:  Write the video description to a .description file
161     writeinfojson:     Write the video description to a .info.json file
162     writeannotations:  Write the video annotations to a .annotations.xml file
163     writethumbnail:    Write the thumbnail image to a file
164     write_all_thumbnails:  Write all thumbnail formats to files
165     writesubtitles:    Write the video subtitles to a file
166     writeautomaticsub: Write the automatically generated subtitles to a file
167     allsubtitles:      Downloads all the subtitles of the video
168                        (requires writesubtitles or writeautomaticsub)
169     listsubtitles:     Lists all available subtitles for the video
170     subtitlesformat:   The format code for subtitles
171     subtitleslangs:    List of languages of the subtitles to download
172     keepvideo:         Keep the video file after post-processing
173     daterange:         A DateRange object, download only if the upload_date is in the range.
174     skip_download:     Skip the actual download of the video file
175     cachedir:          Location of the cache files in the filesystem.
176                        False to disable filesystem cache.
177     noplaylist:        Download single video instead of a playlist if in doubt.
178     age_limit:         An integer representing the user's age in years.
179                        Unsuitable videos for the given age are skipped.
180     min_views:         An integer representing the minimum view count the video
181                        must have in order to not be skipped.
182                        Videos without view count information are always
183                        downloaded. None for no limit.
184     max_views:         An integer representing the maximum view count.
185                        Videos that are more popular than that are not
186                        downloaded.
187                        Videos without view count information are always
188                        downloaded. None for no limit.
189     download_archive:  File name of a file where all downloads are recorded.
190                        Videos already present in the file are not downloaded
191                        again.
192     cookiefile:        File name where cookies should be read from and dumped to.
193     nocheckcertificate:Do not verify SSL certificates
194     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
195                        At the moment, this is only supported by YouTube.
196     proxy:             URL of the proxy server to use
197     cn_verification_proxy:  URL of the proxy to use for IP address verification
198                        on Chinese sites. (Experimental)
199     socket_timeout:    Time to wait for unresponsive hosts, in seconds
200     bidi_workaround:   Work around buggy terminals without bidirectional text
201                        support, using fribidi
202     debug_printtraffic:Print out sent and received HTTP traffic
203     include_ads:       Download ads as well
204     default_search:    Prepend this string if an input url is not valid.
205                        'auto' for elaborate guessing
206     encoding:          Use this encoding instead of the system-specified.
207     extract_flat:      Do not resolve URLs, return the immediate result.
208                        Pass in 'in_playlist' to only show this behavior for
209                        playlist items.
210     postprocessors:    A list of dictionaries, each with an entry
211                        * key:  The name of the postprocessor. See
212                                youtube_dl/postprocessor/__init__.py for a list.
213                        as well as any further keyword arguments for the
214                        postprocessor.
215     progress_hooks:    A list of functions that get called on download
216                        progress, with a dictionary with the entries
217                        * status: One of "downloading", "error", or "finished".
218                                  Check this first and ignore unknown values.
219
220                        If status is one of "downloading", or "finished", the
221                        following properties may also be present:
222                        * filename: The final filename (always present)
223                        * tmpfilename: The filename we're currently writing to
224                        * downloaded_bytes: Bytes on disk
225                        * total_bytes: Size of the whole file, None if unknown
226                        * total_bytes_estimate: Guess of the eventual file size,
227                                                None if unavailable.
228                        * elapsed: The number of seconds since download started.
229                        * eta: The estimated time in seconds, None if unknown
230                        * speed: The download speed in bytes/second, None if
231                                 unknown
232                        * fragment_index: The counter of the currently
233                                          downloaded video fragment.
234                        * fragment_count: The number of fragments (= individual
235                                          files that will be merged)
236
237                        Progress hooks are guaranteed to be called at least once
238                        (with status "finished") if the download is successful.
239     merge_output_format: Extension to use when merging formats.
240     fixup:             Automatically correct known faults of the file.
241                        One of:
242                        - "never": do nothing
243                        - "warn": only emit a warning
244                        - "detect_or_warn": check whether we can do anything
245                                            about it, warn otherwise (default)
246     source_address:    (Experimental) Client-side IP address to bind to.
247     call_home:         Boolean, true iff we are allowed to contact the
248                        youtube-dl servers for debugging.
249     sleep_interval:    Number of seconds to sleep before each download.
250     listformats:       Print an overview of available video formats and exit.
251     list_thumbnails:   Print a table of all thumbnails and exit.
252     match_filter:      A function that gets called with the info_dict of
253                        every video.
254                        If it returns a message, the video is ignored.
255                        If it returns None, the video is downloaded.
256                        match_filter_func in utils.py is one example for this.
257     no_color:          Do not emit color codes in output.
258
259     The following options determine which downloader is picked:
260     external_downloader: Executable of the external downloader to call.
261                        None or unset for standard (built-in) downloader.
262     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
263
264     The following parameters are not used by YoutubeDL itself, they are used by
265     the downloader (see youtube_dl/downloader/common.py):
266     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
267     noresizebuffer, retries, continuedl, noprogress, consoletitle,
268     xattr_set_filesize, external_downloader_args, hls_use_mpegts.
269
270     The following options are used by the post processors:
271     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
272                        otherwise prefer avconv.
273     postprocessor_args: A list of additional command-line arguments for the
274                         postprocessor.
275     """
276
277     params = None
278     _ies = []
279     _pps = []
280     _download_retcode = None
281     _num_downloads = None
282     _screen_file = None
283
284     def __init__(self, params=None, auto_init=True):
285         """Create a FileDownloader object with the given options."""
286         if params is None:
287             params = {}
288         self._ies = []
289         self._ies_instances = {}
290         self._pps = []
291         self._progress_hooks = []
292         self._download_retcode = 0
293         self._num_downloads = 0
294         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
295         self._err_file = sys.stderr
296         self.params = {
297             # Default parameters
298             'nocheckcertificate': False,
299         }
300         self.params.update(params)
301         self.cache = Cache(self)
302
303         if params.get('bidi_workaround', False):
304             try:
305                 import pty
306                 master, slave = pty.openpty()
307                 width = compat_get_terminal_size().columns
308                 if width is None:
309                     width_args = []
310                 else:
311                     width_args = ['-w', str(width)]
312                 sp_kwargs = dict(
313                     stdin=subprocess.PIPE,
314                     stdout=slave,
315                     stderr=self._err_file)
316                 try:
317                     self._output_process = subprocess.Popen(
318                         ['bidiv'] + width_args, **sp_kwargs
319                     )
320                 except OSError:
321                     self._output_process = subprocess.Popen(
322                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
323                 self._output_channel = os.fdopen(master, 'rb')
324             except OSError as ose:
325                 if ose.errno == 2:
326                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
327                 else:
328                     raise
329
330         if (sys.version_info >= (3,) and sys.platform != 'win32' and
331                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
332                 not params.get('restrictfilenames', False)):
333             # On Python 3, the Unicode filesystem API will throw errors (#1474)
334             self.report_warning(
335                 'Assuming --restrict-filenames since file system encoding '
336                 'cannot encode all characters. '
337                 'Set the LC_ALL environment variable to fix this.')
338             self.params['restrictfilenames'] = True
339
340         if isinstance(params.get('outtmpl'), bytes):
341             self.report_warning(
342                 'Parameter outtmpl is bytes, but should be a unicode string. '
343                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
344
345         self._setup_opener()
346
347         if auto_init:
348             self.print_debug_header()
349             self.add_default_info_extractors()
350
351         for pp_def_raw in self.params.get('postprocessors', []):
352             pp_class = get_postprocessor(pp_def_raw['key'])
353             pp_def = dict(pp_def_raw)
354             del pp_def['key']
355             pp = pp_class(self, **compat_kwargs(pp_def))
356             self.add_post_processor(pp)
357
358         for ph in self.params.get('progress_hooks', []):
359             self.add_progress_hook(ph)
360
361     def warn_if_short_id(self, argv):
362         # short YouTube ID starting with dash?
363         idxs = [
364             i for i, a in enumerate(argv)
365             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
366         if idxs:
367             correct_argv = (
368                 ['youtube-dl'] +
369                 [a for i, a in enumerate(argv) if i not in idxs] +
370                 ['--'] + [argv[i] for i in idxs]
371             )
372             self.report_warning(
373                 'Long argument string detected. '
374                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
375                 args_to_str(correct_argv))
376
377     def add_info_extractor(self, ie):
378         """Add an InfoExtractor object to the end of the list."""
379         self._ies.append(ie)
380         self._ies_instances[ie.ie_key()] = ie
381         ie.set_downloader(self)
382
383     def get_info_extractor(self, ie_key):
384         """
385         Get an instance of an IE with name ie_key, it will try to get one from
386         the _ies list, if there's no instance it will create a new one and add
387         it to the extractor list.
388         """
389         ie = self._ies_instances.get(ie_key)
390         if ie is None:
391             ie = get_info_extractor(ie_key)()
392             self.add_info_extractor(ie)
393         return ie
394
395     def add_default_info_extractors(self):
396         """
397         Add the InfoExtractors returned by gen_extractors to the end of the list
398         """
399         for ie in gen_extractors():
400             self.add_info_extractor(ie)
401
402     def add_post_processor(self, pp):
403         """Add a PostProcessor object to the end of the chain."""
404         self._pps.append(pp)
405         pp.set_downloader(self)
406
407     def add_progress_hook(self, ph):
408         """Add the progress hook (currently only for the file downloader)"""
409         self._progress_hooks.append(ph)
410
411     def _bidi_workaround(self, message):
412         if not hasattr(self, '_output_channel'):
413             return message
414
415         assert hasattr(self, '_output_process')
416         assert isinstance(message, compat_str)
417         line_count = message.count('\n') + 1
418         self._output_process.stdin.write((message + '\n').encode('utf-8'))
419         self._output_process.stdin.flush()
420         res = ''.join(self._output_channel.readline().decode('utf-8')
421                       for _ in range(line_count))
422         return res[:-len('\n')]
423
424     def to_screen(self, message, skip_eol=False):
425         """Print message to stdout if not in quiet mode."""
426         return self.to_stdout(message, skip_eol, check_quiet=True)
427
428     def _write_string(self, s, out=None):
429         write_string(s, out=out, encoding=self.params.get('encoding'))
430
431     def to_stdout(self, message, skip_eol=False, check_quiet=False):
432         """Print message to stdout if not in quiet mode."""
433         if self.params.get('logger'):
434             self.params['logger'].debug(message)
435         elif not check_quiet or not self.params.get('quiet', False):
436             message = self._bidi_workaround(message)
437             terminator = ['\n', ''][skip_eol]
438             output = message + terminator
439
440             self._write_string(output, self._screen_file)
441
442     def to_stderr(self, message):
443         """Print message to stderr."""
444         assert isinstance(message, compat_str)
445         if self.params.get('logger'):
446             self.params['logger'].error(message)
447         else:
448             message = self._bidi_workaround(message)
449             output = message + '\n'
450             self._write_string(output, self._err_file)
451
452     def to_console_title(self, message):
453         if not self.params.get('consoletitle', False):
454             return
455         if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
456             # c_wchar_p() might not be necessary if `message` is
457             # already of type unicode()
458             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
459         elif 'TERM' in os.environ:
460             self._write_string('\033]0;%s\007' % message, self._screen_file)
461
462     def save_console_title(self):
463         if not self.params.get('consoletitle', False):
464             return
465         if 'TERM' in os.environ:
466             # Save the title on stack
467             self._write_string('\033[22;0t', self._screen_file)
468
469     def restore_console_title(self):
470         if not self.params.get('consoletitle', False):
471             return
472         if 'TERM' in os.environ:
473             # Restore the title from stack
474             self._write_string('\033[23;0t', self._screen_file)
475
476     def __enter__(self):
477         self.save_console_title()
478         return self
479
480     def __exit__(self, *args):
481         self.restore_console_title()
482
483         if self.params.get('cookiefile') is not None:
484             self.cookiejar.save()
485
486     def trouble(self, message=None, tb=None):
487         """Determine action to take when a download problem appears.
488
489         Depending on if the downloader has been configured to ignore
490         download errors or not, this method may throw an exception or
491         not when errors are found, after printing the message.
492
493         tb, if given, is additional traceback information.
494         """
495         if message is not None:
496             self.to_stderr(message)
497         if self.params.get('verbose'):
498             if tb is None:
499                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
500                     tb = ''
501                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
502                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
503                     tb += encode_compat_str(traceback.format_exc())
504                 else:
505                     tb_data = traceback.format_list(traceback.extract_stack())
506                     tb = ''.join(tb_data)
507             self.to_stderr(tb)
508         if not self.params.get('ignoreerrors', False):
509             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
510                 exc_info = sys.exc_info()[1].exc_info
511             else:
512                 exc_info = sys.exc_info()
513             raise DownloadError(message, exc_info)
514         self._download_retcode = 1
515
516     def report_warning(self, message):
517         '''
518         Print the message to stderr, it will be prefixed with 'WARNING:'
519         If stderr is a tty file the 'WARNING:' will be colored
520         '''
521         if self.params.get('logger') is not None:
522             self.params['logger'].warning(message)
523         else:
524             if self.params.get('no_warnings'):
525                 return
526             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
527                 _msg_header = '\033[0;33mWARNING:\033[0m'
528             else:
529                 _msg_header = 'WARNING:'
530             warning_message = '%s %s' % (_msg_header, message)
531             self.to_stderr(warning_message)
532
533     def report_error(self, message, tb=None):
534         '''
535         Do the same as trouble, but prefixes the message with 'ERROR:', colored
536         in red if stderr is a tty file.
537         '''
538         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
539             _msg_header = '\033[0;31mERROR:\033[0m'
540         else:
541             _msg_header = 'ERROR:'
542         error_message = '%s %s' % (_msg_header, message)
543         self.trouble(error_message, tb)
544
545     def report_file_already_downloaded(self, file_name):
546         """Report file has already been fully downloaded."""
547         try:
548             self.to_screen('[download] %s has already been downloaded' % file_name)
549         except UnicodeEncodeError:
550             self.to_screen('[download] The file has already been downloaded')
551
552     def prepare_filename(self, info_dict):
553         """Generate the output filename."""
554         try:
555             template_dict = dict(info_dict)
556
557             template_dict['epoch'] = int(time.time())
558             autonumber_size = self.params.get('autonumber_size')
559             if autonumber_size is None:
560                 autonumber_size = 5
561             autonumber_templ = '%0' + str(autonumber_size) + 'd'
562             template_dict['autonumber'] = autonumber_templ % self._num_downloads
563             if template_dict.get('playlist_index') is not None:
564                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
565             if template_dict.get('resolution') is None:
566                 if template_dict.get('width') and template_dict.get('height'):
567                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
568                 elif template_dict.get('height'):
569                     template_dict['resolution'] = '%sp' % template_dict['height']
570                 elif template_dict.get('width'):
571                     template_dict['resolution'] = '%dx?' % template_dict['width']
572
573             sanitize = lambda k, v: sanitize_filename(
574                 compat_str(v),
575                 restricted=self.params.get('restrictfilenames'),
576                 is_id=(k == 'id'))
577             template_dict = dict((k, sanitize(k, v))
578                                  for k, v in template_dict.items()
579                                  if v is not None)
580             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
581
582             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
583             tmpl = compat_expanduser(outtmpl)
584             filename = tmpl % template_dict
585             # Temporary fix for #4787
586             # 'Treat' all problem characters by passing filename through preferredencoding
587             # to workaround encoding issues with subprocess on python2 @ Windows
588             if sys.version_info < (3, 0) and sys.platform == 'win32':
589                 filename = encodeFilename(filename, True).decode(preferredencoding())
590             return sanitize_path(filename)
591         except ValueError as err:
592             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
593             return None
594
595     def _match_entry(self, info_dict, incomplete):
596         """ Returns None iff the file should be downloaded """
597
598         video_title = info_dict.get('title', info_dict.get('id', 'video'))
599         if 'title' in info_dict:
600             # This can happen when we're just evaluating the playlist
601             title = info_dict['title']
602             matchtitle = self.params.get('matchtitle', False)
603             if matchtitle:
604                 if not re.search(matchtitle, title, re.IGNORECASE):
605                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
606             rejecttitle = self.params.get('rejecttitle', False)
607             if rejecttitle:
608                 if re.search(rejecttitle, title, re.IGNORECASE):
609                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
610         date = info_dict.get('upload_date')
611         if date is not None:
612             dateRange = self.params.get('daterange', DateRange())
613             if date not in dateRange:
614                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
615         view_count = info_dict.get('view_count')
616         if view_count is not None:
617             min_views = self.params.get('min_views')
618             if min_views is not None and view_count < min_views:
619                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
620             max_views = self.params.get('max_views')
621             if max_views is not None and view_count > max_views:
622                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
623         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
624             return 'Skipping "%s" because it is age restricted' % video_title
625         if self.in_download_archive(info_dict):
626             return '%s has already been recorded in archive' % video_title
627
628         if not incomplete:
629             match_filter = self.params.get('match_filter')
630             if match_filter is not None:
631                 ret = match_filter(info_dict)
632                 if ret is not None:
633                     return ret
634
635         return None
636
637     @staticmethod
638     def add_extra_info(info_dict, extra_info):
639         '''Set the keys from extra_info in info dict if they are missing'''
640         for key, value in extra_info.items():
641             info_dict.setdefault(key, value)
642
643     def extract_info(self, url, download=True, ie_key=None, extra_info={},
644                      process=True, force_generic_extractor=False):
645         '''
646         Returns a list with a dictionary for each video we find.
647         If 'download', also downloads the videos.
648         extra_info is a dict containing the extra values to add to each result
649         '''
650
651         if not ie_key and force_generic_extractor:
652             ie_key = 'Generic'
653
654         if ie_key:
655             ies = [self.get_info_extractor(ie_key)]
656         else:
657             ies = self._ies
658
659         for ie in ies:
660             if not ie.suitable(url):
661                 continue
662
663             if not ie.working():
664                 self.report_warning('The program functionality for this site has been marked as broken, '
665                                     'and will probably not work.')
666
667             try:
668                 ie_result = ie.extract(url)
669                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
670                     break
671                 if isinstance(ie_result, list):
672                     # Backwards compatibility: old IE result format
673                     ie_result = {
674                         '_type': 'compat_list',
675                         'entries': ie_result,
676                     }
677                 self.add_default_extra_info(ie_result, ie, url)
678                 if process:
679                     return self.process_ie_result(ie_result, download, extra_info)
680                 else:
681                     return ie_result
682             except ExtractorError as e:  # An error we somewhat expected
683                 self.report_error(compat_str(e), e.format_traceback())
684                 break
685             except MaxDownloadsReached:
686                 raise
687             except Exception as e:
688                 if self.params.get('ignoreerrors', False):
689                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
690                     break
691                 else:
692                     raise
693         else:
694             self.report_error('no suitable InfoExtractor for URL %s' % url)
695
696     def add_default_extra_info(self, ie_result, ie, url):
697         self.add_extra_info(ie_result, {
698             'extractor': ie.IE_NAME,
699             'webpage_url': url,
700             'webpage_url_basename': url_basename(url),
701             'extractor_key': ie.ie_key(),
702         })
703
704     def process_ie_result(self, ie_result, download=True, extra_info={}):
705         """
706         Take the result of the ie(may be modified) and resolve all unresolved
707         references (URLs, playlist items).
708
709         It will also download the videos if 'download'.
710         Returns the resolved ie_result.
711         """
712         result_type = ie_result.get('_type', 'video')
713
714         if result_type in ('url', 'url_transparent'):
715             extract_flat = self.params.get('extract_flat', False)
716             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
717                     extract_flat is True):
718                 if self.params.get('forcejson', False):
719                     self.to_stdout(json.dumps(ie_result))
720                 return ie_result
721
722         if result_type == 'video':
723             self.add_extra_info(ie_result, extra_info)
724             return self.process_video_result(ie_result, download=download)
725         elif result_type == 'url':
726             # We have to add extra_info to the results because it may be
727             # contained in a playlist
728             return self.extract_info(ie_result['url'],
729                                      download,
730                                      ie_key=ie_result.get('ie_key'),
731                                      extra_info=extra_info)
732         elif result_type == 'url_transparent':
733             # Use the information from the embedding page
734             info = self.extract_info(
735                 ie_result['url'], ie_key=ie_result.get('ie_key'),
736                 extra_info=extra_info, download=False, process=False)
737
738             force_properties = dict(
739                 (k, v) for k, v in ie_result.items() if v is not None)
740             for f in ('_type', 'url', 'ie_key'):
741                 if f in force_properties:
742                     del force_properties[f]
743             new_result = info.copy()
744             new_result.update(force_properties)
745
746             assert new_result.get('_type') != 'url_transparent'
747
748             return self.process_ie_result(
749                 new_result, download=download, extra_info=extra_info)
750         elif result_type == 'playlist' or result_type == 'multi_video':
751             # We process each entry in the playlist
752             playlist = ie_result.get('title') or ie_result.get('id')
753             self.to_screen('[download] Downloading playlist: %s' % playlist)
754
755             playlist_results = []
756
757             playliststart = self.params.get('playliststart', 1) - 1
758             playlistend = self.params.get('playlistend')
759             # For backwards compatibility, interpret -1 as whole list
760             if playlistend == -1:
761                 playlistend = None
762
763             playlistitems_str = self.params.get('playlist_items')
764             playlistitems = None
765             if playlistitems_str is not None:
766                 def iter_playlistitems(format):
767                     for string_segment in format.split(','):
768                         if '-' in string_segment:
769                             start, end = string_segment.split('-')
770                             for item in range(int(start), int(end) + 1):
771                                 yield int(item)
772                         else:
773                             yield int(string_segment)
774                 playlistitems = iter_playlistitems(playlistitems_str)
775
776             ie_entries = ie_result['entries']
777             if isinstance(ie_entries, list):
778                 n_all_entries = len(ie_entries)
779                 if playlistitems:
780                     entries = [
781                         ie_entries[i - 1] for i in playlistitems
782                         if -n_all_entries <= i - 1 < n_all_entries]
783                 else:
784                     entries = ie_entries[playliststart:playlistend]
785                 n_entries = len(entries)
786                 self.to_screen(
787                     '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
788                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
789             elif isinstance(ie_entries, PagedList):
790                 if playlistitems:
791                     entries = []
792                     for item in playlistitems:
793                         entries.extend(ie_entries.getslice(
794                             item - 1, item
795                         ))
796                 else:
797                     entries = ie_entries.getslice(
798                         playliststart, playlistend)
799                 n_entries = len(entries)
800                 self.to_screen(
801                     '[%s] playlist %s: Downloading %d videos' %
802                     (ie_result['extractor'], playlist, n_entries))
803             else:  # iterable
804                 if playlistitems:
805                     entry_list = list(ie_entries)
806                     entries = [entry_list[i - 1] for i in playlistitems]
807                 else:
808                     entries = list(itertools.islice(
809                         ie_entries, playliststart, playlistend))
810                 n_entries = len(entries)
811                 self.to_screen(
812                     '[%s] playlist %s: Downloading %d videos' %
813                     (ie_result['extractor'], playlist, n_entries))
814
815             if self.params.get('playlistreverse', False):
816                 entries = entries[::-1]
817
818             for i, entry in enumerate(entries, 1):
819                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
820                 extra = {
821                     'n_entries': n_entries,
822                     'playlist': playlist,
823                     'playlist_id': ie_result.get('id'),
824                     'playlist_title': ie_result.get('title'),
825                     'playlist_index': i + playliststart,
826                     'extractor': ie_result['extractor'],
827                     'webpage_url': ie_result['webpage_url'],
828                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
829                     'extractor_key': ie_result['extractor_key'],
830                 }
831
832                 reason = self._match_entry(entry, incomplete=True)
833                 if reason is not None:
834                     self.to_screen('[download] ' + reason)
835                     continue
836
837                 entry_result = self.process_ie_result(entry,
838                                                       download=download,
839                                                       extra_info=extra)
840                 playlist_results.append(entry_result)
841             ie_result['entries'] = playlist_results
842             self.to_screen('[download] Finished downloading playlist: %s' % playlist)
843             return ie_result
844         elif result_type == 'compat_list':
845             self.report_warning(
846                 'Extractor %s returned a compat_list result. '
847                 'It needs to be updated.' % ie_result.get('extractor'))
848
849             def _fixup(r):
850                 self.add_extra_info(
851                     r,
852                     {
853                         'extractor': ie_result['extractor'],
854                         'webpage_url': ie_result['webpage_url'],
855                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
856                         'extractor_key': ie_result['extractor_key'],
857                     }
858                 )
859                 return r
860             ie_result['entries'] = [
861                 self.process_ie_result(_fixup(r), download, extra_info)
862                 for r in ie_result['entries']
863             ]
864             return ie_result
865         else:
866             raise Exception('Invalid result type: %s' % result_type)
867
868     def _build_format_filter(self, filter_spec):
869         " Returns a function to filter the formats according to the filter_spec "
870
871         OPERATORS = {
872             '<': operator.lt,
873             '<=': operator.le,
874             '>': operator.gt,
875             '>=': operator.ge,
876             '=': operator.eq,
877             '!=': operator.ne,
878         }
879         operator_rex = re.compile(r'''(?x)\s*
880             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
881             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
882             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
883             $
884             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
885         m = operator_rex.search(filter_spec)
886         if m:
887             try:
888                 comparison_value = int(m.group('value'))
889             except ValueError:
890                 comparison_value = parse_filesize(m.group('value'))
891                 if comparison_value is None:
892                     comparison_value = parse_filesize(m.group('value') + 'B')
893                 if comparison_value is None:
894                     raise ValueError(
895                         'Invalid value %r in format specification %r' % (
896                             m.group('value'), filter_spec))
897             op = OPERATORS[m.group('op')]
898
899         if not m:
900             STR_OPERATORS = {
901                 '=': operator.eq,
902                 '!=': operator.ne,
903                 '^=': lambda attr, value: attr.startswith(value),
904                 '$=': lambda attr, value: attr.endswith(value),
905                 '*=': lambda attr, value: value in attr,
906             }
907             str_operator_rex = re.compile(r'''(?x)
908                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
909                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
910                 \s*(?P<value>[a-zA-Z0-9._-]+)
911                 \s*$
912                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
913             m = str_operator_rex.search(filter_spec)
914             if m:
915                 comparison_value = m.group('value')
916                 op = STR_OPERATORS[m.group('op')]
917
918         if not m:
919             raise ValueError('Invalid filter specification %r' % filter_spec)
920
921         def _filter(f):
922             actual_value = f.get(m.group('key'))
923             if actual_value is None:
924                 return m.group('none_inclusive')
925             return op(actual_value, comparison_value)
926         return _filter
927
    def build_format_selector(self, format_spec):
        """
        Compile a format selection expression into a selector function.

        format_spec is tokenized with the Python tokenizer and parsed into
        FormatSelector tuples. The operators handled are ',' (several
        selections), '/' (first choice that yields something), '+' (merge of
        two formats), '(...)' (grouping) and '[...]' (filter).

        Returns a function that takes the available formats and yields the
        selected ones.

        Raises SyntaxError if the expression is malformed. The filters are
        compiled with _build_format_filter, which raises ValueError for an
        invalid filter.
        """
        def syntax_error(note, start):
            # Build (not raise) a SyntaxError whose message shows the
            # specification with a caret below column start[1]
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        # Node types of the parsed expression
        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        # 'selector' holds a format name (SINGLE), a list of selectors (GROUP)
        # or a pair (PICKFIRST, MERGE); 'filters' holds the raw filter strings
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        def _parse_filter(tokens):
            # Concatenate the token strings up to (not including) the closing
            # ']'. Falls off the end, returning None, if no ']' is found.
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            # Pending joined token; last_end is only set by the first piece
            # and last_line is never reassigned, so it stays None
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    # Part of a format name: start a new pending token or
                    # append to the current one
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            # Flush the pending token, if any
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Recursive parser returning a list of FormatSelector tuples.
            # The inside_* flags tell a nested call where to stop and hand
            # the terminating token back to its caller.
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        # The right operand of '+' ends at the next '/' or ','
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        # The alternatives of '/' end at the next ','
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        # second_choice is a list of selectors
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        # A filter without a preceding selector applies to 'best'
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        video_selector = current_selector
                        # audio_selector is a list of selectors; an empty
                        # list means that nothing followed the '+'
                        audio_selector = _parse_format_selection(tokens, inside_merge=True)
                        if not video_selector or not audio_selector:
                            raise syntax_error('"+" must be between two format selectors', start)
                        current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _build_selector_function(selector):
            # Turn a FormatSelector (or a list of them) into a function
            # that maps the available formats to the selected ones
            if isinstance(selector, list):
                fs = [_build_selector_function(s) for s in selector]

                # A list selects the results of all its members, in order.
                # Lists carry no filters, hence the early return.
                def selector_function(formats):
                    for f in fs:
                        for format in f(formats):
                            yield format
                return selector_function
            elif selector.type == GROUP:
                selector_function = _build_selector_function(selector.selector)
            elif selector.type == PICKFIRST:
                fs = [_build_selector_function(s) for s in selector.selector]

                # The first alternative that selects anything wins
                def selector_function(formats):
                    for f in fs:
                        picked_formats = list(f(formats))
                        if picked_formats:
                            return picked_formats
                    return []
            elif selector.type == SINGLE:
                format_spec = selector.selector

                def selector_function(formats):
                    formats = list(formats)
                    if not formats:
                        return
                    if format_spec == 'all':
                        for f in formats:
                            yield f
                    elif format_spec in ['best', 'worst', None]:
                        # 'worst' takes the first candidate, anything else
                        # the last one
                        format_idx = 0 if format_spec == 'worst' else -1
                        audiovideo_formats = [
                            f for f in formats
                            if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                        if audiovideo_formats:
                            yield audiovideo_formats[format_idx]
                        # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
                        elif (all(f.get('acodec') != 'none' for f in formats) or
                              all(f.get('vcodec') != 'none' for f in formats)):
                            yield formats[format_idx]
                    elif format_spec == 'bestaudio':
                        # Audio-only formats are those whose vcodec is 'none'
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[-1]
                    elif format_spec == 'worstaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[0]
                    elif format_spec == 'bestvideo':
                        # Video-only formats are those whose acodec is 'none'
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[-1]
                    elif format_spec == 'worstvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[0]
                    else:
                        # A known extension selects by 'ext', anything else
                        # by 'format_id'; the last match is the one yielded
                        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                        if format_spec in extensions:
                            filter_f = lambda f: f['ext'] == format_spec
                        else:
                            filter_f = lambda f: f['format_id'] == format_spec
                        matches = list(filter(filter_f, formats))
                        if matches:
                            yield matches[-1]
            elif selector.type == MERGE:
                def _merge(formats_info):
                    # Combine a (video, audio) pair into one format dict.
                    # Returns None after reporting an error for a bad pair.
                    format_1, format_2 = [f['format_id'] for f in formats_info]
                    # The first format must contain the video and the
                    # second the audio
                    if formats_info[0].get('vcodec') == 'none':
                        self.report_error('The first format must '
                                          'contain the video, try using '
                                          '"-f %s+%s"' % (format_2, format_1))
                        return
                    # Formats must be opposite (video+audio)
                    if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
                        self.report_error(
                            'Both formats %s and %s are video-only, you must specify "-f video+audio"'
                            % (format_1, format_2))
                        return
                    # The merged file keeps the video's extension unless
                    # the 'merge_output_format' param is set
                    output_ext = (
                        formats_info[0]['ext']
                        if self.params.get('merge_output_format') is None
                        else self.params['merge_output_format'])
                    return {
                        'requested_formats': formats_info,
                        'format': '%s+%s' % (formats_info[0].get('format'),
                                             formats_info[1].get('format')),
                        'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                formats_info[1].get('format_id')),
                        'width': formats_info[0].get('width'),
                        'height': formats_info[0].get('height'),
                        'resolution': formats_info[0].get('resolution'),
                        'fps': formats_info[0].get('fps'),
                        'vcodec': formats_info[0].get('vcodec'),
                        'vbr': formats_info[0].get('vbr'),
                        'stretched_ratio': formats_info[0].get('stretched_ratio'),
                        'acodec': formats_info[1].get('acodec'),
                        'abr': formats_info[1].get('abr'),
                        'ext': output_ext,
                    }
                video_selector, audio_selector = map(_build_selector_function, selector.selector)

                # Every combination of a selected video and a selected audio
                # format is merged
                def selector_function(formats):
                    formats = list(formats)
                    for pair in itertools.product(video_selector(formats), audio_selector(formats)):
                        yield _merge(pair)

            filters = [self._build_format_filter(f) for f in selector.filters]

            # The filters narrow down the formats before the selector picks
            def final_selector(formats):
                for _filter in filters:
                    formats = list(filter(_filter, formats))
                return selector_function(formats)
            return final_selector

        # The tokenizer reads lines of bytes, so feed it the encoded spec
        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator(object):
            # Iterator over the token list that can step back one token,
            # which lets a nested parser return a token to its caller
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            # Python 2 name of the iterator protocol method
            next = __next__

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
1190
1191     def _calc_headers(self, info_dict):
1192         res = std_headers.copy()
1193
1194         add_headers = info_dict.get('http_headers')
1195         if add_headers:
1196             res.update(add_headers)
1197
1198         cookies = self._calc_cookies(info_dict)
1199         if cookies:
1200             res['Cookie'] = cookies
1201
1202         return res
1203
1204     def _calc_cookies(self, info_dict):
1205         pr = sanitized_Request(info_dict['url'])
1206         self.cookiejar.add_cookie_header(pr)
1207         return pr.get_header('Cookie')
1208
1209     def process_video_result(self, info_dict, download=True):
1210         assert info_dict.get('_type', 'video') == 'video'
1211
1212         if 'id' not in info_dict:
1213             raise ExtractorError('Missing "id" field in extractor result')
1214         if 'title' not in info_dict:
1215             raise ExtractorError('Missing "title" field in extractor result')
1216
1217         if 'playlist' not in info_dict:
1218             # It isn't part of a playlist
1219             info_dict['playlist'] = None
1220             info_dict['playlist_index'] = None
1221
1222         thumbnails = info_dict.get('thumbnails')
1223         if thumbnails is None:
1224             thumbnail = info_dict.get('thumbnail')
1225             if thumbnail:
1226                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1227         if thumbnails:
1228             thumbnails.sort(key=lambda t: (
1229                 t.get('preference'), t.get('width'), t.get('height'),
1230                 t.get('id'), t.get('url')))
1231             for i, t in enumerate(thumbnails):
1232                 if t.get('width') and t.get('height'):
1233                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1234                 if t.get('id') is None:
1235                     t['id'] = '%d' % i
1236
1237         if self.params.get('list_thumbnails'):
1238             self.list_thumbnails(info_dict)
1239             return
1240
1241         if thumbnails and 'thumbnail' not in info_dict:
1242             info_dict['thumbnail'] = thumbnails[-1]['url']
1243
1244         if 'display_id' not in info_dict and 'id' in info_dict:
1245             info_dict['display_id'] = info_dict['id']
1246
1247         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1248             # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1249             # see http://bugs.python.org/issue1646728)
1250             try:
1251                 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1252                 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1253             except (ValueError, OverflowError, OSError):
1254                 pass
1255
1256         # Auto generate title fields corresponding to the *_number fields when missing
1257         # in order to always have clean titles. This is very common for TV series.
1258         for field in ('chapter', 'season', 'episode'):
1259             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1260                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
1261
1262         subtitles = info_dict.get('subtitles')
1263         if subtitles:
1264             for _, subtitle in subtitles.items():
1265                 for subtitle_format in subtitle:
1266                     if 'ext' not in subtitle_format:
1267                         subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1268
1269         if self.params.get('listsubtitles', False):
1270             if 'automatic_captions' in info_dict:
1271                 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
1272             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1273             return
1274         info_dict['requested_subtitles'] = self.process_subtitles(
1275             info_dict['id'], subtitles,
1276             info_dict.get('automatic_captions'))
1277
1278         # We now pick which formats have to be downloaded
1279         if info_dict.get('formats') is None:
1280             # There's only one format available
1281             formats = [info_dict]
1282         else:
1283             formats = info_dict['formats']
1284
1285         if not formats:
1286             raise ExtractorError('No video formats found!')
1287
1288         formats_dict = {}
1289
1290         # We check that all the formats have the format and format_id fields
1291         for i, format in enumerate(formats):
1292             if 'url' not in format:
1293                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1294
1295             if format.get('format_id') is None:
1296                 format['format_id'] = compat_str(i)
1297             else:
1298                 # Sanitize format_id from characters used in format selector expression
1299                 format['format_id'] = re.sub('[\s,/+\[\]()]', '_', format['format_id'])
1300             format_id = format['format_id']
1301             if format_id not in formats_dict:
1302                 formats_dict[format_id] = []
1303             formats_dict[format_id].append(format)
1304
1305         # Make sure all formats have unique format_id
1306         for format_id, ambiguous_formats in formats_dict.items():
1307             if len(ambiguous_formats) > 1:
1308                 for i, format in enumerate(ambiguous_formats):
1309                     format['format_id'] = '%s-%d' % (format_id, i)
1310
1311         for i, format in enumerate(formats):
1312             if format.get('format') is None:
1313                 format['format'] = '{id} - {res}{note}'.format(
1314                     id=format['format_id'],
1315                     res=self.format_resolution(format),
1316                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1317                 )
1318             # Automatically determine file extension if missing
1319             if 'ext' not in format:
1320                 format['ext'] = determine_ext(format['url']).lower()
1321             # Automatically determine protocol if missing (useful for format
1322             # selection purposes)
1323             if 'protocol' not in format:
1324                 format['protocol'] = determine_protocol(format)
1325             # Add HTTP headers, so that external programs can use them from the
1326             # json output
1327             full_format_info = info_dict.copy()
1328             full_format_info.update(format)
1329             format['http_headers'] = self._calc_headers(full_format_info)
1330
1331         # TODO Central sorting goes here
1332
1333         if formats[0] is not info_dict:
1334             # only set the 'formats' fields if the original info_dict list them
1335             # otherwise we end up with a circular reference, the first (and unique)
1336             # element in the 'formats' field in info_dict is info_dict itself,
1337             # which can't be exported to json
1338             info_dict['formats'] = formats
1339         if self.params.get('listformats'):
1340             self.list_formats(info_dict)
1341             return
1342
1343         req_format = self.params.get('format')
1344         if req_format is None:
1345             req_format_list = []
1346             if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
1347                     not info_dict.get('is_live')):
1348                 merger = FFmpegMergerPP(self)
1349                 if merger.available and merger.can_merge():
1350                     req_format_list.append('bestvideo+bestaudio')
1351             req_format_list.append('best')
1352             req_format = '/'.join(req_format_list)
1353         format_selector = self.build_format_selector(req_format)
1354         formats_to_download = list(format_selector(formats))
1355         if not formats_to_download:
1356             raise ExtractorError('requested format not available',
1357                                  expected=True)
1358
1359         if download:
1360             if len(formats_to_download) > 1:
1361                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1362             for format in formats_to_download:
1363                 new_info = dict(info_dict)
1364                 new_info.update(format)
1365                 self.process_info(new_info)
1366         # We update the info dict with the best quality format (backwards compatibility)
1367         info_dict.update(formats_to_download[-1])
1368         return info_dict
1369
1370     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1371         """Select the requested subtitles and their format"""
1372         available_subs = {}
1373         if normal_subtitles and self.params.get('writesubtitles'):
1374             available_subs.update(normal_subtitles)
1375         if automatic_captions and self.params.get('writeautomaticsub'):
1376             for lang, cap_info in automatic_captions.items():
1377                 if lang not in available_subs:
1378                     available_subs[lang] = cap_info
1379
1380         if (not self.params.get('writesubtitles') and not
1381                 self.params.get('writeautomaticsub') or not
1382                 available_subs):
1383             return None
1384
1385         if self.params.get('allsubtitles', False):
1386             requested_langs = available_subs.keys()
1387         else:
1388             if self.params.get('subtitleslangs', False):
1389                 requested_langs = self.params.get('subtitleslangs')
1390             elif 'en' in available_subs:
1391                 requested_langs = ['en']
1392             else:
1393                 requested_langs = [list(available_subs.keys())[0]]
1394
1395         formats_query = self.params.get('subtitlesformat', 'best')
1396         formats_preference = formats_query.split('/') if formats_query else []
1397         subs = {}
1398         for lang in requested_langs:
1399             formats = available_subs.get(lang)
1400             if formats is None:
1401                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1402                 continue
1403             for ext in formats_preference:
1404                 if ext == 'best':
1405                     f = formats[-1]
1406                     break
1407                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1408                 if matches:
1409                     f = matches[-1]
1410                     break
1411             else:
1412                 f = formats[-1]
1413                 self.report_warning(
1414                     'No subtitle format found matching "%s" for language %s, '
1415                     'using %s' % (formats_query, lang, f['ext']))
1416             subs[lang] = f
1417         return subs
1418
1419     def process_info(self, info_dict):
1420         """Process a single resolved IE result."""
1421
1422         assert info_dict.get('_type', 'video') == 'video'
1423
1424         max_downloads = self.params.get('max_downloads')
1425         if max_downloads is not None:
1426             if self._num_downloads >= int(max_downloads):
1427                 raise MaxDownloadsReached()
1428
1429         info_dict['fulltitle'] = info_dict['title']
1430         if len(info_dict['title']) > 200:
1431             info_dict['title'] = info_dict['title'][:197] + '...'
1432
1433         if 'format' not in info_dict:
1434             info_dict['format'] = info_dict['ext']
1435
1436         reason = self._match_entry(info_dict, incomplete=False)
1437         if reason is not None:
1438             self.to_screen('[download] ' + reason)
1439             return
1440
1441         self._num_downloads += 1
1442
1443         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1444
1445         # Forced printings
1446         if self.params.get('forcetitle', False):
1447             self.to_stdout(info_dict['fulltitle'])
1448         if self.params.get('forceid', False):
1449             self.to_stdout(info_dict['id'])
1450         if self.params.get('forceurl', False):
1451             if info_dict.get('requested_formats') is not None:
1452                 for f in info_dict['requested_formats']:
1453                     self.to_stdout(f['url'] + f.get('play_path', ''))
1454             else:
1455                 # For RTMP URLs, also include the playpath
1456                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1457         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1458             self.to_stdout(info_dict['thumbnail'])
1459         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1460             self.to_stdout(info_dict['description'])
1461         if self.params.get('forcefilename', False) and filename is not None:
1462             self.to_stdout(filename)
1463         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1464             self.to_stdout(formatSeconds(info_dict['duration']))
1465         if self.params.get('forceformat', False):
1466             self.to_stdout(info_dict['format'])
1467         if self.params.get('forcejson', False):
1468             self.to_stdout(json.dumps(info_dict))
1469
1470         # Do nothing else if in simulate mode
1471         if self.params.get('simulate', False):
1472             return
1473
1474         if filename is None:
1475             return
1476
1477         try:
1478             dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1479             if dn and not os.path.exists(dn):
1480                 os.makedirs(dn)
1481         except (OSError, IOError) as err:
1482             self.report_error('unable to create directory ' + error_to_compat_str(err))
1483             return
1484
1485         if self.params.get('writedescription', False):
1486             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1487             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1488                 self.to_screen('[info] Video description is already present')
1489             elif info_dict.get('description') is None:
1490                 self.report_warning('There\'s no description to write.')
1491             else:
1492                 try:
1493                     self.to_screen('[info] Writing video description to: ' + descfn)
1494                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1495                         descfile.write(info_dict['description'])
1496                 except (OSError, IOError):
1497                     self.report_error('Cannot write description file ' + descfn)
1498                     return
1499
1500         if self.params.get('writeannotations', False):
1501             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1502             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1503                 self.to_screen('[info] Video annotations are already present')
1504             else:
1505                 try:
1506                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1507                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1508                         annofile.write(info_dict['annotations'])
1509                 except (KeyError, TypeError):
1510                     self.report_warning('There are no annotations to write.')
1511                 except (OSError, IOError):
1512                     self.report_error('Cannot write annotations file: ' + annofn)
1513                     return
1514
1515         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1516                                        self.params.get('writeautomaticsub')])
1517
1518         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1519             # subtitles download errors are already managed as troubles in relevant IE
1520             # that way it will silently go on when used with unsupporting IE
1521             subtitles = info_dict['requested_subtitles']
1522             ie = self.get_info_extractor(info_dict['extractor_key'])
1523             for sub_lang, sub_info in subtitles.items():
1524                 sub_format = sub_info['ext']
1525                 if sub_info.get('data') is not None:
1526                     sub_data = sub_info['data']
1527                 else:
1528                     try:
1529                         sub_data = ie._download_webpage(
1530                             sub_info['url'], info_dict['id'], note=False)
1531                     except ExtractorError as err:
1532                         self.report_warning('Unable to download subtitle for "%s": %s' %
1533                                             (sub_lang, error_to_compat_str(err.cause)))
1534                         continue
1535                 try:
1536                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1537                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1538                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1539                     else:
1540                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1541                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1542                             subfile.write(sub_data)
1543                 except (OSError, IOError):
1544                     self.report_error('Cannot write subtitles file ' + sub_filename)
1545                     return
1546
1547         if self.params.get('writeinfojson', False):
1548             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1549             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1550                 self.to_screen('[info] Video description metadata is already present')
1551             else:
1552                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1553                 try:
1554                     write_json_file(self.filter_requested_info(info_dict), infofn)
1555                 except (OSError, IOError):
1556                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1557                     return
1558
1559         self._write_thumbnails(info_dict, filename)
1560
1561         if not self.params.get('skip_download', False):
1562             try:
1563                 def dl(name, info):
1564                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1565                     for ph in self._progress_hooks:
1566                         fd.add_progress_hook(ph)
1567                     if self.params.get('verbose'):
1568                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1569                     return fd.download(name, info)
1570
1571                 if info_dict.get('requested_formats') is not None:
1572                     downloaded = []
1573                     success = True
1574                     merger = FFmpegMergerPP(self)
1575                     if not merger.available:
1576                         postprocessors = []
1577                         self.report_warning('You have requested multiple '
1578                                             'formats but ffmpeg or avconv are not installed.'
1579                                             ' The formats won\'t be merged.')
1580                     else:
1581                         postprocessors = [merger]
1582
1583                     def compatible_formats(formats):
1584                         video, audio = formats
1585                         # Check extension
1586                         video_ext, audio_ext = audio.get('ext'), video.get('ext')
1587                         if video_ext and audio_ext:
1588                             COMPATIBLE_EXTS = (
1589                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
1590                                 ('webm')
1591                             )
1592                             for exts in COMPATIBLE_EXTS:
1593                                 if video_ext in exts and audio_ext in exts:
1594                                     return True
1595                         # TODO: Check acodec/vcodec
1596                         return False
1597
1598                     filename_real_ext = os.path.splitext(filename)[1][1:]
1599                     filename_wo_ext = (
1600                         os.path.splitext(filename)[0]
1601                         if filename_real_ext == info_dict['ext']
1602                         else filename)
1603                     requested_formats = info_dict['requested_formats']
1604                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1605                         info_dict['ext'] = 'mkv'
1606                         self.report_warning(
1607                             'Requested formats are incompatible for merge and will be merged into mkv.')
1608                     # Ensure filename always has a correct extension for successful merge
1609                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1610                     if os.path.exists(encodeFilename(filename)):
1611                         self.to_screen(
1612                             '[download] %s has already been downloaded and '
1613                             'merged' % filename)
1614                     else:
1615                         for f in requested_formats:
1616                             new_info = dict(info_dict)
1617                             new_info.update(f)
1618                             fname = self.prepare_filename(new_info)
1619                             fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
1620                             downloaded.append(fname)
1621                             partial_success = dl(fname, new_info)
1622                             success = success and partial_success
1623                         info_dict['__postprocessors'] = postprocessors
1624                         info_dict['__files_to_merge'] = downloaded
1625                 else:
1626                     # Just a single file
1627                     success = dl(filename, info_dict)
1628             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1629                 self.report_error('unable to download video data: %s' % str(err))
1630                 return
1631             except (OSError, IOError) as err:
1632                 raise UnavailableVideoError(err)
1633             except (ContentTooShortError, ) as err:
1634                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1635                 return
1636
1637             if success and filename != '-':
1638                 # Fixup content
1639                 fixup_policy = self.params.get('fixup')
1640                 if fixup_policy is None:
1641                     fixup_policy = 'detect_or_warn'
1642
1643                 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
1644
1645                 stretched_ratio = info_dict.get('stretched_ratio')
1646                 if stretched_ratio is not None and stretched_ratio != 1:
1647                     if fixup_policy == 'warn':
1648                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1649                             info_dict['id'], stretched_ratio))
1650                     elif fixup_policy == 'detect_or_warn':
1651                         stretched_pp = FFmpegFixupStretchedPP(self)
1652                         if stretched_pp.available:
1653                             info_dict.setdefault('__postprocessors', [])
1654                             info_dict['__postprocessors'].append(stretched_pp)
1655                         else:
1656                             self.report_warning(
1657                                 '%s: Non-uniform pixel ratio (%s). %s'
1658                                 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
1659                     else:
1660                         assert fixup_policy in ('ignore', 'never')
1661
1662                 if (info_dict.get('requested_formats') is None and
1663                         info_dict.get('container') == 'm4a_dash'):
1664                     if fixup_policy == 'warn':
1665                         self.report_warning(
1666                             '%s: writing DASH m4a. '
1667                             'Only some players support this container.'
1668                             % info_dict['id'])
1669                     elif fixup_policy == 'detect_or_warn':
1670                         fixup_pp = FFmpegFixupM4aPP(self)
1671                         if fixup_pp.available:
1672                             info_dict.setdefault('__postprocessors', [])
1673                             info_dict['__postprocessors'].append(fixup_pp)
1674                         else:
1675                             self.report_warning(
1676                                 '%s: writing DASH m4a. '
1677                                 'Only some players support this container. %s'
1678                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
1679                     else:
1680                         assert fixup_policy in ('ignore', 'never')
1681
1682                 if (info_dict.get('protocol') == 'm3u8_native' or
1683                         info_dict.get('protocol') == 'm3u8' and
1684                         self.params.get('hls_prefer_native')):
1685                     if fixup_policy == 'warn':
1686                         self.report_warning('%s: malformated aac bitstream.' % (
1687                             info_dict['id']))
1688                     elif fixup_policy == 'detect_or_warn':
1689                         fixup_pp = FFmpegFixupM3u8PP(self)
1690                         if fixup_pp.available:
1691                             info_dict.setdefault('__postprocessors', [])
1692                             info_dict['__postprocessors'].append(fixup_pp)
1693                         else:
1694                             self.report_warning(
1695                                 '%s: malformated aac bitstream. %s'
1696                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
1697                     else:
1698                         assert fixup_policy in ('ignore', 'never')
1699
1700                 try:
1701                     self.post_process(filename, info_dict)
1702                 except (PostProcessingError) as err:
1703                     self.report_error('postprocessing: %s' % str(err))
1704                     return
1705                 self.record_download_archive(info_dict)
1706
1707     def download(self, url_list):
1708         """Download a given list of URLs."""
1709         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1710         if (len(url_list) > 1 and
1711                 '%' not in outtmpl and
1712                 self.params.get('max_downloads') != 1):
1713             raise SameFileError(outtmpl)
1714
1715         for url in url_list:
1716             try:
1717                 # It also downloads the videos
1718                 res = self.extract_info(
1719                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
1720             except UnavailableVideoError:
1721                 self.report_error('unable to download video')
1722             except MaxDownloadsReached:
1723                 self.to_screen('[info] Maximum number of downloaded files reached.')
1724                 raise
1725             else:
1726                 if self.params.get('dump_single_json', False):
1727                     self.to_stdout(json.dumps(res))
1728
1729         return self._download_retcode
1730
1731     def download_with_info_file(self, info_filename):
1732         with contextlib.closing(fileinput.FileInput(
1733                 [info_filename], mode='r',
1734                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1735             # FileInput doesn't have a read method, we can't call json.load
1736             info = self.filter_requested_info(json.loads('\n'.join(f)))
1737         try:
1738             self.process_ie_result(info, download=True)
1739         except DownloadError:
1740             webpage_url = info.get('webpage_url')
1741             if webpage_url is not None:
1742                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1743                 return self.download([webpage_url])
1744             else:
1745                 raise
1746         return self._download_retcode
1747
1748     @staticmethod
1749     def filter_requested_info(info_dict):
1750         return dict(
1751             (k, v) for k, v in info_dict.items()
1752             if k not in ['requested_formats', 'requested_subtitles'])
1753
1754     def post_process(self, filename, ie_info):
1755         """Run all the postprocessors on the given file."""
1756         info = dict(ie_info)
1757         info['filepath'] = filename
1758         pps_chain = []
1759         if ie_info.get('__postprocessors') is not None:
1760             pps_chain.extend(ie_info['__postprocessors'])
1761         pps_chain.extend(self._pps)
1762         for pp in pps_chain:
1763             files_to_delete = []
1764             try:
1765                 files_to_delete, info = pp.run(info)
1766             except PostProcessingError as e:
1767                 self.report_error(e.msg)
1768             if files_to_delete and not self.params.get('keepvideo', False):
1769                 for old_filename in files_to_delete:
1770                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1771                     try:
1772                         os.remove(encodeFilename(old_filename))
1773                     except (IOError, OSError):
1774                         self.report_warning('Unable to remove downloaded original file')
1775
1776     def _make_archive_id(self, info_dict):
1777         # Future-proof against any change in case
1778         # and backwards compatibility with prior versions
1779         extractor = info_dict.get('extractor_key')
1780         if extractor is None:
1781             if 'id' in info_dict:
1782                 extractor = info_dict.get('ie_key')  # key in a playlist
1783         if extractor is None:
1784             return None  # Incomplete video information
1785         return extractor.lower() + ' ' + info_dict['id']
1786
1787     def in_download_archive(self, info_dict):
1788         fn = self.params.get('download_archive')
1789         if fn is None:
1790             return False
1791
1792         vid_id = self._make_archive_id(info_dict)
1793         if vid_id is None:
1794             return False  # Incomplete video information
1795
1796         try:
1797             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1798                 for line in archive_file:
1799                     if line.strip() == vid_id:
1800                         return True
1801         except IOError as ioe:
1802             if ioe.errno != errno.ENOENT:
1803                 raise
1804         return False
1805
1806     def record_download_archive(self, info_dict):
1807         fn = self.params.get('download_archive')
1808         if fn is None:
1809             return
1810         vid_id = self._make_archive_id(info_dict)
1811         assert vid_id
1812         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1813             archive_file.write(vid_id + '\n')
1814
1815     @staticmethod
1816     def format_resolution(format, default='unknown'):
1817         if format.get('vcodec') == 'none':
1818             return 'audio only'
1819         if format.get('resolution') is not None:
1820             return format['resolution']
1821         if format.get('height') is not None:
1822             if format.get('width') is not None:
1823                 res = '%sx%s' % (format['width'], format['height'])
1824             else:
1825                 res = '%sp' % format['height']
1826         elif format.get('width') is not None:
1827             res = '%dx?' % format['width']
1828         else:
1829             res = default
1830         return res
1831
1832     def _format_note(self, fdict):
1833         res = ''
1834         if fdict.get('ext') in ['f4f', 'f4m']:
1835             res += '(unsupported) '
1836         if fdict.get('language'):
1837             if res:
1838                 res += ' '
1839             res += '[%s]' % fdict['language']
1840         if fdict.get('format_note') is not None:
1841             res += fdict['format_note'] + ' '
1842         if fdict.get('tbr') is not None:
1843             res += '%4dk ' % fdict['tbr']
1844         if fdict.get('container') is not None:
1845             if res:
1846                 res += ', '
1847             res += '%s container' % fdict['container']
1848         if (fdict.get('vcodec') is not None and
1849                 fdict.get('vcodec') != 'none'):
1850             if res:
1851                 res += ', '
1852             res += fdict['vcodec']
1853             if fdict.get('vbr') is not None:
1854                 res += '@'
1855         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1856             res += 'video@'
1857         if fdict.get('vbr') is not None:
1858             res += '%4dk' % fdict['vbr']
1859         if fdict.get('fps') is not None:
1860             if res:
1861                 res += ', '
1862             res += '%sfps' % fdict['fps']
1863         if fdict.get('acodec') is not None:
1864             if res:
1865                 res += ', '
1866             if fdict['acodec'] == 'none':
1867                 res += 'video only'
1868             else:
1869                 res += '%-5s' % fdict['acodec']
1870         elif fdict.get('abr') is not None:
1871             if res:
1872                 res += ', '
1873             res += 'audio'
1874         if fdict.get('abr') is not None:
1875             res += '@%3dk' % fdict['abr']
1876         if fdict.get('asr') is not None:
1877             res += ' (%5dHz)' % fdict['asr']
1878         if fdict.get('filesize') is not None:
1879             if res:
1880                 res += ', '
1881             res += format_bytes(fdict['filesize'])
1882         elif fdict.get('filesize_approx') is not None:
1883             if res:
1884                 res += ', '
1885             res += '~' + format_bytes(fdict['filesize_approx'])
1886         return res
1887
1888     def list_formats(self, info_dict):
1889         formats = info_dict.get('formats', [info_dict])
1890         table = [
1891             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1892             for f in formats
1893             if f.get('preference') is None or f['preference'] >= -1000]
1894         if len(formats) > 1:
1895             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1896
1897         header_line = ['format code', 'extension', 'resolution', 'note']
1898         self.to_screen(
1899             '[info] Available formats for %s:\n%s' %
1900             (info_dict['id'], render_table(header_line, table)))
1901
1902     def list_thumbnails(self, info_dict):
1903         thumbnails = info_dict.get('thumbnails')
1904         if not thumbnails:
1905             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
1906             return
1907
1908         self.to_screen(
1909             '[info] Thumbnails for %s:' % info_dict['id'])
1910         self.to_screen(render_table(
1911             ['ID', 'width', 'height', 'URL'],
1912             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1913
1914     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1915         if not subtitles:
1916             self.to_screen('%s has no %s' % (video_id, name))
1917             return
1918         self.to_screen(
1919             'Available %s for %s:' % (name, video_id))
1920         self.to_screen(render_table(
1921             ['Language', 'formats'],
1922             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1923                 for lang, formats in subtitles.items()]))
1924
1925     def urlopen(self, req):
1926         """ Start an HTTP download """
1927         if isinstance(req, compat_basestring):
1928             req = sanitized_Request(req)
1929         return self._opener.open(req, timeout=self._socket_timeout)
1930
    def print_debug_header(self):
        """Write diagnostic information about the runtime environment.

        Does nothing unless the 'verbose' parameter is set.  Otherwise it
        writes, in this order: the encodings in use, the youtube-dl version,
        the short Git HEAD hash (when it can be determined), the Python
        version and platform, the versions of the external executables, and
        the proxy map of the opener.  When the 'call_home' parameter is set it
        additionally fetches the public IP address and the latest released
        version from yt-dl.org and warns if the running version is older.
        """
        if not self.params.get('verbose'):
            return

        # The file enables unicode_literals, so '' is expected to be a text
        # string here -- presumably compat_str is that text type (defined in
        # .compat, not visible from this method).
        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        # sys.stdout may have been replaced by an object without an
        # 'encoding' attribute; report its type name in that case.
        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        # This line goes through the module-level write_string with
        # encoding=None rather than through self._write_string like the rest.
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
        # Best effort: ask git for the current commit, running it in the
        # directory that contains this module.  Any failure (git missing,
        # not a checkout, undecodable output, ...) is silently ignored.
        try:
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            # re.match only anchors at the start, so this accepts any output
            # that begins with a lowercase hexadecimal digit.
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        except Exception:
            # sys.exc_clear() only exists on Python 2; on Python 3 the call
            # itself fails and that failure is swallowed as well.
            try:
                sys.exc_clear()
            except Exception:
                pass
        self._write_string('[debug] Python version %s - %s\n' % (
            platform.python_version(), platform_name()))

        # Collect external program versions; entries with a falsy version
        # are left out of the listing.
        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Merge the 'proxies' mapping of every opener handler that has one.
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            # NOTE(review): these two requests are not wrapped in any error
            # handling here, so a network failure propagates to the caller.
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
1995
1996     def _setup_opener(self):
1997         timeout_val = self.params.get('socket_timeout')
1998         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1999
2000         opts_cookiefile = self.params.get('cookiefile')
2001         opts_proxy = self.params.get('proxy')
2002
2003         if opts_cookiefile is None:
2004             self.cookiejar = compat_cookiejar.CookieJar()
2005         else:
2006             self.cookiejar = compat_cookiejar.MozillaCookieJar(
2007                 opts_cookiefile)
2008             if os.access(opts_cookiefile, os.R_OK):
2009                 self.cookiejar.load()
2010
2011         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
2012         if opts_proxy is not None:
2013             if opts_proxy == '':
2014                 proxies = {}
2015             else:
2016                 proxies = {'http': opts_proxy, 'https': opts_proxy}
2017         else:
2018             proxies = compat_urllib_request.getproxies()
2019             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
2020             if 'http' in proxies and 'https' not in proxies:
2021                 proxies['https'] = proxies['http']
2022         proxy_handler = PerRequestProxyHandler(proxies)
2023
2024         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
2025         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
2026         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
2027         data_handler = compat_urllib_request_DataHandler()
2028
2029         # When passing our own FileHandler instance, build_opener won't add the
2030         # default FileHandler and allows us to disable the file protocol, which
2031         # can be used for malicious purposes (see
2032         # https://github.com/rg3/youtube-dl/issues/8227)
2033         file_handler = compat_urllib_request.FileHandler()
2034
2035         def file_open(*args, **kwargs):
2036             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
2037         file_handler.file_open = file_open
2038
2039         opener = compat_urllib_request.build_opener(
2040             proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)
2041
2042         # Delete the default user-agent header, which would otherwise apply in
2043         # cases where our custom HTTP handler doesn't come into play
2044         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
2045         opener.addheaders = []
2046         self._opener = opener
2047
2048     def encode(self, s):
2049         if isinstance(s, bytes):
2050             return s  # Already encoded
2051
2052         try:
2053             return s.encode(self.get_encoding())
2054         except UnicodeEncodeError as err:
2055             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2056             raise
2057
2058     def get_encoding(self):
2059         encoding = self.params.get('encoding')
2060         if encoding is None:
2061             encoding = preferredencoding()
2062         return encoding
2063
2064     def _write_thumbnails(self, info_dict, filename):
2065         if self.params.get('writethumbnail', False):
2066             thumbnails = info_dict.get('thumbnails')
2067             if thumbnails:
2068                 thumbnails = [thumbnails[-1]]
2069         elif self.params.get('write_all_thumbnails', False):
2070             thumbnails = info_dict.get('thumbnails')
2071         else:
2072             return
2073
2074         if not thumbnails:
2075             # No thumbnails present, so return immediately
2076             return
2077
2078         for t in thumbnails:
2079             thumb_ext = determine_ext(t['url'], 'jpg')
2080             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2081             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2082             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
2083
2084             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2085                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2086                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2087             else:
2088                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2089                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2090                 try:
2091                     uf = self.urlopen(t['url'])
2092                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2093                         shutil.copyfileobj(uf, thumbf)
2094                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2095                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2096                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2097                     self.report_warning('Unable to download thumbnail "%s": %s' %
2098                                         (t['url'], error_to_compat_str(err)))