[YoutubeDL] Ensure bool params always present in downloader
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import datetime
9 import errno
10 import fileinput
11 import io
12 import itertools
13 import json
14 import locale
15 import operator
16 import os
17 import platform
18 import re
19 import shutil
20 import subprocess
21 import socket
22 import sys
23 import time
24 import tokenize
25 import traceback
26
27 if os.name == 'nt':
28     import ctypes
29
30 from .compat import (
31     compat_cookiejar,
32     compat_expanduser,
33     compat_get_terminal_size,
34     compat_http_client,
35     compat_kwargs,
36     compat_str,
37     compat_tokenize_tokenize,
38     compat_urllib_error,
39     compat_urllib_request,
40 )
41 from .utils import (
42     ContentTooShortError,
43     date_from_str,
44     DateRange,
45     DEFAULT_OUTTMPL,
46     determine_ext,
47     DownloadError,
48     encodeFilename,
49     ExtractorError,
50     format_bytes,
51     formatSeconds,
52     locked_file,
53     make_HTTPS_handler,
54     MaxDownloadsReached,
55     PagedList,
56     parse_filesize,
57     PerRequestProxyHandler,
58     PostProcessingError,
59     platform_name,
60     preferredencoding,
61     render_table,
62     SameFileError,
63     sanitize_filename,
64     sanitize_path,
65     std_headers,
66     subtitles_filename,
67     UnavailableVideoError,
68     url_basename,
69     version_tuple,
70     write_json_file,
71     write_string,
72     YoutubeDLHandler,
73     prepend_extension,
74     replace_extension,
75     args_to_str,
76     age_restricted,
77 )
78 from .cache import Cache
79 from .extractor import get_info_extractor, gen_extractors
80 from .downloader import get_suitable_downloader
81 from .downloader.rtmp import rtmpdump_version
82 from .postprocessor import (
83     FFmpegFixupM4aPP,
84     FFmpegFixupStretchedPP,
85     FFmpegMergerPP,
86     FFmpegPostProcessor,
87     get_postprocessor,
88 )
89 from .version import __version__
90
91
92 class YoutubeDL(object):
93     """YoutubeDL class.
94
95     YoutubeDL objects are the ones responsible of downloading the
96     actual video file and writing it to disk if the user has requested
97     it, among some other tasks. In most cases there should be one per
98     program. As, given a video URL, the downloader doesn't know how to
99     extract all the needed information, task that InfoExtractors do, it
100     has to pass the URL to one of them.
101
102     For this, YoutubeDL objects have a method that allows
103     InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
105     finds that reports being able to handle it. The InfoExtractor extracts
106     all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
108     Downloader to download the video.
109
110     YoutubeDL objects accept a lot of parameters. In order not to saturate
111     the object constructor with arguments, it receives a dictionary of
112     options instead. These options are available through the params
113     attribute for the InfoExtractors to use. The YoutubeDL also
114     registers itself as the downloader in charge for the InfoExtractors
115     that are added to it, so this is a "mutual registration".
116
117     Available options:
118
119     username:          Username for authentication purposes.
120     password:          Password for authentication purposes.
121     videopassword:     Password for accessing a video.
122     usenetrc:          Use netrc for authentication instead.
123     verbose:           Print additional info to stdout.
124     quiet:             Do not print messages to stdout.
125     no_warnings:       Do not print out anything for warnings.
126     forceurl:          Force printing final URL.
127     forcetitle:        Force printing title.
128     forceid:           Force printing ID.
129     forcethumbnail:    Force printing thumbnail URL.
130     forcedescription:  Force printing description.
131     forcefilename:     Force printing final filename.
132     forceduration:     Force printing duration.
133     forcejson:         Force printing info_dict as JSON.
134     dump_single_json:  Force printing the info_dict of the whole playlist
135                        (or video) as a single JSON line.
136     simulate:          Do not download the video files.
137     format:            Video format code. See options.py for more information.
138     outtmpl:           Template for output names.
139     restrictfilenames: Do not allow "&" and spaces in file names
140     ignoreerrors:      Do not stop on download errors.
141     force_generic_extractor: Force downloader to use the generic extractor
142     nooverwrites:      Prevent overwriting files.
143     playliststart:     Playlist item to start at.
144     playlistend:       Playlist item to end at.
145     playlist_items:    Specific indices of playlist to download.
146     playlistreverse:   Download playlist items in reverse order.
147     matchtitle:        Download only matching titles.
148     rejecttitle:       Reject downloads for matching titles.
149     logger:            Log messages to a logging.Logger instance.
150     logtostderr:       Log messages to stderr instead of stdout.
151     writedescription:  Write the video description to a .description file
152     writeinfojson:     Write the video description to a .info.json file
153     writeannotations:  Write the video annotations to a .annotations.xml file
154     writethumbnail:    Write the thumbnail image to a file
155     write_all_thumbnails:  Write all thumbnail formats to files
156     writesubtitles:    Write the video subtitles to a file
157     writeautomaticsub: Write the automatic subtitles to a file
158     allsubtitles:      Downloads all the subtitles of the video
159                        (requires writesubtitles or writeautomaticsub)
160     listsubtitles:     Lists all available subtitles for the video
161     subtitlesformat:   The format code for subtitles
162     subtitleslangs:    List of languages of the subtitles to download
163     keepvideo:         Keep the video file after post-processing
164     daterange:         A DateRange object, download only if the upload_date is in the range.
165     skip_download:     Skip the actual download of the video file
166     cachedir:          Location of the cache files in the filesystem.
167                        False to disable filesystem cache.
168     noplaylist:        Download single video instead of a playlist if in doubt.
169     age_limit:         An integer representing the user's age in years.
170                        Unsuitable videos for the given age are skipped.
171     min_views:         An integer representing the minimum view count the video
172                        must have in order to not be skipped.
173                        Videos without view count information are always
174                        downloaded. None for no limit.
175     max_views:         An integer representing the maximum view count.
176                        Videos that are more popular than that are not
177                        downloaded.
178                        Videos without view count information are always
179                        downloaded. None for no limit.
180     download_archive:  File name of a file where all downloads are recorded.
181                        Videos already present in the file are not downloaded
182                        again.
183     cookiefile:        File name where cookies should be read from and dumped to.
184     nocheckcertificate:Do not verify SSL certificates
185     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
186                        At the moment, this is only supported by YouTube.
187     proxy:             URL of the proxy server to use
188     cn_verification_proxy:  URL of the proxy to use for IP address verification
189                        on Chinese sites. (Experimental)
190     socket_timeout:    Time to wait for unresponsive hosts, in seconds
191     bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
193     debug_printtraffic:Print out sent and received HTTP traffic
194     include_ads:       Download ads as well
195     default_search:    Prepend this string if an input url is not valid.
196                        'auto' for elaborate guessing
197     encoding:          Use this encoding instead of the system-specified.
198     extract_flat:      Do not resolve URLs, return the immediate result.
199                        Pass in 'in_playlist' to only show this behavior for
200                        playlist items.
201     postprocessors:    A list of dictionaries, each with an entry
202                        * key:  The name of the postprocessor. See
203                                youtube_dl/postprocessor/__init__.py for a list.
204                        as well as any further keyword arguments for the
205                        postprocessor.
206     progress_hooks:    A list of functions that get called on download
207                        progress, with a dictionary with the entries
208                        * status: One of "downloading", "error", or "finished".
209                                  Check this first and ignore unknown values.
210
211                        If status is one of "downloading", or "finished", the
212                        following properties may also be present:
213                        * filename: The final filename (always present)
214                        * tmpfilename: The filename we're currently writing to
215                        * downloaded_bytes: Bytes on disk
216                        * total_bytes: Size of the whole file, None if unknown
217                        * total_bytes_estimate: Guess of the eventual file size,
218                                                None if unavailable.
219                        * elapsed: The number of seconds since download started.
220                        * eta: The estimated time in seconds, None if unknown
221                        * speed: The download speed in bytes/second, None if
222                                 unknown
223                        * fragment_index: The counter of the currently
224                                          downloaded video fragment.
225                        * fragment_count: The number of fragments (= individual
226                                          files that will be merged)
227
228                        Progress hooks are guaranteed to be called at least once
229                        (with status "finished") if the download is successful.
230     merge_output_format: Extension to use when merging formats.
231     fixup:             Automatically correct known faults of the file.
232                        One of:
233                        - "never": do nothing
234                        - "warn": only emit a warning
235                        - "detect_or_warn": check whether we can do anything
236                                            about it, warn otherwise (default)
237     source_address:    (Experimental) Client-side IP address to bind to.
238     call_home:         Boolean, true iff we are allowed to contact the
239                        youtube-dl servers for debugging.
240     sleep_interval:    Number of seconds to sleep before each download.
241     listformats:       Print an overview of available video formats and exit.
242     list_thumbnails:   Print a table of all thumbnails and exit.
243     match_filter:      A function that gets called with the info_dict of
244                        every video.
245                        If it returns a message, the video is ignored.
246                        If it returns None, the video is downloaded.
247                        match_filter_func in utils.py is one example for this.
248     no_color:          Do not emit color codes in output.
249
250     The following options determine which downloader is picked:
251     external_downloader: Executable of the external downloader to call.
252                        None or unset for standard (built-in) downloader.
253     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
254
255     The following parameters are not used by YoutubeDL itself, they are used by
256     the downloader (see youtube_dl/downloader/common.py):
257     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
258     noresizebuffer, retries, continuedl, noprogress, consoletitle,
259     xattr_set_filesize, external_downloader_args.
260
261     The following options are used by the post processors:
262     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
263                        otherwise prefer avconv.
264     postprocessor_args: A list of additional command-line arguments for the
265                         postprocessor.
266     """
267
268     params = None
269     _ies = []
270     _pps = []
271     _download_retcode = None
272     _num_downloads = None
273     _screen_file = None
274
275     def __init__(self, params=None, auto_init=True):
276         """Create a FileDownloader object with the given options."""
277         if params is None:
278             params = {}
279         self._ies = []
280         self._ies_instances = {}
281         self._pps = []
282         self._progress_hooks = []
283         self._download_retcode = 0
284         self._num_downloads = 0
285         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
286         self._err_file = sys.stderr
287         self.params = {
288             # Default parameters
289             'nocheckcertificate': False,
290         }
291         self.params.update(params)
292         self.cache = Cache(self)
293
294         if params.get('bidi_workaround', False):
295             try:
296                 import pty
297                 master, slave = pty.openpty()
298                 width = compat_get_terminal_size().columns
299                 if width is None:
300                     width_args = []
301                 else:
302                     width_args = ['-w', str(width)]
303                 sp_kwargs = dict(
304                     stdin=subprocess.PIPE,
305                     stdout=slave,
306                     stderr=self._err_file)
307                 try:
308                     self._output_process = subprocess.Popen(
309                         ['bidiv'] + width_args, **sp_kwargs
310                     )
311                 except OSError:
312                     self._output_process = subprocess.Popen(
313                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
314                 self._output_channel = os.fdopen(master, 'rb')
315             except OSError as ose:
316                 if ose.errno == 2:
317                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
318                 else:
319                     raise
320
321         if (sys.version_info >= (3,) and sys.platform != 'win32' and
322                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
323                 not params.get('restrictfilenames', False)):
324             # On Python 3, the Unicode filesystem API will throw errors (#1474)
325             self.report_warning(
326                 'Assuming --restrict-filenames since file system encoding '
327                 'cannot encode all characters. '
328                 'Set the LC_ALL environment variable to fix this.')
329             self.params['restrictfilenames'] = True
330
331         if isinstance(params.get('outtmpl'), bytes):
332             self.report_warning(
333                 'Parameter outtmpl is bytes, but should be a unicode string. '
334                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
335
336         self._setup_opener()
337
338         if auto_init:
339             self.print_debug_header()
340             self.add_default_info_extractors()
341
342         for pp_def_raw in self.params.get('postprocessors', []):
343             pp_class = get_postprocessor(pp_def_raw['key'])
344             pp_def = dict(pp_def_raw)
345             del pp_def['key']
346             pp = pp_class(self, **compat_kwargs(pp_def))
347             self.add_post_processor(pp)
348
349         for ph in self.params.get('progress_hooks', []):
350             self.add_progress_hook(ph)
351
352     def warn_if_short_id(self, argv):
353         # short YouTube ID starting with dash?
354         idxs = [
355             i for i, a in enumerate(argv)
356             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
357         if idxs:
358             correct_argv = (
359                 ['youtube-dl'] +
360                 [a for i, a in enumerate(argv) if i not in idxs] +
361                 ['--'] + [argv[i] for i in idxs]
362             )
363             self.report_warning(
364                 'Long argument string detected. '
365                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
366                 args_to_str(correct_argv))
367
368     def add_info_extractor(self, ie):
369         """Add an InfoExtractor object to the end of the list."""
370         self._ies.append(ie)
371         self._ies_instances[ie.ie_key()] = ie
372         ie.set_downloader(self)
373
374     def get_info_extractor(self, ie_key):
375         """
376         Get an instance of an IE with name ie_key, it will try to get one from
377         the _ies list, if there's no instance it will create a new one and add
378         it to the extractor list.
379         """
380         ie = self._ies_instances.get(ie_key)
381         if ie is None:
382             ie = get_info_extractor(ie_key)()
383             self.add_info_extractor(ie)
384         return ie
385
386     def add_default_info_extractors(self):
387         """
388         Add the InfoExtractors returned by gen_extractors to the end of the list
389         """
390         for ie in gen_extractors():
391             self.add_info_extractor(ie)
392
393     def add_post_processor(self, pp):
394         """Add a PostProcessor object to the end of the chain."""
395         self._pps.append(pp)
396         pp.set_downloader(self)
397
398     def add_progress_hook(self, ph):
399         """Add the progress hook (currently only for the file downloader)"""
400         self._progress_hooks.append(ph)
401
402     def _bidi_workaround(self, message):
403         if not hasattr(self, '_output_channel'):
404             return message
405
406         assert hasattr(self, '_output_process')
407         assert isinstance(message, compat_str)
408         line_count = message.count('\n') + 1
409         self._output_process.stdin.write((message + '\n').encode('utf-8'))
410         self._output_process.stdin.flush()
411         res = ''.join(self._output_channel.readline().decode('utf-8')
412                       for _ in range(line_count))
413         return res[:-len('\n')]
414
415     def to_screen(self, message, skip_eol=False):
416         """Print message to stdout if not in quiet mode."""
417         return self.to_stdout(message, skip_eol, check_quiet=True)
418
419     def _write_string(self, s, out=None):
420         write_string(s, out=out, encoding=self.params.get('encoding'))
421
422     def to_stdout(self, message, skip_eol=False, check_quiet=False):
423         """Print message to stdout if not in quiet mode."""
424         if self.params.get('logger'):
425             self.params['logger'].debug(message)
426         elif not check_quiet or not self.params.get('quiet', False):
427             message = self._bidi_workaround(message)
428             terminator = ['\n', ''][skip_eol]
429             output = message + terminator
430
431             self._write_string(output, self._screen_file)
432
433     def to_stderr(self, message):
434         """Print message to stderr."""
435         assert isinstance(message, compat_str)
436         if self.params.get('logger'):
437             self.params['logger'].error(message)
438         else:
439             message = self._bidi_workaround(message)
440             output = message + '\n'
441             self._write_string(output, self._err_file)
442
443     def to_console_title(self, message):
444         if not self.params.get('consoletitle', False):
445             return
446         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
447             # c_wchar_p() might not be necessary if `message` is
448             # already of type unicode()
449             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
450         elif 'TERM' in os.environ:
451             self._write_string('\033]0;%s\007' % message, self._screen_file)
452
453     def save_console_title(self):
454         if not self.params.get('consoletitle', False):
455             return
456         if 'TERM' in os.environ:
457             # Save the title on stack
458             self._write_string('\033[22;0t', self._screen_file)
459
460     def restore_console_title(self):
461         if not self.params.get('consoletitle', False):
462             return
463         if 'TERM' in os.environ:
464             # Restore the title from stack
465             self._write_string('\033[23;0t', self._screen_file)
466
467     def __enter__(self):
468         self.save_console_title()
469         return self
470
471     def __exit__(self, *args):
472         self.restore_console_title()
473
474         if self.params.get('cookiefile') is not None:
475             self.cookiejar.save()
476
477     def trouble(self, message=None, tb=None):
478         """Determine action to take when a download problem appears.
479
480         Depending on if the downloader has been configured to ignore
481         download errors or not, this method may throw an exception or
482         not when errors are found, after printing the message.
483
484         tb, if given, is additional traceback information.
485         """
486         if message is not None:
487             self.to_stderr(message)
488         if self.params.get('verbose'):
489             if tb is None:
490                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
491                     tb = ''
492                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
493                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
494                     tb += compat_str(traceback.format_exc())
495                 else:
496                     tb_data = traceback.format_list(traceback.extract_stack())
497                     tb = ''.join(tb_data)
498             self.to_stderr(tb)
499         if not self.params.get('ignoreerrors', False):
500             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
501                 exc_info = sys.exc_info()[1].exc_info
502             else:
503                 exc_info = sys.exc_info()
504             raise DownloadError(message, exc_info)
505         self._download_retcode = 1
506
507     def report_warning(self, message):
508         '''
509         Print the message to stderr, it will be prefixed with 'WARNING:'
510         If stderr is a tty file the 'WARNING:' will be colored
511         '''
512         if self.params.get('logger') is not None:
513             self.params['logger'].warning(message)
514         else:
515             if self.params.get('no_warnings'):
516                 return
517             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
518                 _msg_header = '\033[0;33mWARNING:\033[0m'
519             else:
520                 _msg_header = 'WARNING:'
521             warning_message = '%s %s' % (_msg_header, message)
522             self.to_stderr(warning_message)
523
524     def report_error(self, message, tb=None):
525         '''
526         Do the same as trouble, but prefixes the message with 'ERROR:', colored
527         in red if stderr is a tty file.
528         '''
529         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
530             _msg_header = '\033[0;31mERROR:\033[0m'
531         else:
532             _msg_header = 'ERROR:'
533         error_message = '%s %s' % (_msg_header, message)
534         self.trouble(error_message, tb)
535
536     def report_file_already_downloaded(self, file_name):
537         """Report file has already been fully downloaded."""
538         try:
539             self.to_screen('[download] %s has already been downloaded' % file_name)
540         except UnicodeEncodeError:
541             self.to_screen('[download] The file has already been downloaded')
542
543     def prepare_filename(self, info_dict):
544         """Generate the output filename."""
545         try:
546             template_dict = dict(info_dict)
547
548             template_dict['epoch'] = int(time.time())
549             autonumber_size = self.params.get('autonumber_size')
550             if autonumber_size is None:
551                 autonumber_size = 5
552             autonumber_templ = '%0' + str(autonumber_size) + 'd'
553             template_dict['autonumber'] = autonumber_templ % self._num_downloads
554             if template_dict.get('playlist_index') is not None:
555                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
556             if template_dict.get('resolution') is None:
557                 if template_dict.get('width') and template_dict.get('height'):
558                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
559                 elif template_dict.get('height'):
560                     template_dict['resolution'] = '%sp' % template_dict['height']
561                 elif template_dict.get('width'):
562                     template_dict['resolution'] = '?x%d' % template_dict['width']
563
564             sanitize = lambda k, v: sanitize_filename(
565                 compat_str(v),
566                 restricted=self.params.get('restrictfilenames'),
567                 is_id=(k == 'id'))
568             template_dict = dict((k, sanitize(k, v))
569                                  for k, v in template_dict.items()
570                                  if v is not None)
571             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
572
573             outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
574             tmpl = compat_expanduser(outtmpl)
575             filename = tmpl % template_dict
576             # Temporary fix for #4787
577             # 'Treat' all problem characters by passing filename through preferredencoding
578             # to workaround encoding issues with subprocess on python2 @ Windows
579             if sys.version_info < (3, 0) and sys.platform == 'win32':
580                 filename = encodeFilename(filename, True).decode(preferredencoding())
581             return filename
582         except ValueError as err:
583             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
584             return None
585
586     def _match_entry(self, info_dict, incomplete):
587         """ Returns None iff the file should be downloaded """
588
589         video_title = info_dict.get('title', info_dict.get('id', 'video'))
590         if 'title' in info_dict:
591             # This can happen when we're just evaluating the playlist
592             title = info_dict['title']
593             matchtitle = self.params.get('matchtitle', False)
594             if matchtitle:
595                 if not re.search(matchtitle, title, re.IGNORECASE):
596                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
597             rejecttitle = self.params.get('rejecttitle', False)
598             if rejecttitle:
599                 if re.search(rejecttitle, title, re.IGNORECASE):
600                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
601         date = info_dict.get('upload_date', None)
602         if date is not None:
603             dateRange = self.params.get('daterange', DateRange())
604             if date not in dateRange:
605                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
606         view_count = info_dict.get('view_count', None)
607         if view_count is not None:
608             min_views = self.params.get('min_views')
609             if min_views is not None and view_count < min_views:
610                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
611             max_views = self.params.get('max_views')
612             if max_views is not None and view_count > max_views:
613                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
614         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
615             return 'Skipping "%s" because it is age restricted' % video_title
616         if self.in_download_archive(info_dict):
617             return '%s has already been recorded in archive' % video_title
618
619         if not incomplete:
620             match_filter = self.params.get('match_filter')
621             if match_filter is not None:
622                 ret = match_filter(info_dict)
623                 if ret is not None:
624                     return ret
625
626         return None
627
628     @staticmethod
629     def add_extra_info(info_dict, extra_info):
630         '''Set the keys from extra_info in info dict if they are missing'''
631         for key, value in extra_info.items():
632             info_dict.setdefault(key, value)
633
634     def extract_info(self, url, download=True, ie_key=None, extra_info={},
635                      process=True, force_generic_extractor=False):
636         '''
637         Returns a list with a dictionary for each video we find.
638         If 'download', also downloads the videos.
639         extra_info is a dict containing the extra values to add to each result
640         '''
641
642         if not ie_key and force_generic_extractor:
643             ie_key = 'Generic'
644
645         if ie_key:
646             ies = [self.get_info_extractor(ie_key)]
647         else:
648             ies = self._ies
649
650         for ie in ies:
651             if not ie.suitable(url):
652                 continue
653
654             if not ie.working():
655                 self.report_warning('The program functionality for this site has been marked as broken, '
656                                     'and will probably not work.')
657
658             try:
659                 ie_result = ie.extract(url)
660                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
661                     break
662                 if isinstance(ie_result, list):
663                     # Backwards compatibility: old IE result format
664                     ie_result = {
665                         '_type': 'compat_list',
666                         'entries': ie_result,
667                     }
668                 self.add_default_extra_info(ie_result, ie, url)
669                 if process:
670                     return self.process_ie_result(ie_result, download, extra_info)
671                 else:
672                     return ie_result
673             except ExtractorError as de:  # An error we somewhat expected
674                 self.report_error(compat_str(de), de.format_traceback())
675                 break
676             except MaxDownloadsReached:
677                 raise
678             except Exception as e:
679                 if self.params.get('ignoreerrors', False):
680                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
681                     break
682                 else:
683                     raise
684         else:
685             self.report_error('no suitable InfoExtractor for URL %s' % url)
686
687     def add_default_extra_info(self, ie_result, ie, url):
688         self.add_extra_info(ie_result, {
689             'extractor': ie.IE_NAME,
690             'webpage_url': url,
691             'webpage_url_basename': url_basename(url),
692             'extractor_key': ie.ie_key(),
693         })
694
695     def process_ie_result(self, ie_result, download=True, extra_info={}):
696         """
697         Take the result of the ie(may be modified) and resolve all unresolved
698         references (URLs, playlist items).
699
700         It will also download the videos if 'download'.
701         Returns the resolved ie_result.
702         """
703
704         result_type = ie_result.get('_type', 'video')
705
706         if result_type in ('url', 'url_transparent'):
707             extract_flat = self.params.get('extract_flat', False)
708             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
709                     extract_flat is True):
710                 if self.params.get('forcejson', False):
711                     self.to_stdout(json.dumps(ie_result))
712                 return ie_result
713
714         if result_type == 'video':
715             self.add_extra_info(ie_result, extra_info)
716             return self.process_video_result(ie_result, download=download)
717         elif result_type == 'url':
718             # We have to add extra_info to the results because it may be
719             # contained in a playlist
720             return self.extract_info(ie_result['url'],
721                                      download,
722                                      ie_key=ie_result.get('ie_key'),
723                                      extra_info=extra_info)
724         elif result_type == 'url_transparent':
725             # Use the information from the embedding page
726             info = self.extract_info(
727                 ie_result['url'], ie_key=ie_result.get('ie_key'),
728                 extra_info=extra_info, download=False, process=False)
729
730             force_properties = dict(
731                 (k, v) for k, v in ie_result.items() if v is not None)
732             for f in ('_type', 'url'):
733                 if f in force_properties:
734                     del force_properties[f]
735             new_result = info.copy()
736             new_result.update(force_properties)
737
738             assert new_result.get('_type') != 'url_transparent'
739
740             return self.process_ie_result(
741                 new_result, download=download, extra_info=extra_info)
742         elif result_type == 'playlist' or result_type == 'multi_video':
743             # We process each entry in the playlist
744             playlist = ie_result.get('title', None) or ie_result.get('id', None)
745             self.to_screen('[download] Downloading playlist: %s' % playlist)
746
747             playlist_results = []
748
749             playliststart = self.params.get('playliststart', 1) - 1
750             playlistend = self.params.get('playlistend', None)
751             # For backwards compatibility, interpret -1 as whole list
752             if playlistend == -1:
753                 playlistend = None
754
755             playlistitems_str = self.params.get('playlist_items', None)
756             playlistitems = None
757             if playlistitems_str is not None:
758                 def iter_playlistitems(format):
759                     for string_segment in format.split(','):
760                         if '-' in string_segment:
761                             start, end = string_segment.split('-')
762                             for item in range(int(start), int(end) + 1):
763                                 yield int(item)
764                         else:
765                             yield int(string_segment)
766                 playlistitems = iter_playlistitems(playlistitems_str)
767
768             ie_entries = ie_result['entries']
769             if isinstance(ie_entries, list):
770                 n_all_entries = len(ie_entries)
771                 if playlistitems:
772                     entries = [
773                         ie_entries[i - 1] for i in playlistitems
774                         if -n_all_entries <= i - 1 < n_all_entries]
775                 else:
776                     entries = ie_entries[playliststart:playlistend]
777                 n_entries = len(entries)
778                 self.to_screen(
779                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
780                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
781             elif isinstance(ie_entries, PagedList):
782                 if playlistitems:
783                     entries = []
784                     for item in playlistitems:
785                         entries.extend(ie_entries.getslice(
786                             item - 1, item
787                         ))
788                 else:
789                     entries = ie_entries.getslice(
790                         playliststart, playlistend)
791                 n_entries = len(entries)
792                 self.to_screen(
793                     "[%s] playlist %s: Downloading %d videos" %
794                     (ie_result['extractor'], playlist, n_entries))
795             else:  # iterable
796                 if playlistitems:
797                     entry_list = list(ie_entries)
798                     entries = [entry_list[i - 1] for i in playlistitems]
799                 else:
800                     entries = list(itertools.islice(
801                         ie_entries, playliststart, playlistend))
802                 n_entries = len(entries)
803                 self.to_screen(
804                     "[%s] playlist %s: Downloading %d videos" %
805                     (ie_result['extractor'], playlist, n_entries))
806
807             if self.params.get('playlistreverse', False):
808                 entries = entries[::-1]
809
810             for i, entry in enumerate(entries, 1):
811                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
812                 extra = {
813                     'n_entries': n_entries,
814                     'playlist': playlist,
815                     'playlist_id': ie_result.get('id'),
816                     'playlist_title': ie_result.get('title'),
817                     'playlist_index': i + playliststart,
818                     'extractor': ie_result['extractor'],
819                     'webpage_url': ie_result['webpage_url'],
820                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
821                     'extractor_key': ie_result['extractor_key'],
822                 }
823
824                 reason = self._match_entry(entry, incomplete=True)
825                 if reason is not None:
826                     self.to_screen('[download] ' + reason)
827                     continue
828
829                 entry_result = self.process_ie_result(entry,
830                                                       download=download,
831                                                       extra_info=extra)
832                 playlist_results.append(entry_result)
833             ie_result['entries'] = playlist_results
834             return ie_result
835         elif result_type == 'compat_list':
836             self.report_warning(
837                 'Extractor %s returned a compat_list result. '
838                 'It needs to be updated.' % ie_result.get('extractor'))
839
840             def _fixup(r):
841                 self.add_extra_info(
842                     r,
843                     {
844                         'extractor': ie_result['extractor'],
845                         'webpage_url': ie_result['webpage_url'],
846                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
847                         'extractor_key': ie_result['extractor_key'],
848                     }
849                 )
850                 return r
851             ie_result['entries'] = [
852                 self.process_ie_result(_fixup(r), download, extra_info)
853                 for r in ie_result['entries']
854             ]
855             return ie_result
856         else:
857             raise Exception('Invalid result type: %s' % result_type)
858
859     def _build_format_filter(self, filter_spec):
860         " Returns a function to filter the formats according to the filter_spec "
861
862         OPERATORS = {
863             '<': operator.lt,
864             '<=': operator.le,
865             '>': operator.gt,
866             '>=': operator.ge,
867             '=': operator.eq,
868             '!=': operator.ne,
869         }
870         operator_rex = re.compile(r'''(?x)\s*
871             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
872             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
873             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
874             $
875             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
876         m = operator_rex.search(filter_spec)
877         if m:
878             try:
879                 comparison_value = int(m.group('value'))
880             except ValueError:
881                 comparison_value = parse_filesize(m.group('value'))
882                 if comparison_value is None:
883                     comparison_value = parse_filesize(m.group('value') + 'B')
884                 if comparison_value is None:
885                     raise ValueError(
886                         'Invalid value %r in format specification %r' % (
887                             m.group('value'), filter_spec))
888             op = OPERATORS[m.group('op')]
889
890         if not m:
891             STR_OPERATORS = {
892                 '=': operator.eq,
893                 '!=': operator.ne,
894             }
895             str_operator_rex = re.compile(r'''(?x)
896                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
897                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
898                 \s*(?P<value>[a-zA-Z0-9_-]+)
899                 \s*$
900                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
901             m = str_operator_rex.search(filter_spec)
902             if m:
903                 comparison_value = m.group('value')
904                 op = STR_OPERATORS[m.group('op')]
905
906         if not m:
907             raise ValueError('Invalid filter specification %r' % filter_spec)
908
909         def _filter(f):
910             actual_value = f.get(m.group('key'))
911             if actual_value is None:
912                 return m.group('none_inclusive')
913             return op(actual_value, comparison_value)
914         return _filter
915
    def build_format_selector(self, format_spec):
        """Compile a format specification string (e.g. 'best', '22/18',
        'bestvideo+bestaudio', 'mp4[height<=?720]') into a function that,
        given a list of format dicts, yields the selected format(s).

        Raises SyntaxError (via syntax_error) on a malformed spec.
        """
        def syntax_error(note, start):
            # Build (not raise) a SyntaxError whose message draws a caret
            # under the offending column of format_spec.
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        # Node kinds of the parsed selector tree:
        PICKFIRST = 'PICKFIRST'  # 'a/b' - first alternative yielding results
        MERGE = 'MERGE'  # 'a+b' - video+audio merge
        SINGLE = 'SINGLE'  # plain name: 'best', '22', 'mp4', ...
        GROUP = 'GROUP'  # parenthesized sub-expression
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        def _parse_filter(tokens):
            # Consume tokens up to the closing ']' and join them into one
            # filter string for _build_format_filter.
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    # Flush any pending joined name before the bracket.
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    # Any other name/number/op is glued onto the name in progress.
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Recursive-descent parser returning a list of FormatSelector
            # nodes; the inside_* flags record which construct we are nested
            # in so the matching delimiter can be pushed back for the caller.
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        # A bare '[...]' filter implicitly applies to 'best'.
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        video_selector = current_selector
                        audio_selector = _parse_format_selection(tokens, inside_merge=True)
                        if not video_selector or not audio_selector:
                            raise syntax_error('"+" must be between two format selectors', start)
                        current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _build_selector_function(selector):
            # Turn a FormatSelector node (or a list of them) into a callable
            # taking a format list and yielding the matching format dicts.
            if isinstance(selector, list):
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(formats):
                    # Chain the results of every top-level selector.
                    for f in fs:
                        for format in f(formats):
                            yield format
                return selector_function
            elif selector.type == GROUP:
                selector_function = _build_selector_function(selector.selector)
            elif selector.type == PICKFIRST:
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(formats):
                    # First alternative that selects anything wins.
                    for f in fs:
                        picked_formats = list(f(formats))
                        if picked_formats:
                            return picked_formats
                    return []
            elif selector.type == SINGLE:
                format_spec = selector.selector

                def selector_function(formats):
                    formats = list(formats)
                    if not formats:
                        return
                    if format_spec == 'all':
                        for f in formats:
                            yield f
                    elif format_spec in ['best', 'worst', None]:
                        # 'best' takes the last entry, 'worst' the first
                        # (formats are expected sorted worst-to-best).
                        format_idx = 0 if format_spec == 'worst' else -1
                        audiovideo_formats = [
                            f for f in formats
                            if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                        if audiovideo_formats:
                            yield audiovideo_formats[format_idx]
                        # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
                        elif (all(f.get('acodec') != 'none' for f in formats) or
                              all(f.get('vcodec') != 'none' for f in formats)):
                            yield formats[format_idx]
                    elif format_spec == 'bestaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[-1]
                    elif format_spec == 'worstaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[0]
                    elif format_spec == 'bestvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[-1]
                    elif format_spec == 'worstvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[0]
                    else:
                        # Otherwise the spec names an extension or an explicit
                        # format_id; the last (best) match is yielded.
                        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                        if format_spec in extensions:
                            filter_f = lambda f: f['ext'] == format_spec
                        else:
                            filter_f = lambda f: f['format_id'] == format_spec
                        matches = list(filter(filter_f, formats))
                        if matches:
                            yield matches[-1]
            elif selector.type == MERGE:
                def _merge(formats_info):
                    format_1, format_2 = [f['format_id'] for f in formats_info]
                    # The first format must contain the video and the
                    # second the audio
                    if formats_info[0].get('vcodec') == 'none':
                        self.report_error('The first format must '
                                          'contain the video, try using '
                                          '"-f %s+%s"' % (format_2, format_1))
                        return
                    output_ext = (
                        formats_info[0]['ext']
                        if self.params.get('merge_output_format') is None
                        else self.params['merge_output_format'])
                    return {
                        'requested_formats': formats_info,
                        'format': '%s+%s' % (formats_info[0].get('format'),
                                             formats_info[1].get('format')),
                        'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                formats_info[1].get('format_id')),
                        'width': formats_info[0].get('width'),
                        'height': formats_info[0].get('height'),
                        'resolution': formats_info[0].get('resolution'),
                        'fps': formats_info[0].get('fps'),
                        'vcodec': formats_info[0].get('vcodec'),
                        'vbr': formats_info[0].get('vbr'),
                        'stretched_ratio': formats_info[0].get('stretched_ratio'),
                        'acodec': formats_info[1].get('acodec'),
                        'abr': formats_info[1].get('abr'),
                        'ext': output_ext,
                    }
                video_selector, audio_selector = map(_build_selector_function, selector.selector)

                def selector_function(formats):
                    formats = list(formats)
                    # Merge every video pick with every audio pick.
                    for pair in itertools.product(video_selector(formats), audio_selector(formats)):
                        yield _merge(pair)

            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(formats):
                # Apply the node's '[...]' filters before running the selector.
                for _filter in filters:
                    formats = list(filter(_filter, formats))
                return selector_function(formats)
            return final_selector

        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator(object):
            # Token stream supporting a one-token push-back
            # (restore_last_token), required by the recursive parser above.
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            next = __next__  # Python 2 iterator protocol

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
1172
1173     def _calc_headers(self, info_dict):
1174         res = std_headers.copy()
1175
1176         add_headers = info_dict.get('http_headers')
1177         if add_headers:
1178             res.update(add_headers)
1179
1180         cookies = self._calc_cookies(info_dict)
1181         if cookies:
1182             res['Cookie'] = cookies
1183
1184         return res
1185
1186     def _calc_cookies(self, info_dict):
1187         pr = compat_urllib_request.Request(info_dict['url'])
1188         self.cookiejar.add_cookie_header(pr)
1189         return pr.get_header('Cookie')
1190
    def process_video_result(self, info_dict, download=True):
        """Normalize a single-video info dict (thumbnails, upload_date,
        display_id, subtitles), select the requested formats and, if
        'download', download each selected format via process_info.

        Returns info_dict, mutated in place and updated with the last
        (best) selected format for backwards compatibility; returns None
        early for the --list-* modes.

        Raises ExtractorError when mandatory fields are missing, no formats
        are found, or the requested format is not available.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        # Normalize a lone 'thumbnail' into the 'thumbnails' list, sort the
        # list (worst first), and fill in 'resolution'/'id' where missing.
        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if thumbnails:
            thumbnails.sort(key=lambda t: (
                t.get('preference'), t.get('width'), t.get('height'),
                t.get('id'), t.get('url')))
            for i, t in enumerate(thumbnails):
                if t.get('width') and t.get('height'):
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                if t.get('id') is None:
                    t['id'] = '%d' % i

        if thumbnails and 'thumbnail' not in info_dict:
            # Last entry sorts highest, i.e. is the best thumbnail.
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Working around out-of-range timestamp values (e.g. negative ones on Windows,
            # see http://bugs.python.org/issue1646728)
            try:
                upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
                info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
            except (ValueError, OverflowError, OSError):
                pass

        # --list-subs only prints what is available and stops processing.
        if self.params.get('listsubtitles', False):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
            self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
            return
        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], info_dict.get('subtitles'),
            info_dict.get('automatic_captions'))

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            raise ExtractorError('No video formats found!')

        # format_id -> list of formats sharing that id (duplicate detection).
        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)

            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        for format_id, ambiguous_formats in formats_dict.items():
            if len(ambiguous_formats) > 1:
                for i, format in enumerate(ambiguous_formats):
                    format['format_id'] = '%s-%d' % (format_id, i)

        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url']).lower()
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats'):
            self.list_formats(info_dict)
            return
        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            # Default format: try bestvideo+bestaudio when merging is
            # possible (not writing to stdout, not live), else fall
            # back to 'best'.
            req_format_list = []
            if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
                    info_dict['extractor'] in ['youtube', 'ted'] and
                    not info_dict.get('is_live')):
                merger = FFmpegMergerPP(self)
                if merger.available and merger.can_merge():
                    req_format_list.append('bestvideo+bestaudio')
            req_format_list.append('best')
            req_format = '/'.join(req_format_list)
        format_selector = self.build_format_selector(req_format)
        formats_to_download = list(format_selector(formats))
        if not formats_to_download:
            raise ExtractorError('requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                # Each selected format is downloaded with the full metadata.
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
1331
1332     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1333         """Select the requested subtitles and their format"""
1334         available_subs = {}
1335         if normal_subtitles and self.params.get('writesubtitles'):
1336             available_subs.update(normal_subtitles)
1337         if automatic_captions and self.params.get('writeautomaticsub'):
1338             for lang, cap_info in automatic_captions.items():
1339                 if lang not in available_subs:
1340                     available_subs[lang] = cap_info
1341
1342         if (not self.params.get('writesubtitles') and not
1343                 self.params.get('writeautomaticsub') or not
1344                 available_subs):
1345             return None
1346
1347         if self.params.get('allsubtitles', False):
1348             requested_langs = available_subs.keys()
1349         else:
1350             if self.params.get('subtitleslangs', False):
1351                 requested_langs = self.params.get('subtitleslangs')
1352             elif 'en' in available_subs:
1353                 requested_langs = ['en']
1354             else:
1355                 requested_langs = [list(available_subs.keys())[0]]
1356
1357         formats_query = self.params.get('subtitlesformat', 'best')
1358         formats_preference = formats_query.split('/') if formats_query else []
1359         subs = {}
1360         for lang in requested_langs:
1361             formats = available_subs.get(lang)
1362             if formats is None:
1363                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1364                 continue
1365             for ext in formats_preference:
1366                 if ext == 'best':
1367                     f = formats[-1]
1368                     break
1369                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1370                 if matches:
1371                     f = matches[-1]
1372                     break
1373             else:
1374                 f = formats[-1]
1375                 self.report_warning(
1376                     'No subtitle format found matching "%s" for language %s, '
1377                     'using %s' % (formats_query, lang, f['ext']))
1378             subs[lang] = f
1379         return subs
1380
    def process_info(self, info_dict):
        """Process a single resolved IE result.

        Performs the forced stdout printings, creates the destination
        directory, writes the requested side files (description,
        annotations, subtitles, info JSON, thumbnails), downloads the
        media itself (merging multiple requested formats if needed) and
        finally runs fixups/postprocessors and records the archive entry.
        """

        assert info_dict.get('_type', 'video') == 'video'

        # Abort early once the --max-downloads limit has been reached
        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads >= int(max_downloads):
                raise MaxDownloadsReached()

        # Keep the untruncated title around; the working title is capped
        # at 200 characters for filesystem friendliness
        info_dict['fulltitle'] = info_dict['title']
        if len(info_dict['title']) > 200:
            info_dict['title'] = info_dict['title'][:197] + '...'

        if 'format' not in info_dict:
            info_dict['format'] = info_dict['ext']

        # Filters (--match-filter, date range, ...) may reject the video
        reason = self._match_entry(info_dict, incomplete=False)
        if reason is not None:
            self.to_screen('[download] ' + reason)
            return

        self._num_downloads += 1

        info_dict['_filename'] = filename = self.prepare_filename(info_dict)

        # Forced printings
        if self.params.get('forcetitle', False):
            self.to_stdout(info_dict['fulltitle'])
        if self.params.get('forceid', False):
            self.to_stdout(info_dict['id'])
        if self.params.get('forceurl', False):
            if info_dict.get('requested_formats') is not None:
                for f in info_dict['requested_formats']:
                    self.to_stdout(f['url'] + f.get('play_path', ''))
            else:
                # For RTMP URLs, also include the playpath
                self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
        if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
            self.to_stdout(info_dict['thumbnail'])
        if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
            self.to_stdout(info_dict['description'])
        if self.params.get('forcefilename', False) and filename is not None:
            self.to_stdout(filename)
        if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
            self.to_stdout(formatSeconds(info_dict['duration']))
        if self.params.get('forceformat', False):
            self.to_stdout(info_dict['format'])
        if self.params.get('forcejson', False):
            self.to_stdout(json.dumps(info_dict))

        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):
            return

        if filename is None:
            return

        # Create the destination directory if necessary
        try:
            dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
            if dn and not os.path.exists(dn):
                os.makedirs(dn)
        except (OSError, IOError) as err:
            self.report_error('unable to create directory ' + compat_str(err))
            return

        if self.params.get('writedescription', False):
            descfn = replace_extension(filename, 'description', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Video description is already present')
            elif info_dict.get('description') is None:
                self.report_warning('There\'s no description to write.')
            else:
                try:
                    self.to_screen('[info] Writing video description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(info_dict['description'])
                except (OSError, IOError):
                    self.report_error('Cannot write description file ' + descfn)
                    return

        if self.params.get('writeannotations', False):
            annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
                self.to_screen('[info] Video annotations are already present')
            else:
                try:
                    self.to_screen('[info] Writing video annotations to: ' + annofn)
                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                        annofile.write(info_dict['annotations'])
                except (KeyError, TypeError):
                    # 'annotations' missing or not a string
                    self.report_warning('There are no annotations to write.')
                except (OSError, IOError):
                    self.report_error('Cannot write annotations file: ' + annofn)
                    return

        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub')])

        if subtitles_are_requested and info_dict.get('requested_subtitles'):
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            subtitles = info_dict['requested_subtitles']
            ie = self.get_info_extractor(info_dict['extractor_key'])
            for sub_lang, sub_info in subtitles.items():
                sub_format = sub_info['ext']
                if sub_info.get('data') is not None:
                    sub_data = sub_info['data']
                else:
                    try:
                        sub_data = ie._download_webpage(
                            sub_info['url'], info_dict['id'], note=False)
                    except ExtractorError as err:
                        self.report_warning('Unable to download subtitle for "%s": %s' %
                                            (sub_lang, compat_str(err.cause)))
                        continue
                try:
                    sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                    if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                        self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                    else:
                        self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                            subfile.write(sub_data)
                except (OSError, IOError):
                    self.report_error('Cannot write subtitles file ' + sub_filename)
                    return

        if self.params.get('writeinfojson', False):
            infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Video description metadata is already present')
            else:
                self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
                try:
                    write_json_file(self.filter_requested_info(info_dict), infofn)
                except (OSError, IOError):
                    self.report_error('Cannot write metadata to JSON file ' + infofn)
                    return

        self._write_thumbnails(info_dict, filename)

        if not self.params.get('skip_download', False):
            try:
                # Dispatch to the downloader suited for this info dict
                # (HTTP, RTMP, HLS, ...) and run it with our progress hooks.
                def dl(name, info):
                    fd = get_suitable_downloader(info, self.params)(self, self.params)
                    for ph in self._progress_hooks:
                        fd.add_progress_hook(ph)
                    if self.params.get('verbose'):
                        self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                    return fd.download(name, info)

                if info_dict.get('requested_formats') is not None:
                    # Separate video+audio formats were requested; download
                    # each and merge them afterwards with ffmpeg/avconv.
                    downloaded = []
                    success = True
                    merger = FFmpegMergerPP(self)
                    if not merger.available:
                        postprocessors = []
                        self.report_warning('You have requested multiple '
                                            'formats but ffmpeg or avconv are not installed.'
                                            ' The formats won\'t be merged.')
                    else:
                        postprocessors = [merger]

                    def compatible_formats(formats):
                        video, audio = formats
                        # Check extension
                        # NOTE(review): the assignments below look swapped
                        # (video_ext reads the audio format and vice versa);
                        # the membership test further down is symmetric, so
                        # the result is unaffected, but this should be fixed.
                        video_ext, audio_ext = audio.get('ext'), video.get('ext')
                        if video_ext and audio_ext:
                            COMPATIBLE_EXTS = (
                                ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
                                # NOTE(review): ('webm') is a plain string, not
                                # a one-element tuple, so the 'in' checks below
                                # degrade to substring tests for this entry —
                                # presumably ('webm',) was intended; verify.
                                ('webm')
                            )
                            for exts in COMPATIBLE_EXTS:
                                if video_ext in exts and audio_ext in exts:
                                    return True
                        # TODO: Check acodec/vcodec
                        return False

                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext == info_dict['ext']
                        else filename)
                    requested_formats = info_dict['requested_formats']
                    if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'Requested formats are incompatible for merge and will be merged into mkv.')
                    # Ensure filename always has a correct extension for successful merge
                    filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
                    if os.path.exists(encodeFilename(filename)):
                        self.to_screen(
                            '[download] %s has already been downloaded and '
                            'merged' % filename)
                    else:
                        # Download each requested format into an 'f<id>'-prefixed
                        # temporary file; the merger combines them afterwards.
                        for f in requested_formats:
                            new_info = dict(info_dict)
                            new_info.update(f)
                            fname = self.prepare_filename(new_info)
                            fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
                            downloaded.append(fname)
                            partial_success = dl(fname, new_info)
                            success = success and partial_success
                        info_dict['__postprocessors'] = postprocessors
                        info_dict['__files_to_merge'] = downloaded
                else:
                    # Just a single file
                    success = dl(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error('unable to download video data: %s' % str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

            if success:
                # Fixup content
                fixup_policy = self.params.get('fixup')
                if fixup_policy is None:
                    fixup_policy = 'detect_or_warn'

                # Fix non-uniform pixel aspect ratio if requested/possible
                stretched_ratio = info_dict.get('stretched_ratio')
                if stretched_ratio is not None and stretched_ratio != 1:
                    if fixup_policy == 'warn':
                        self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                            info_dict['id'], stretched_ratio))
                    elif fixup_policy == 'detect_or_warn':
                        stretched_pp = FFmpegFixupStretchedPP(self)
                        if stretched_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(stretched_pp)
                        else:
                            self.report_warning(
                                '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
                                    info_dict['id'], stretched_ratio))
                    else:
                        assert fixup_policy in ('ignore', 'never')

                # Fix malformed DASH m4a files if requested/possible
                if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
                    if fixup_policy == 'warn':
                        self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
                            info_dict['id']))
                    elif fixup_policy == 'detect_or_warn':
                        fixup_pp = FFmpegFixupM4aPP(self)
                        if fixup_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(fixup_pp)
                        else:
                            self.report_warning(
                                '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
                                    info_dict['id']))
                    else:
                        assert fixup_policy in ('ignore', 'never')

                try:
                    self.post_process(filename, info_dict)
                except (PostProcessingError) as err:
                    self.report_error('postprocessing: %s' % str(err))
                    return
                self.record_download_archive(info_dict)
1644
1645     def download(self, url_list):
1646         """Download a given list of URLs."""
1647         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1648         if (len(url_list) > 1 and
1649                 '%' not in outtmpl and
1650                 self.params.get('max_downloads') != 1):
1651             raise SameFileError(outtmpl)
1652
1653         for url in url_list:
1654             try:
1655                 # It also downloads the videos
1656                 res = self.extract_info(
1657                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
1658             except UnavailableVideoError:
1659                 self.report_error('unable to download video')
1660             except MaxDownloadsReached:
1661                 self.to_screen('[info] Maximum number of downloaded files reached.')
1662                 raise
1663             else:
1664                 if self.params.get('dump_single_json', False):
1665                     self.to_stdout(json.dumps(res))
1666
1667         return self._download_retcode
1668
1669     def download_with_info_file(self, info_filename):
1670         with contextlib.closing(fileinput.FileInput(
1671                 [info_filename], mode='r',
1672                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1673             # FileInput doesn't have a read method, we can't call json.load
1674             info = self.filter_requested_info(json.loads('\n'.join(f)))
1675         try:
1676             self.process_ie_result(info, download=True)
1677         except DownloadError:
1678             webpage_url = info.get('webpage_url')
1679             if webpage_url is not None:
1680                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1681                 return self.download([webpage_url])
1682             else:
1683                 raise
1684         return self._download_retcode
1685
1686     @staticmethod
1687     def filter_requested_info(info_dict):
1688         return dict(
1689             (k, v) for k, v in info_dict.items()
1690             if k not in ['requested_formats', 'requested_subtitles'])
1691
1692     def post_process(self, filename, ie_info):
1693         """Run all the postprocessors on the given file."""
1694         info = dict(ie_info)
1695         info['filepath'] = filename
1696         pps_chain = []
1697         if ie_info.get('__postprocessors') is not None:
1698             pps_chain.extend(ie_info['__postprocessors'])
1699         pps_chain.extend(self._pps)
1700         for pp in pps_chain:
1701             files_to_delete = []
1702             try:
1703                 files_to_delete, info = pp.run(info)
1704             except PostProcessingError as e:
1705                 self.report_error(e.msg)
1706             if files_to_delete and not self.params.get('keepvideo', False):
1707                 for old_filename in files_to_delete:
1708                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1709                     try:
1710                         os.remove(encodeFilename(old_filename))
1711                     except (IOError, OSError):
1712                         self.report_warning('Unable to remove downloaded original file')
1713
1714     def _make_archive_id(self, info_dict):
1715         # Future-proof against any change in case
1716         # and backwards compatibility with prior versions
1717         extractor = info_dict.get('extractor_key')
1718         if extractor is None:
1719             if 'id' in info_dict:
1720                 extractor = info_dict.get('ie_key')  # key in a playlist
1721         if extractor is None:
1722             return None  # Incomplete video information
1723         return extractor.lower() + ' ' + info_dict['id']
1724
1725     def in_download_archive(self, info_dict):
1726         fn = self.params.get('download_archive')
1727         if fn is None:
1728             return False
1729
1730         vid_id = self._make_archive_id(info_dict)
1731         if vid_id is None:
1732             return False  # Incomplete video information
1733
1734         try:
1735             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1736                 for line in archive_file:
1737                     if line.strip() == vid_id:
1738                         return True
1739         except IOError as ioe:
1740             if ioe.errno != errno.ENOENT:
1741                 raise
1742         return False
1743
1744     def record_download_archive(self, info_dict):
1745         fn = self.params.get('download_archive')
1746         if fn is None:
1747             return
1748         vid_id = self._make_archive_id(info_dict)
1749         assert vid_id
1750         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1751             archive_file.write(vid_id + '\n')
1752
1753     @staticmethod
1754     def format_resolution(format, default='unknown'):
1755         if format.get('vcodec') == 'none':
1756             return 'audio only'
1757         if format.get('resolution') is not None:
1758             return format['resolution']
1759         if format.get('height') is not None:
1760             if format.get('width') is not None:
1761                 res = '%sx%s' % (format['width'], format['height'])
1762             else:
1763                 res = '%sp' % format['height']
1764         elif format.get('width') is not None:
1765             res = '?x%d' % format['width']
1766         else:
1767             res = default
1768         return res
1769
    def _format_note(self, fdict):
        """Build the short free-form note shown in the format listing:
        container, codecs, bitrates, fps, sample rate and filesize.

        The order of the checks below determines the order of the fields
        in the resulting string, so it must not be changed casually.
        """
        res = ''
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            if res:
                res += ', '
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            if res:
                res += ', '
            res += fdict['vcodec']
            # A trailing '@' glues the video bitrate (appended below)
            # to the codec name, e.g. 'avc1@1000k'
            if fdict.get('vbr') is not None:
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            # No codec known but both bitrates present: label the vbr
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            res += ', %sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            if fdict['acodec'] == 'none':
                res += 'video only'
            else:
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            if res:
                res += ', '
            res += '~' + format_bytes(fdict['filesize_approx'])
        return res
1819
1820     def list_formats(self, info_dict):
1821         formats = info_dict.get('formats', [info_dict])
1822         table = [
1823             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1824             for f in formats
1825             if f.get('preference') is None or f['preference'] >= -1000]
1826         if len(formats) > 1:
1827             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1828
1829         header_line = ['format code', 'extension', 'resolution', 'note']
1830         self.to_screen(
1831             '[info] Available formats for %s:\n%s' %
1832             (info_dict['id'], render_table(header_line, table)))
1833
1834     def list_thumbnails(self, info_dict):
1835         thumbnails = info_dict.get('thumbnails')
1836         if not thumbnails:
1837             tn_url = info_dict.get('thumbnail')
1838             if tn_url:
1839                 thumbnails = [{'id': '0', 'url': tn_url}]
1840             else:
1841                 self.to_screen(
1842                     '[info] No thumbnails present for %s' % info_dict['id'])
1843                 return
1844
1845         self.to_screen(
1846             '[info] Thumbnails for %s:' % info_dict['id'])
1847         self.to_screen(render_table(
1848             ['ID', 'width', 'height', 'URL'],
1849             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1850
1851     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1852         if not subtitles:
1853             self.to_screen('%s has no %s' % (video_id, name))
1854             return
1855         self.to_screen(
1856             'Available %s for %s:' % (name, video_id))
1857         self.to_screen(render_table(
1858             ['Language', 'formats'],
1859             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1860                 for lang, formats in subtitles.items()]))
1861
1862     def urlopen(self, req):
1863         """ Start an HTTP download """
1864         return self._opener.open(req, timeout=self._socket_timeout)
1865
    def print_debug_header(self):
        """Write the '[debug] ...' diagnostic header (encodings, version,
        git revision, Python/platform, external program versions, proxy
        map) when the 'verbose' param is set."""
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
        try:
            # Report the git revision when running from a source checkout
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        except Exception:
            try:
                # Python 2 only: clear the current exception state
                sys.exc_clear()
            except Exception:
                pass
        self._write_string('[debug] Python version %s - %s\n' % (
            platform.python_version(), platform_name()))

        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            # Opt-in: contact yt-dl.org to report the public IP address and
            # check whether a newer release is available
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
1930
1931     def _setup_opener(self):
1932         timeout_val = self.params.get('socket_timeout')
1933         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1934
1935         opts_cookiefile = self.params.get('cookiefile')
1936         opts_proxy = self.params.get('proxy')
1937
1938         if opts_cookiefile is None:
1939             self.cookiejar = compat_cookiejar.CookieJar()
1940         else:
1941             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1942                 opts_cookiefile)
1943             if os.access(opts_cookiefile, os.R_OK):
1944                 self.cookiejar.load()
1945
1946         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1947             self.cookiejar)
1948         if opts_proxy is not None:
1949             if opts_proxy == '':
1950                 proxies = {}
1951             else:
1952                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1953         else:
1954             proxies = compat_urllib_request.getproxies()
1955             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1956             if 'http' in proxies and 'https' not in proxies:
1957                 proxies['https'] = proxies['http']
1958         proxy_handler = PerRequestProxyHandler(proxies)
1959
1960         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1961         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1962         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1963         opener = compat_urllib_request.build_opener(
1964             proxy_handler, https_handler, cookie_processor, ydlh)
1965
1966         # Delete the default user-agent header, which would otherwise apply in
1967         # cases where our custom HTTP handler doesn't come into play
1968         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1969         opener.addheaders = []
1970         self._opener = opener
1971
1972     def encode(self, s):
1973         if isinstance(s, bytes):
1974             return s  # Already encoded
1975
1976         try:
1977             return s.encode(self.get_encoding())
1978         except UnicodeEncodeError as err:
1979             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1980             raise
1981
1982     def get_encoding(self):
1983         encoding = self.params.get('encoding')
1984         if encoding is None:
1985             encoding = preferredencoding()
1986         return encoding
1987
    def _write_thumbnails(self, info_dict, filename):
        """Download thumbnail image(s) for a video, if so configured.

        With 'writethumbnail' set, only the last entry of
        info_dict['thumbnails'] is downloaded (presumably the best one -
        TODO confirm the extractor-side ordering); with
        'write_all_thumbnails' set, every entry is downloaded.  Files are
        written next to *filename* (same stem, thumbnail extension), each
        thumbnail's path is recorded in its dict under 'filename', and
        download failures only produce a warning.
        """
        if self.params.get('writethumbnail', False):
            thumbnails = info_dict.get('thumbnails')
            if thumbnails:
                # Keep only the last thumbnail in the list.
                thumbnails = [thumbnails[-1]]
        elif self.params.get('write_all_thumbnails', False):
            thumbnails = info_dict.get('thumbnails')
        else:
            # Neither option enabled: nothing to do.
            return

        if not thumbnails:
            # No thumbnails present, so return immediately
            return

        for t in thumbnails:
            # Derive the extension from the URL, defaulting to jpg.
            thumb_ext = determine_ext(t['url'], 'jpg')
            # Only disambiguate names/messages with the thumbnail id when
            # more than one thumbnail is being written.
            suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
            thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
            t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext

            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                self.to_screen('[%s] %s: Thumbnail %sis already present' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
            else:
                self.to_screen('[%s] %s: Downloading thumbnail %s...' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
                try:
                    uf = self.urlopen(t['url'])
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    # Best-effort: a failed thumbnail download must not abort
                    # the video download itself.
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (t['url'], compat_str(err)))