[adobetv] use compat_str
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import datetime
9 import errno
10 import fileinput
11 import io
12 import itertools
13 import json
14 import locale
15 import operator
16 import os
17 import platform
18 import re
19 import shutil
20 import subprocess
21 import socket
22 import sys
23 import time
24 import tokenize
25 import traceback
26
27 if os.name == 'nt':
28     import ctypes
29
30 from .compat import (
31     compat_cookiejar,
32     compat_expanduser,
33     compat_get_terminal_size,
34     compat_http_client,
35     compat_kwargs,
36     compat_str,
37     compat_tokenize_tokenize,
38     compat_urllib_error,
39     compat_urllib_request,
40     compat_urllib_request_DataHandler,
41 )
42 from .utils import (
43     ContentTooShortError,
44     date_from_str,
45     DateRange,
46     DEFAULT_OUTTMPL,
47     determine_ext,
48     DownloadError,
49     encodeFilename,
50     ExtractorError,
51     format_bytes,
52     formatSeconds,
53     locked_file,
54     make_HTTPS_handler,
55     MaxDownloadsReached,
56     PagedList,
57     parse_filesize,
58     PerRequestProxyHandler,
59     PostProcessingError,
60     platform_name,
61     preferredencoding,
62     render_table,
63     SameFileError,
64     sanitize_filename,
65     sanitize_path,
66     std_headers,
67     subtitles_filename,
68     UnavailableVideoError,
69     url_basename,
70     version_tuple,
71     write_json_file,
72     write_string,
73     YoutubeDLCookieProcessor,
74     YoutubeDLHandler,
75     prepend_extension,
76     replace_extension,
77     args_to_str,
78     age_restricted,
79 )
80 from .cache import Cache
81 from .extractor import get_info_extractor, gen_extractors
82 from .downloader import get_suitable_downloader
83 from .downloader.rtmp import rtmpdump_version
84 from .postprocessor import (
85     FFmpegFixupM4aPP,
86     FFmpegFixupStretchedPP,
87     FFmpegMergerPP,
88     FFmpegPostProcessor,
89     get_postprocessor,
90 )
91 from .version import __version__
92
93
94 class YoutubeDL(object):
95     """YoutubeDL class.
96
97     YoutubeDL objects are the ones responsible of downloading the
98     actual video file and writing it to disk if the user has requested
99     it, among some other tasks. In most cases there should be one per
100     program. As, given a video URL, the downloader doesn't know how to
101     extract all the needed information, task that InfoExtractors do, it
102     has to pass the URL to one of them.
103
104     For this, YoutubeDL objects have a method that allows
105     InfoExtractors to be registered in a given order. When it is passed
106     a URL, the YoutubeDL object handles it to the first InfoExtractor it
107     finds that reports being able to handle it. The InfoExtractor extracts
108     all the information about the video or videos the URL refers to, and
109     YoutubeDL process the extracted information, possibly using a File
110     Downloader to download the video.
111
112     YoutubeDL objects accept a lot of parameters. In order not to saturate
113     the object constructor with arguments, it receives a dictionary of
114     options instead. These options are available through the params
115     attribute for the InfoExtractors to use. The YoutubeDL also
116     registers itself as the downloader in charge for the InfoExtractors
117     that are added to it, so this is a "mutual registration".
118
119     Available options:
120
121     username:          Username for authentication purposes.
122     password:          Password for authentication purposes.
123     videopassword:     Password for accessing a video.
124     usenetrc:          Use netrc for authentication instead.
125     verbose:           Print additional info to stdout.
126     quiet:             Do not print messages to stdout.
127     no_warnings:       Do not print out anything for warnings.
128     forceurl:          Force printing final URL.
129     forcetitle:        Force printing title.
130     forceid:           Force printing ID.
131     forcethumbnail:    Force printing thumbnail URL.
132     forcedescription:  Force printing description.
133     forcefilename:     Force printing final filename.
134     forceduration:     Force printing duration.
135     forcejson:         Force printing info_dict as JSON.
136     dump_single_json:  Force printing the info_dict of the whole playlist
137                        (or video) as a single JSON line.
138     simulate:          Do not download the video files.
139     format:            Video format code. See options.py for more information.
140     outtmpl:           Template for output names.
141     restrictfilenames: Do not allow "&" and spaces in file names
142     ignoreerrors:      Do not stop on download errors.
143     force_generic_extractor: Force downloader to use the generic extractor
144     nooverwrites:      Prevent overwriting files.
145     playliststart:     Playlist item to start at.
146     playlistend:       Playlist item to end at.
147     playlist_items:    Specific indices of playlist to download.
148     playlistreverse:   Download playlist items in reverse order.
149     matchtitle:        Download only matching titles.
150     rejecttitle:       Reject downloads for matching titles.
151     logger:            Log messages to a logging.Logger instance.
152     logtostderr:       Log messages to stderr instead of stdout.
153     writedescription:  Write the video description to a .description file
154     writeinfojson:     Write the video description to a .info.json file
155     writeannotations:  Write the video annotations to a .annotations.xml file
156     writethumbnail:    Write the thumbnail image to a file
157     write_all_thumbnails:  Write all thumbnail formats to files
158     writesubtitles:    Write the video subtitles to a file
159     writeautomaticsub: Write the automatic subtitles to a file
160     allsubtitles:      Downloads all the subtitles of the video
161                        (requires writesubtitles or writeautomaticsub)
162     listsubtitles:     Lists all available subtitles for the video
163     subtitlesformat:   The format code for subtitles
164     subtitleslangs:    List of languages of the subtitles to download
165     keepvideo:         Keep the video file after post-processing
166     daterange:         A DateRange object, download only if the upload_date is in the range.
167     skip_download:     Skip the actual download of the video file
168     cachedir:          Location of the cache files in the filesystem.
169                        False to disable filesystem cache.
170     noplaylist:        Download single video instead of a playlist if in doubt.
171     age_limit:         An integer representing the user's age in years.
172                        Unsuitable videos for the given age are skipped.
173     min_views:         An integer representing the minimum view count the video
174                        must have in order to not be skipped.
175                        Videos without view count information are always
176                        downloaded. None for no limit.
177     max_views:         An integer representing the maximum view count.
178                        Videos that are more popular than that are not
179                        downloaded.
180                        Videos without view count information are always
181                        downloaded. None for no limit.
182     download_archive:  File name of a file where all downloads are recorded.
183                        Videos already present in the file are not downloaded
184                        again.
185     cookiefile:        File name where cookies should be read from and dumped to.
186     nocheckcertificate:Do not verify SSL certificates
187     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
188                        At the moment, this is only supported by YouTube.
189     proxy:             URL of the proxy server to use
190     cn_verification_proxy:  URL of the proxy to use for IP address verification
191                        on Chinese sites. (Experimental)
192     socket_timeout:    Time to wait for unresponsive hosts, in seconds
193     bidi_workaround:   Work around buggy terminals without bidirectional text
194                        support, using fridibi
195     debug_printtraffic:Print out sent and received HTTP traffic
196     include_ads:       Download ads as well
197     default_search:    Prepend this string if an input url is not valid.
198                        'auto' for elaborate guessing
199     encoding:          Use this encoding instead of the system-specified.
200     extract_flat:      Do not resolve URLs, return the immediate result.
201                        Pass in 'in_playlist' to only show this behavior for
202                        playlist items.
203     postprocessors:    A list of dictionaries, each with an entry
204                        * key:  The name of the postprocessor. See
205                                youtube_dl/postprocessor/__init__.py for a list.
206                        as well as any further keyword arguments for the
207                        postprocessor.
208     progress_hooks:    A list of functions that get called on download
209                        progress, with a dictionary with the entries
210                        * status: One of "downloading", "error", or "finished".
211                                  Check this first and ignore unknown values.
212
213                        If status is one of "downloading", or "finished", the
214                        following properties may also be present:
215                        * filename: The final filename (always present)
216                        * tmpfilename: The filename we're currently writing to
217                        * downloaded_bytes: Bytes on disk
218                        * total_bytes: Size of the whole file, None if unknown
219                        * total_bytes_estimate: Guess of the eventual file size,
220                                                None if unavailable.
221                        * elapsed: The number of seconds since download started.
222                        * eta: The estimated time in seconds, None if unknown
223                        * speed: The download speed in bytes/second, None if
224                                 unknown
225                        * fragment_index: The counter of the currently
226                                          downloaded video fragment.
227                        * fragment_count: The number of fragments (= individual
228                                          files that will be merged)
229
230                        Progress hooks are guaranteed to be called at least once
231                        (with status "finished") if the download is successful.
232     merge_output_format: Extension to use when merging formats.
233     fixup:             Automatically correct known faults of the file.
234                        One of:
235                        - "never": do nothing
236                        - "warn": only emit a warning
237                        - "detect_or_warn": check whether we can do anything
238                                            about it, warn otherwise (default)
239     source_address:    (Experimental) Client-side IP address to bind to.
240     call_home:         Boolean, true iff we are allowed to contact the
241                        youtube-dl servers for debugging.
242     sleep_interval:    Number of seconds to sleep before each download.
243     listformats:       Print an overview of available video formats and exit.
244     list_thumbnails:   Print a table of all thumbnails and exit.
245     match_filter:      A function that gets called with the info_dict of
246                        every video.
247                        If it returns a message, the video is ignored.
248                        If it returns None, the video is downloaded.
249                        match_filter_func in utils.py is one example for this.
250     no_color:          Do not emit color codes in output.
251
252     The following options determine which downloader is picked:
253     external_downloader: Executable of the external downloader to call.
254                        None or unset for standard (built-in) downloader.
255     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
256
257     The following parameters are not used by YoutubeDL itself, they are used by
258     the downloader (see youtube_dl/downloader/common.py):
259     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
260     noresizebuffer, retries, continuedl, noprogress, consoletitle,
261     xattr_set_filesize, external_downloader_args.
262
263     The following options are used by the post processors:
264     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
265                        otherwise prefer avconv.
266     postprocessor_args: A list of additional command-line arguments for the
267                         postprocessor.
268     """
269
270     params = None
271     _ies = []
272     _pps = []
273     _download_retcode = None
274     _num_downloads = None
275     _screen_file = None
276
277     def __init__(self, params=None, auto_init=True):
278         """Create a FileDownloader object with the given options."""
279         if params is None:
280             params = {}
281         self._ies = []
282         self._ies_instances = {}
283         self._pps = []
284         self._progress_hooks = []
285         self._download_retcode = 0
286         self._num_downloads = 0
287         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
288         self._err_file = sys.stderr
289         self.params = {
290             # Default parameters
291             'nocheckcertificate': False,
292         }
293         self.params.update(params)
294         self.cache = Cache(self)
295
296         if params.get('bidi_workaround', False):
297             try:
298                 import pty
299                 master, slave = pty.openpty()
300                 width = compat_get_terminal_size().columns
301                 if width is None:
302                     width_args = []
303                 else:
304                     width_args = ['-w', str(width)]
305                 sp_kwargs = dict(
306                     stdin=subprocess.PIPE,
307                     stdout=slave,
308                     stderr=self._err_file)
309                 try:
310                     self._output_process = subprocess.Popen(
311                         ['bidiv'] + width_args, **sp_kwargs
312                     )
313                 except OSError:
314                     self._output_process = subprocess.Popen(
315                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
316                 self._output_channel = os.fdopen(master, 'rb')
317             except OSError as ose:
318                 if ose.errno == 2:
319                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
320                 else:
321                     raise
322
323         if (sys.version_info >= (3,) and sys.platform != 'win32' and
324                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
325                 not params.get('restrictfilenames', False)):
326             # On Python 3, the Unicode filesystem API will throw errors (#1474)
327             self.report_warning(
328                 'Assuming --restrict-filenames since file system encoding '
329                 'cannot encode all characters. '
330                 'Set the LC_ALL environment variable to fix this.')
331             self.params['restrictfilenames'] = True
332
333         if isinstance(params.get('outtmpl'), bytes):
334             self.report_warning(
335                 'Parameter outtmpl is bytes, but should be a unicode string. '
336                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
337
338         self._setup_opener()
339
340         if auto_init:
341             self.print_debug_header()
342             self.add_default_info_extractors()
343
344         for pp_def_raw in self.params.get('postprocessors', []):
345             pp_class = get_postprocessor(pp_def_raw['key'])
346             pp_def = dict(pp_def_raw)
347             del pp_def['key']
348             pp = pp_class(self, **compat_kwargs(pp_def))
349             self.add_post_processor(pp)
350
351         for ph in self.params.get('progress_hooks', []):
352             self.add_progress_hook(ph)
353
354     def warn_if_short_id(self, argv):
355         # short YouTube ID starting with dash?
356         idxs = [
357             i for i, a in enumerate(argv)
358             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
359         if idxs:
360             correct_argv = (
361                 ['youtube-dl'] +
362                 [a for i, a in enumerate(argv) if i not in idxs] +
363                 ['--'] + [argv[i] for i in idxs]
364             )
365             self.report_warning(
366                 'Long argument string detected. '
367                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
368                 args_to_str(correct_argv))
369
370     def add_info_extractor(self, ie):
371         """Add an InfoExtractor object to the end of the list."""
372         self._ies.append(ie)
373         self._ies_instances[ie.ie_key()] = ie
374         ie.set_downloader(self)
375
376     def get_info_extractor(self, ie_key):
377         """
378         Get an instance of an IE with name ie_key, it will try to get one from
379         the _ies list, if there's no instance it will create a new one and add
380         it to the extractor list.
381         """
382         ie = self._ies_instances.get(ie_key)
383         if ie is None:
384             ie = get_info_extractor(ie_key)()
385             self.add_info_extractor(ie)
386         return ie
387
388     def add_default_info_extractors(self):
389         """
390         Add the InfoExtractors returned by gen_extractors to the end of the list
391         """
392         for ie in gen_extractors():
393             self.add_info_extractor(ie)
394
395     def add_post_processor(self, pp):
396         """Add a PostProcessor object to the end of the chain."""
397         self._pps.append(pp)
398         pp.set_downloader(self)
399
400     def add_progress_hook(self, ph):
401         """Add the progress hook (currently only for the file downloader)"""
402         self._progress_hooks.append(ph)
403
404     def _bidi_workaround(self, message):
405         if not hasattr(self, '_output_channel'):
406             return message
407
408         assert hasattr(self, '_output_process')
409         assert isinstance(message, compat_str)
410         line_count = message.count('\n') + 1
411         self._output_process.stdin.write((message + '\n').encode('utf-8'))
412         self._output_process.stdin.flush()
413         res = ''.join(self._output_channel.readline().decode('utf-8')
414                       for _ in range(line_count))
415         return res[:-len('\n')]
416
417     def to_screen(self, message, skip_eol=False):
418         """Print message to stdout if not in quiet mode."""
419         return self.to_stdout(message, skip_eol, check_quiet=True)
420
421     def _write_string(self, s, out=None):
422         write_string(s, out=out, encoding=self.params.get('encoding'))
423
424     def to_stdout(self, message, skip_eol=False, check_quiet=False):
425         """Print message to stdout if not in quiet mode."""
426         if self.params.get('logger'):
427             self.params['logger'].debug(message)
428         elif not check_quiet or not self.params.get('quiet', False):
429             message = self._bidi_workaround(message)
430             terminator = ['\n', ''][skip_eol]
431             output = message + terminator
432
433             self._write_string(output, self._screen_file)
434
435     def to_stderr(self, message):
436         """Print message to stderr."""
437         assert isinstance(message, compat_str)
438         if self.params.get('logger'):
439             self.params['logger'].error(message)
440         else:
441             message = self._bidi_workaround(message)
442             output = message + '\n'
443             self._write_string(output, self._err_file)
444
445     def to_console_title(self, message):
446         if not self.params.get('consoletitle', False):
447             return
448         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
449             # c_wchar_p() might not be necessary if `message` is
450             # already of type unicode()
451             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
452         elif 'TERM' in os.environ:
453             self._write_string('\033]0;%s\007' % message, self._screen_file)
454
455     def save_console_title(self):
456         if not self.params.get('consoletitle', False):
457             return
458         if 'TERM' in os.environ:
459             # Save the title on stack
460             self._write_string('\033[22;0t', self._screen_file)
461
462     def restore_console_title(self):
463         if not self.params.get('consoletitle', False):
464             return
465         if 'TERM' in os.environ:
466             # Restore the title from stack
467             self._write_string('\033[23;0t', self._screen_file)
468
469     def __enter__(self):
470         self.save_console_title()
471         return self
472
473     def __exit__(self, *args):
474         self.restore_console_title()
475
476         if self.params.get('cookiefile') is not None:
477             self.cookiejar.save()
478
479     def trouble(self, message=None, tb=None):
480         """Determine action to take when a download problem appears.
481
482         Depending on if the downloader has been configured to ignore
483         download errors or not, this method may throw an exception or
484         not when errors are found, after printing the message.
485
486         tb, if given, is additional traceback information.
487         """
488         if message is not None:
489             self.to_stderr(message)
490         if self.params.get('verbose'):
491             if tb is None:
492                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
493                     tb = ''
494                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
495                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
496                     tb += compat_str(traceback.format_exc())
497                 else:
498                     tb_data = traceback.format_list(traceback.extract_stack())
499                     tb = ''.join(tb_data)
500             self.to_stderr(tb)
501         if not self.params.get('ignoreerrors', False):
502             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
503                 exc_info = sys.exc_info()[1].exc_info
504             else:
505                 exc_info = sys.exc_info()
506             raise DownloadError(message, exc_info)
507         self._download_retcode = 1
508
509     def report_warning(self, message):
510         '''
511         Print the message to stderr, it will be prefixed with 'WARNING:'
512         If stderr is a tty file the 'WARNING:' will be colored
513         '''
514         if self.params.get('logger') is not None:
515             self.params['logger'].warning(message)
516         else:
517             if self.params.get('no_warnings'):
518                 return
519             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
520                 _msg_header = '\033[0;33mWARNING:\033[0m'
521             else:
522                 _msg_header = 'WARNING:'
523             warning_message = '%s %s' % (_msg_header, message)
524             self.to_stderr(warning_message)
525
526     def report_error(self, message, tb=None):
527         '''
528         Do the same as trouble, but prefixes the message with 'ERROR:', colored
529         in red if stderr is a tty file.
530         '''
531         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
532             _msg_header = '\033[0;31mERROR:\033[0m'
533         else:
534             _msg_header = 'ERROR:'
535         error_message = '%s %s' % (_msg_header, message)
536         self.trouble(error_message, tb)
537
538     def report_file_already_downloaded(self, file_name):
539         """Report file has already been fully downloaded."""
540         try:
541             self.to_screen('[download] %s has already been downloaded' % file_name)
542         except UnicodeEncodeError:
543             self.to_screen('[download] The file has already been downloaded')
544
545     def prepare_filename(self, info_dict):
546         """Generate the output filename."""
547         try:
548             template_dict = dict(info_dict)
549
550             template_dict['epoch'] = int(time.time())
551             autonumber_size = self.params.get('autonumber_size')
552             if autonumber_size is None:
553                 autonumber_size = 5
554             autonumber_templ = '%0' + str(autonumber_size) + 'd'
555             template_dict['autonumber'] = autonumber_templ % self._num_downloads
556             if template_dict.get('playlist_index') is not None:
557                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
558             if template_dict.get('resolution') is None:
559                 if template_dict.get('width') and template_dict.get('height'):
560                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
561                 elif template_dict.get('height'):
562                     template_dict['resolution'] = '%sp' % template_dict['height']
563                 elif template_dict.get('width'):
564                     template_dict['resolution'] = '?x%d' % template_dict['width']
565
566             sanitize = lambda k, v: sanitize_filename(
567                 compat_str(v),
568                 restricted=self.params.get('restrictfilenames'),
569                 is_id=(k == 'id'))
570             template_dict = dict((k, sanitize(k, v))
571                                  for k, v in template_dict.items()
572                                  if v is not None)
573             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
574
575             outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
576             tmpl = compat_expanduser(outtmpl)
577             filename = tmpl % template_dict
578             # Temporary fix for #4787
579             # 'Treat' all problem characters by passing filename through preferredencoding
580             # to workaround encoding issues with subprocess on python2 @ Windows
581             if sys.version_info < (3, 0) and sys.platform == 'win32':
582                 filename = encodeFilename(filename, True).decode(preferredencoding())
583             return filename
584         except ValueError as err:
585             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
586             return None
587
588     def _match_entry(self, info_dict, incomplete):
589         """ Returns None iff the file should be downloaded """
590
591         video_title = info_dict.get('title', info_dict.get('id', 'video'))
592         if 'title' in info_dict:
593             # This can happen when we're just evaluating the playlist
594             title = info_dict['title']
595             matchtitle = self.params.get('matchtitle', False)
596             if matchtitle:
597                 if not re.search(matchtitle, title, re.IGNORECASE):
598                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
599             rejecttitle = self.params.get('rejecttitle', False)
600             if rejecttitle:
601                 if re.search(rejecttitle, title, re.IGNORECASE):
602                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
603         date = info_dict.get('upload_date', None)
604         if date is not None:
605             dateRange = self.params.get('daterange', DateRange())
606             if date not in dateRange:
607                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
608         view_count = info_dict.get('view_count', None)
609         if view_count is not None:
610             min_views = self.params.get('min_views')
611             if min_views is not None and view_count < min_views:
612                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
613             max_views = self.params.get('max_views')
614             if max_views is not None and view_count > max_views:
615                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
616         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
617             return 'Skipping "%s" because it is age restricted' % video_title
618         if self.in_download_archive(info_dict):
619             return '%s has already been recorded in archive' % video_title
620
621         if not incomplete:
622             match_filter = self.params.get('match_filter')
623             if match_filter is not None:
624                 ret = match_filter(info_dict)
625                 if ret is not None:
626                     return ret
627
628         return None
629
630     @staticmethod
631     def add_extra_info(info_dict, extra_info):
632         '''Set the keys from extra_info in info dict if they are missing'''
633         for key, value in extra_info.items():
634             info_dict.setdefault(key, value)
635
636     def extract_info(self, url, download=True, ie_key=None, extra_info={},
637                      process=True, force_generic_extractor=False):
638         '''
639         Returns a list with a dictionary for each video we find.
640         If 'download', also downloads the videos.
641         extra_info is a dict containing the extra values to add to each result
642         '''
643
644         if not ie_key and force_generic_extractor:
645             ie_key = 'Generic'
646
647         if ie_key:
648             ies = [self.get_info_extractor(ie_key)]
649         else:
650             ies = self._ies
651
652         for ie in ies:
653             if not ie.suitable(url):
654                 continue
655
656             if not ie.working():
657                 self.report_warning('The program functionality for this site has been marked as broken, '
658                                     'and will probably not work.')
659
660             try:
661                 ie_result = ie.extract(url)
662                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
663                     break
664                 if isinstance(ie_result, list):
665                     # Backwards compatibility: old IE result format
666                     ie_result = {
667                         '_type': 'compat_list',
668                         'entries': ie_result,
669                     }
670                 self.add_default_extra_info(ie_result, ie, url)
671                 if process:
672                     return self.process_ie_result(ie_result, download, extra_info)
673                 else:
674                     return ie_result
675             except ExtractorError as de:  # An error we somewhat expected
676                 self.report_error(compat_str(de), de.format_traceback())
677                 break
678             except MaxDownloadsReached:
679                 raise
680             except Exception as e:
681                 if self.params.get('ignoreerrors', False):
682                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
683                     break
684                 else:
685                     raise
686         else:
687             self.report_error('no suitable InfoExtractor for URL %s' % url)
688
689     def add_default_extra_info(self, ie_result, ie, url):
690         self.add_extra_info(ie_result, {
691             'extractor': ie.IE_NAME,
692             'webpage_url': url,
693             'webpage_url_basename': url_basename(url),
694             'extractor_key': ie.ie_key(),
695         })
696
697     def process_ie_result(self, ie_result, download=True, extra_info={}):
698         """
699         Take the result of the ie(may be modified) and resolve all unresolved
700         references (URLs, playlist items).
701
702         It will also download the videos if 'download'.
703         Returns the resolved ie_result.
704         """
705
706         result_type = ie_result.get('_type', 'video')
707
708         if result_type in ('url', 'url_transparent'):
709             extract_flat = self.params.get('extract_flat', False)
710             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
711                     extract_flat is True):
712                 if self.params.get('forcejson', False):
713                     self.to_stdout(json.dumps(ie_result))
714                 return ie_result
715
716         if result_type == 'video':
717             self.add_extra_info(ie_result, extra_info)
718             return self.process_video_result(ie_result, download=download)
719         elif result_type == 'url':
720             # We have to add extra_info to the results because it may be
721             # contained in a playlist
722             return self.extract_info(ie_result['url'],
723                                      download,
724                                      ie_key=ie_result.get('ie_key'),
725                                      extra_info=extra_info)
726         elif result_type == 'url_transparent':
727             # Use the information from the embedding page
728             info = self.extract_info(
729                 ie_result['url'], ie_key=ie_result.get('ie_key'),
730                 extra_info=extra_info, download=False, process=False)
731
732             force_properties = dict(
733                 (k, v) for k, v in ie_result.items() if v is not None)
734             for f in ('_type', 'url'):
735                 if f in force_properties:
736                     del force_properties[f]
737             new_result = info.copy()
738             new_result.update(force_properties)
739
740             assert new_result.get('_type') != 'url_transparent'
741
742             return self.process_ie_result(
743                 new_result, download=download, extra_info=extra_info)
744         elif result_type == 'playlist' or result_type == 'multi_video':
745             # We process each entry in the playlist
746             playlist = ie_result.get('title', None) or ie_result.get('id', None)
747             self.to_screen('[download] Downloading playlist: %s' % playlist)
748
749             playlist_results = []
750
751             playliststart = self.params.get('playliststart', 1) - 1
752             playlistend = self.params.get('playlistend', None)
753             # For backwards compatibility, interpret -1 as whole list
754             if playlistend == -1:
755                 playlistend = None
756
757             playlistitems_str = self.params.get('playlist_items', None)
758             playlistitems = None
759             if playlistitems_str is not None:
760                 def iter_playlistitems(format):
761                     for string_segment in format.split(','):
762                         if '-' in string_segment:
763                             start, end = string_segment.split('-')
764                             for item in range(int(start), int(end) + 1):
765                                 yield int(item)
766                         else:
767                             yield int(string_segment)
768                 playlistitems = iter_playlistitems(playlistitems_str)
769
770             ie_entries = ie_result['entries']
771             if isinstance(ie_entries, list):
772                 n_all_entries = len(ie_entries)
773                 if playlistitems:
774                     entries = [
775                         ie_entries[i - 1] for i in playlistitems
776                         if -n_all_entries <= i - 1 < n_all_entries]
777                 else:
778                     entries = ie_entries[playliststart:playlistend]
779                 n_entries = len(entries)
780                 self.to_screen(
781                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
782                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
783             elif isinstance(ie_entries, PagedList):
784                 if playlistitems:
785                     entries = []
786                     for item in playlistitems:
787                         entries.extend(ie_entries.getslice(
788                             item - 1, item
789                         ))
790                 else:
791                     entries = ie_entries.getslice(
792                         playliststart, playlistend)
793                 n_entries = len(entries)
794                 self.to_screen(
795                     "[%s] playlist %s: Downloading %d videos" %
796                     (ie_result['extractor'], playlist, n_entries))
797             else:  # iterable
798                 if playlistitems:
799                     entry_list = list(ie_entries)
800                     entries = [entry_list[i - 1] for i in playlistitems]
801                 else:
802                     entries = list(itertools.islice(
803                         ie_entries, playliststart, playlistend))
804                 n_entries = len(entries)
805                 self.to_screen(
806                     "[%s] playlist %s: Downloading %d videos" %
807                     (ie_result['extractor'], playlist, n_entries))
808
809             if self.params.get('playlistreverse', False):
810                 entries = entries[::-1]
811
812             for i, entry in enumerate(entries, 1):
813                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
814                 extra = {
815                     'n_entries': n_entries,
816                     'playlist': playlist,
817                     'playlist_id': ie_result.get('id'),
818                     'playlist_title': ie_result.get('title'),
819                     'playlist_index': i + playliststart,
820                     'extractor': ie_result['extractor'],
821                     'webpage_url': ie_result['webpage_url'],
822                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
823                     'extractor_key': ie_result['extractor_key'],
824                 }
825
826                 reason = self._match_entry(entry, incomplete=True)
827                 if reason is not None:
828                     self.to_screen('[download] ' + reason)
829                     continue
830
831                 entry_result = self.process_ie_result(entry,
832                                                       download=download,
833                                                       extra_info=extra)
834                 playlist_results.append(entry_result)
835             ie_result['entries'] = playlist_results
836             return ie_result
837         elif result_type == 'compat_list':
838             self.report_warning(
839                 'Extractor %s returned a compat_list result. '
840                 'It needs to be updated.' % ie_result.get('extractor'))
841
842             def _fixup(r):
843                 self.add_extra_info(
844                     r,
845                     {
846                         'extractor': ie_result['extractor'],
847                         'webpage_url': ie_result['webpage_url'],
848                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
849                         'extractor_key': ie_result['extractor_key'],
850                     }
851                 )
852                 return r
853             ie_result['entries'] = [
854                 self.process_ie_result(_fixup(r), download, extra_info)
855                 for r in ie_result['entries']
856             ]
857             return ie_result
858         else:
859             raise Exception('Invalid result type: %s' % result_type)
860
861     def _build_format_filter(self, filter_spec):
862         " Returns a function to filter the formats according to the filter_spec "
863
864         OPERATORS = {
865             '<': operator.lt,
866             '<=': operator.le,
867             '>': operator.gt,
868             '>=': operator.ge,
869             '=': operator.eq,
870             '!=': operator.ne,
871         }
872         operator_rex = re.compile(r'''(?x)\s*
873             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
874             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
875             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
876             $
877             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
878         m = operator_rex.search(filter_spec)
879         if m:
880             try:
881                 comparison_value = int(m.group('value'))
882             except ValueError:
883                 comparison_value = parse_filesize(m.group('value'))
884                 if comparison_value is None:
885                     comparison_value = parse_filesize(m.group('value') + 'B')
886                 if comparison_value is None:
887                     raise ValueError(
888                         'Invalid value %r in format specification %r' % (
889                             m.group('value'), filter_spec))
890             op = OPERATORS[m.group('op')]
891
892         if not m:
893             STR_OPERATORS = {
894                 '=': operator.eq,
895                 '!=': operator.ne,
896             }
897             str_operator_rex = re.compile(r'''(?x)
898                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
899                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
900                 \s*(?P<value>[a-zA-Z0-9_-]+)
901                 \s*$
902                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
903             m = str_operator_rex.search(filter_spec)
904             if m:
905                 comparison_value = m.group('value')
906                 op = STR_OPERATORS[m.group('op')]
907
908         if not m:
909             raise ValueError('Invalid filter specification %r' % filter_spec)
910
911         def _filter(f):
912             actual_value = f.get(m.group('key'))
913             if actual_value is None:
914                 return m.group('none_inclusive')
915             return op(actual_value, comparison_value)
916         return _filter
917
918     def build_format_selector(self, format_spec):
919         def syntax_error(note, start):
920             message = (
921                 'Invalid format specification: '
922                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
923             return SyntaxError(message)
924
925         PICKFIRST = 'PICKFIRST'
926         MERGE = 'MERGE'
927         SINGLE = 'SINGLE'
928         GROUP = 'GROUP'
929         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
930
931         def _parse_filter(tokens):
932             filter_parts = []
933             for type, string, start, _, _ in tokens:
934                 if type == tokenize.OP and string == ']':
935                     return ''.join(filter_parts)
936                 else:
937                     filter_parts.append(string)
938
939         def _remove_unused_ops(tokens):
940             # Remove operators that we don't use and join them with the sourrounding strings
941             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
942             ALLOWED_OPS = ('/', '+', ',', '(', ')')
943             last_string, last_start, last_end, last_line = None, None, None, None
944             for type, string, start, end, line in tokens:
945                 if type == tokenize.OP and string == '[':
946                     if last_string:
947                         yield tokenize.NAME, last_string, last_start, last_end, last_line
948                         last_string = None
949                     yield type, string, start, end, line
950                     # everything inside brackets will be handled by _parse_filter
951                     for type, string, start, end, line in tokens:
952                         yield type, string, start, end, line
953                         if type == tokenize.OP and string == ']':
954                             break
955                 elif type == tokenize.OP and string in ALLOWED_OPS:
956                     if last_string:
957                         yield tokenize.NAME, last_string, last_start, last_end, last_line
958                         last_string = None
959                     yield type, string, start, end, line
960                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
961                     if not last_string:
962                         last_string = string
963                         last_start = start
964                         last_end = end
965                     else:
966                         last_string += string
967             if last_string:
968                 yield tokenize.NAME, last_string, last_start, last_end, last_line
969
970         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
971             selectors = []
972             current_selector = None
973             for type, string, start, _, _ in tokens:
974                 # ENCODING is only defined in python 3.x
975                 if type == getattr(tokenize, 'ENCODING', None):
976                     continue
977                 elif type in [tokenize.NAME, tokenize.NUMBER]:
978                     current_selector = FormatSelector(SINGLE, string, [])
979                 elif type == tokenize.OP:
980                     if string == ')':
981                         if not inside_group:
982                             # ')' will be handled by the parentheses group
983                             tokens.restore_last_token()
984                         break
985                     elif inside_merge and string in ['/', ',']:
986                         tokens.restore_last_token()
987                         break
988                     elif inside_choice and string == ',':
989                         tokens.restore_last_token()
990                         break
991                     elif string == ',':
992                         if not current_selector:
993                             raise syntax_error('"," must follow a format selector', start)
994                         selectors.append(current_selector)
995                         current_selector = None
996                     elif string == '/':
997                         if not current_selector:
998                             raise syntax_error('"/" must follow a format selector', start)
999                         first_choice = current_selector
1000                         second_choice = _parse_format_selection(tokens, inside_choice=True)
1001                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
1002                     elif string == '[':
1003                         if not current_selector:
1004                             current_selector = FormatSelector(SINGLE, 'best', [])
1005                         format_filter = _parse_filter(tokens)
1006                         current_selector.filters.append(format_filter)
1007                     elif string == '(':
1008                         if current_selector:
1009                             raise syntax_error('Unexpected "("', start)
1010                         group = _parse_format_selection(tokens, inside_group=True)
1011                         current_selector = FormatSelector(GROUP, group, [])
1012                     elif string == '+':
1013                         video_selector = current_selector
1014                         audio_selector = _parse_format_selection(tokens, inside_merge=True)
1015                         if not video_selector or not audio_selector:
1016                             raise syntax_error('"+" must be between two format selectors', start)
1017                         current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
1018                     else:
1019                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1020                 elif type == tokenize.ENDMARKER:
1021                     break
1022             if current_selector:
1023                 selectors.append(current_selector)
1024             return selectors
1025
1026         def _build_selector_function(selector):
1027             if isinstance(selector, list):
1028                 fs = [_build_selector_function(s) for s in selector]
1029
1030                 def selector_function(formats):
1031                     for f in fs:
1032                         for format in f(formats):
1033                             yield format
1034                 return selector_function
1035             elif selector.type == GROUP:
1036                 selector_function = _build_selector_function(selector.selector)
1037             elif selector.type == PICKFIRST:
1038                 fs = [_build_selector_function(s) for s in selector.selector]
1039
1040                 def selector_function(formats):
1041                     for f in fs:
1042                         picked_formats = list(f(formats))
1043                         if picked_formats:
1044                             return picked_formats
1045                     return []
1046             elif selector.type == SINGLE:
1047                 format_spec = selector.selector
1048
1049                 def selector_function(formats):
1050                     formats = list(formats)
1051                     if not formats:
1052                         return
1053                     if format_spec == 'all':
1054                         for f in formats:
1055                             yield f
1056                     elif format_spec in ['best', 'worst', None]:
1057                         format_idx = 0 if format_spec == 'worst' else -1
1058                         audiovideo_formats = [
1059                             f for f in formats
1060                             if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
1061                         if audiovideo_formats:
1062                             yield audiovideo_formats[format_idx]
1063                         # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
1064                         elif (all(f.get('acodec') != 'none' for f in formats) or
1065                               all(f.get('vcodec') != 'none' for f in formats)):
1066                             yield formats[format_idx]
1067                     elif format_spec == 'bestaudio':
1068                         audio_formats = [
1069                             f for f in formats
1070                             if f.get('vcodec') == 'none']
1071                         if audio_formats:
1072                             yield audio_formats[-1]
1073                     elif format_spec == 'worstaudio':
1074                         audio_formats = [
1075                             f for f in formats
1076                             if f.get('vcodec') == 'none']
1077                         if audio_formats:
1078                             yield audio_formats[0]
1079                     elif format_spec == 'bestvideo':
1080                         video_formats = [
1081                             f for f in formats
1082                             if f.get('acodec') == 'none']
1083                         if video_formats:
1084                             yield video_formats[-1]
1085                     elif format_spec == 'worstvideo':
1086                         video_formats = [
1087                             f for f in formats
1088                             if f.get('acodec') == 'none']
1089                         if video_formats:
1090                             yield video_formats[0]
1091                     else:
1092                         extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
1093                         if format_spec in extensions:
1094                             filter_f = lambda f: f['ext'] == format_spec
1095                         else:
1096                             filter_f = lambda f: f['format_id'] == format_spec
1097                         matches = list(filter(filter_f, formats))
1098                         if matches:
1099                             yield matches[-1]
1100             elif selector.type == MERGE:
1101                 def _merge(formats_info):
1102                     format_1, format_2 = [f['format_id'] for f in formats_info]
1103                     # The first format must contain the video and the
1104                     # second the audio
1105                     if formats_info[0].get('vcodec') == 'none':
1106                         self.report_error('The first format must '
1107                                           'contain the video, try using '
1108                                           '"-f %s+%s"' % (format_2, format_1))
1109                         return
1110                     output_ext = (
1111                         formats_info[0]['ext']
1112                         if self.params.get('merge_output_format') is None
1113                         else self.params['merge_output_format'])
1114                     return {
1115                         'requested_formats': formats_info,
1116                         'format': '%s+%s' % (formats_info[0].get('format'),
1117                                              formats_info[1].get('format')),
1118                         'format_id': '%s+%s' % (formats_info[0].get('format_id'),
1119                                                 formats_info[1].get('format_id')),
1120                         'width': formats_info[0].get('width'),
1121                         'height': formats_info[0].get('height'),
1122                         'resolution': formats_info[0].get('resolution'),
1123                         'fps': formats_info[0].get('fps'),
1124                         'vcodec': formats_info[0].get('vcodec'),
1125                         'vbr': formats_info[0].get('vbr'),
1126                         'stretched_ratio': formats_info[0].get('stretched_ratio'),
1127                         'acodec': formats_info[1].get('acodec'),
1128                         'abr': formats_info[1].get('abr'),
1129                         'ext': output_ext,
1130                     }
1131                 video_selector, audio_selector = map(_build_selector_function, selector.selector)
1132
1133                 def selector_function(formats):
1134                     formats = list(formats)
1135                     for pair in itertools.product(video_selector(formats), audio_selector(formats)):
1136                         yield _merge(pair)
1137
1138             filters = [self._build_format_filter(f) for f in selector.filters]
1139
1140             def final_selector(formats):
1141                 for _filter in filters:
1142                     formats = list(filter(_filter, formats))
1143                 return selector_function(formats)
1144             return final_selector
1145
1146         stream = io.BytesIO(format_spec.encode('utf-8'))
1147         try:
1148             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
1149         except tokenize.TokenError:
1150             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1151
1152         class TokenIterator(object):
1153             def __init__(self, tokens):
1154                 self.tokens = tokens
1155                 self.counter = 0
1156
1157             def __iter__(self):
1158                 return self
1159
1160             def __next__(self):
1161                 if self.counter >= len(self.tokens):
1162                     raise StopIteration()
1163                 value = self.tokens[self.counter]
1164                 self.counter += 1
1165                 return value
1166
1167             next = __next__
1168
1169             def restore_last_token(self):
1170                 self.counter -= 1
1171
1172         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
1173         return _build_selector_function(parsed_selector)
1174
1175     def _calc_headers(self, info_dict):
1176         res = std_headers.copy()
1177
1178         add_headers = info_dict.get('http_headers')
1179         if add_headers:
1180             res.update(add_headers)
1181
1182         cookies = self._calc_cookies(info_dict)
1183         if cookies:
1184             res['Cookie'] = cookies
1185
1186         return res
1187
1188     def _calc_cookies(self, info_dict):
1189         pr = compat_urllib_request.Request(info_dict['url'])
1190         self.cookiejar.add_cookie_header(pr)
1191         return pr.get_header('Cookie')
1192
1193     def process_video_result(self, info_dict, download=True):
1194         assert info_dict.get('_type', 'video') == 'video'
1195
1196         if 'id' not in info_dict:
1197             raise ExtractorError('Missing "id" field in extractor result')
1198         if 'title' not in info_dict:
1199             raise ExtractorError('Missing "title" field in extractor result')
1200
1201         if 'playlist' not in info_dict:
1202             # It isn't part of a playlist
1203             info_dict['playlist'] = None
1204             info_dict['playlist_index'] = None
1205
1206         thumbnails = info_dict.get('thumbnails')
1207         if thumbnails is None:
1208             thumbnail = info_dict.get('thumbnail')
1209             if thumbnail:
1210                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1211         if thumbnails:
1212             thumbnails.sort(key=lambda t: (
1213                 t.get('preference'), t.get('width'), t.get('height'),
1214                 t.get('id'), t.get('url')))
1215             for i, t in enumerate(thumbnails):
1216                 if t.get('width') and t.get('height'):
1217                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1218                 if t.get('id') is None:
1219                     t['id'] = '%d' % i
1220
1221         if thumbnails and 'thumbnail' not in info_dict:
1222             info_dict['thumbnail'] = thumbnails[-1]['url']
1223
1224         if 'display_id' not in info_dict and 'id' in info_dict:
1225             info_dict['display_id'] = info_dict['id']
1226
1227         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1228             # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1229             # see http://bugs.python.org/issue1646728)
1230             try:
1231                 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1232                 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1233             except (ValueError, OverflowError, OSError):
1234                 pass
1235
1236         subtitles = info_dict.get('subtitles')
1237         if subtitles:
1238             for _, subtitle in subtitles.items():
1239                 for subtitle_format in subtitle:
1240                     if 'ext' not in subtitle_format:
1241                         subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1242
1243         if self.params.get('listsubtitles', False):
1244             if 'automatic_captions' in info_dict:
1245                 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
1246             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1247             return
1248         info_dict['requested_subtitles'] = self.process_subtitles(
1249             info_dict['id'], subtitles,
1250             info_dict.get('automatic_captions'))
1251
1252         # We now pick which formats have to be downloaded
1253         if info_dict.get('formats') is None:
1254             # There's only one format available
1255             formats = [info_dict]
1256         else:
1257             formats = info_dict['formats']
1258
1259         if not formats:
1260             raise ExtractorError('No video formats found!')
1261
1262         formats_dict = {}
1263
1264         # We check that all the formats have the format and format_id fields
1265         for i, format in enumerate(formats):
1266             if 'url' not in format:
1267                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1268
1269             if format.get('format_id') is None:
1270                 format['format_id'] = compat_str(i)
1271             format_id = format['format_id']
1272             if format_id not in formats_dict:
1273                 formats_dict[format_id] = []
1274             formats_dict[format_id].append(format)
1275
1276         # Make sure all formats have unique format_id
1277         for format_id, ambiguous_formats in formats_dict.items():
1278             if len(ambiguous_formats) > 1:
1279                 for i, format in enumerate(ambiguous_formats):
1280                     format['format_id'] = '%s-%d' % (format_id, i)
1281
1282         for i, format in enumerate(formats):
1283             if format.get('format') is None:
1284                 format['format'] = '{id} - {res}{note}'.format(
1285                     id=format['format_id'],
1286                     res=self.format_resolution(format),
1287                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1288                 )
1289             # Automatically determine file extension if missing
1290             if 'ext' not in format:
1291                 format['ext'] = determine_ext(format['url']).lower()
1292             # Add HTTP headers, so that external programs can use them from the
1293             # json output
1294             full_format_info = info_dict.copy()
1295             full_format_info.update(format)
1296             format['http_headers'] = self._calc_headers(full_format_info)
1297
1298         # TODO Central sorting goes here
1299
1300         if formats[0] is not info_dict:
1301             # only set the 'formats' fields if the original info_dict list them
1302             # otherwise we end up with a circular reference, the first (and unique)
1303             # element in the 'formats' field in info_dict is info_dict itself,
1304             # wich can't be exported to json
1305             info_dict['formats'] = formats
1306         if self.params.get('listformats'):
1307             self.list_formats(info_dict)
1308             return
1309         if self.params.get('list_thumbnails'):
1310             self.list_thumbnails(info_dict)
1311             return
1312
1313         req_format = self.params.get('format')
1314         if req_format is None:
1315             req_format_list = []
1316             if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
1317                     info_dict['extractor'] in ['youtube', 'ted'] and
1318                     not info_dict.get('is_live')):
1319                 merger = FFmpegMergerPP(self)
1320                 if merger.available and merger.can_merge():
1321                     req_format_list.append('bestvideo+bestaudio')
1322             req_format_list.append('best')
1323             req_format = '/'.join(req_format_list)
1324         format_selector = self.build_format_selector(req_format)
1325         formats_to_download = list(format_selector(formats))
1326         if not formats_to_download:
1327             raise ExtractorError('requested format not available',
1328                                  expected=True)
1329
1330         if download:
1331             if len(formats_to_download) > 1:
1332                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1333             for format in formats_to_download:
1334                 new_info = dict(info_dict)
1335                 new_info.update(format)
1336                 self.process_info(new_info)
1337         # We update the info dict with the best quality format (backwards compatibility)
1338         info_dict.update(formats_to_download[-1])
1339         return info_dict
1340
1341     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1342         """Select the requested subtitles and their format"""
1343         available_subs = {}
1344         if normal_subtitles and self.params.get('writesubtitles'):
1345             available_subs.update(normal_subtitles)
1346         if automatic_captions and self.params.get('writeautomaticsub'):
1347             for lang, cap_info in automatic_captions.items():
1348                 if lang not in available_subs:
1349                     available_subs[lang] = cap_info
1350
1351         if (not self.params.get('writesubtitles') and not
1352                 self.params.get('writeautomaticsub') or not
1353                 available_subs):
1354             return None
1355
1356         if self.params.get('allsubtitles', False):
1357             requested_langs = available_subs.keys()
1358         else:
1359             if self.params.get('subtitleslangs', False):
1360                 requested_langs = self.params.get('subtitleslangs')
1361             elif 'en' in available_subs:
1362                 requested_langs = ['en']
1363             else:
1364                 requested_langs = [list(available_subs.keys())[0]]
1365
1366         formats_query = self.params.get('subtitlesformat', 'best')
1367         formats_preference = formats_query.split('/') if formats_query else []
1368         subs = {}
1369         for lang in requested_langs:
1370             formats = available_subs.get(lang)
1371             if formats is None:
1372                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1373                 continue
1374             for ext in formats_preference:
1375                 if ext == 'best':
1376                     f = formats[-1]
1377                     break
1378                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1379                 if matches:
1380                     f = matches[-1]
1381                     break
1382             else:
1383                 f = formats[-1]
1384                 self.report_warning(
1385                     'No subtitle format found matching "%s" for language %s, '
1386                     'using %s' % (formats_query, lang, f['ext']))
1387             subs[lang] = f
1388         return subs
1389
1390     def process_info(self, info_dict):
1391         """Process a single resolved IE result."""
1392
1393         assert info_dict.get('_type', 'video') == 'video'
1394
1395         max_downloads = self.params.get('max_downloads')
1396         if max_downloads is not None:
1397             if self._num_downloads >= int(max_downloads):
1398                 raise MaxDownloadsReached()
1399
1400         info_dict['fulltitle'] = info_dict['title']
1401         if len(info_dict['title']) > 200:
1402             info_dict['title'] = info_dict['title'][:197] + '...'
1403
1404         if 'format' not in info_dict:
1405             info_dict['format'] = info_dict['ext']
1406
1407         reason = self._match_entry(info_dict, incomplete=False)
1408         if reason is not None:
1409             self.to_screen('[download] ' + reason)
1410             return
1411
1412         self._num_downloads += 1
1413
1414         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1415
1416         # Forced printings
1417         if self.params.get('forcetitle', False):
1418             self.to_stdout(info_dict['fulltitle'])
1419         if self.params.get('forceid', False):
1420             self.to_stdout(info_dict['id'])
1421         if self.params.get('forceurl', False):
1422             if info_dict.get('requested_formats') is not None:
1423                 for f in info_dict['requested_formats']:
1424                     self.to_stdout(f['url'] + f.get('play_path', ''))
1425             else:
1426                 # For RTMP URLs, also include the playpath
1427                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1428         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1429             self.to_stdout(info_dict['thumbnail'])
1430         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1431             self.to_stdout(info_dict['description'])
1432         if self.params.get('forcefilename', False) and filename is not None:
1433             self.to_stdout(filename)
1434         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1435             self.to_stdout(formatSeconds(info_dict['duration']))
1436         if self.params.get('forceformat', False):
1437             self.to_stdout(info_dict['format'])
1438         if self.params.get('forcejson', False):
1439             self.to_stdout(json.dumps(info_dict))
1440
1441         # Do nothing else if in simulate mode
1442         if self.params.get('simulate', False):
1443             return
1444
1445         if filename is None:
1446             return
1447
1448         try:
1449             dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1450             if dn and not os.path.exists(dn):
1451                 os.makedirs(dn)
1452         except (OSError, IOError) as err:
1453             self.report_error('unable to create directory ' + compat_str(err))
1454             return
1455
1456         if self.params.get('writedescription', False):
1457             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1458             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1459                 self.to_screen('[info] Video description is already present')
1460             elif info_dict.get('description') is None:
1461                 self.report_warning('There\'s no description to write.')
1462             else:
1463                 try:
1464                     self.to_screen('[info] Writing video description to: ' + descfn)
1465                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1466                         descfile.write(info_dict['description'])
1467                 except (OSError, IOError):
1468                     self.report_error('Cannot write description file ' + descfn)
1469                     return
1470
1471         if self.params.get('writeannotations', False):
1472             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1473             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1474                 self.to_screen('[info] Video annotations are already present')
1475             else:
1476                 try:
1477                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1478                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1479                         annofile.write(info_dict['annotations'])
1480                 except (KeyError, TypeError):
1481                     self.report_warning('There are no annotations to write.')
1482                 except (OSError, IOError):
1483                     self.report_error('Cannot write annotations file: ' + annofn)
1484                     return
1485
1486         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1487                                        self.params.get('writeautomaticsub')])
1488
1489         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1490             # subtitles download errors are already managed as troubles in relevant IE
1491             # that way it will silently go on when used with unsupporting IE
1492             subtitles = info_dict['requested_subtitles']
1493             ie = self.get_info_extractor(info_dict['extractor_key'])
1494             for sub_lang, sub_info in subtitles.items():
1495                 sub_format = sub_info['ext']
1496                 if sub_info.get('data') is not None:
1497                     sub_data = sub_info['data']
1498                 else:
1499                     try:
1500                         sub_data = ie._download_webpage(
1501                             sub_info['url'], info_dict['id'], note=False)
1502                     except ExtractorError as err:
1503                         self.report_warning('Unable to download subtitle for "%s": %s' %
1504                                             (sub_lang, compat_str(err.cause)))
1505                         continue
1506                 try:
1507                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1508                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1509                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1510                     else:
1511                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1512                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1513                             subfile.write(sub_data)
1514                 except (OSError, IOError):
1515                     self.report_error('Cannot write subtitles file ' + sub_filename)
1516                     return
1517
1518         if self.params.get('writeinfojson', False):
1519             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1520             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1521                 self.to_screen('[info] Video description metadata is already present')
1522             else:
1523                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1524                 try:
1525                     write_json_file(self.filter_requested_info(info_dict), infofn)
1526                 except (OSError, IOError):
1527                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1528                     return
1529
1530         self._write_thumbnails(info_dict, filename)
1531
1532         if not self.params.get('skip_download', False):
1533             try:
1534                 def dl(name, info):
1535                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1536                     for ph in self._progress_hooks:
1537                         fd.add_progress_hook(ph)
1538                     if self.params.get('verbose'):
1539                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1540                     return fd.download(name, info)
1541
1542                 if info_dict.get('requested_formats') is not None:
1543                     downloaded = []
1544                     success = True
1545                     merger = FFmpegMergerPP(self)
1546                     if not merger.available:
1547                         postprocessors = []
1548                         self.report_warning('You have requested multiple '
1549                                             'formats but ffmpeg or avconv are not installed.'
1550                                             ' The formats won\'t be merged.')
1551                     else:
1552                         postprocessors = [merger]
1553
1554                     def compatible_formats(formats):
1555                         video, audio = formats
1556                         # Check extension
1557                         video_ext, audio_ext = audio.get('ext'), video.get('ext')
1558                         if video_ext and audio_ext:
1559                             COMPATIBLE_EXTS = (
1560                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
1561                                 ('webm')
1562                             )
1563                             for exts in COMPATIBLE_EXTS:
1564                                 if video_ext in exts and audio_ext in exts:
1565                                     return True
1566                         # TODO: Check acodec/vcodec
1567                         return False
1568
1569                     filename_real_ext = os.path.splitext(filename)[1][1:]
1570                     filename_wo_ext = (
1571                         os.path.splitext(filename)[0]
1572                         if filename_real_ext == info_dict['ext']
1573                         else filename)
1574                     requested_formats = info_dict['requested_formats']
1575                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1576                         info_dict['ext'] = 'mkv'
1577                         self.report_warning(
1578                             'Requested formats are incompatible for merge and will be merged into mkv.')
1579                     # Ensure filename always has a correct extension for successful merge
1580                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1581                     if os.path.exists(encodeFilename(filename)):
1582                         self.to_screen(
1583                             '[download] %s has already been downloaded and '
1584                             'merged' % filename)
1585                     else:
1586                         for f in requested_formats:
1587                             new_info = dict(info_dict)
1588                             new_info.update(f)
1589                             fname = self.prepare_filename(new_info)
1590                             fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
1591                             downloaded.append(fname)
1592                             partial_success = dl(fname, new_info)
1593                             success = success and partial_success
1594                         info_dict['__postprocessors'] = postprocessors
1595                         info_dict['__files_to_merge'] = downloaded
1596                 else:
1597                     # Just a single file
1598                     success = dl(filename, info_dict)
1599             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1600                 self.report_error('unable to download video data: %s' % str(err))
1601                 return
1602             except (OSError, IOError) as err:
1603                 raise UnavailableVideoError(err)
1604             except (ContentTooShortError, ) as err:
1605                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1606                 return
1607
1608             if success:
1609                 # Fixup content
1610                 fixup_policy = self.params.get('fixup')
1611                 if fixup_policy is None:
1612                     fixup_policy = 'detect_or_warn'
1613
1614                 stretched_ratio = info_dict.get('stretched_ratio')
1615                 if stretched_ratio is not None and stretched_ratio != 1:
1616                     if fixup_policy == 'warn':
1617                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1618                             info_dict['id'], stretched_ratio))
1619                     elif fixup_policy == 'detect_or_warn':
1620                         stretched_pp = FFmpegFixupStretchedPP(self)
1621                         if stretched_pp.available:
1622                             info_dict.setdefault('__postprocessors', [])
1623                             info_dict['__postprocessors'].append(stretched_pp)
1624                         else:
1625                             self.report_warning(
1626                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1627                                     info_dict['id'], stretched_ratio))
1628                     else:
1629                         assert fixup_policy in ('ignore', 'never')
1630
1631                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1632                     if fixup_policy == 'warn':
1633                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1634                             info_dict['id']))
1635                     elif fixup_policy == 'detect_or_warn':
1636                         fixup_pp = FFmpegFixupM4aPP(self)
1637                         if fixup_pp.available:
1638                             info_dict.setdefault('__postprocessors', [])
1639                             info_dict['__postprocessors'].append(fixup_pp)
1640                         else:
1641                             self.report_warning(
1642                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1643                                     info_dict['id']))
1644                     else:
1645                         assert fixup_policy in ('ignore', 'never')
1646
1647                 try:
1648                     self.post_process(filename, info_dict)
1649                 except (PostProcessingError) as err:
1650                     self.report_error('postprocessing: %s' % str(err))
1651                     return
1652                 self.record_download_archive(info_dict)
1653
1654     def download(self, url_list):
1655         """Download a given list of URLs."""
1656         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1657         if (len(url_list) > 1 and
1658                 '%' not in outtmpl and
1659                 self.params.get('max_downloads') != 1):
1660             raise SameFileError(outtmpl)
1661
1662         for url in url_list:
1663             try:
1664                 # It also downloads the videos
1665                 res = self.extract_info(
1666                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
1667             except UnavailableVideoError:
1668                 self.report_error('unable to download video')
1669             except MaxDownloadsReached:
1670                 self.to_screen('[info] Maximum number of downloaded files reached.')
1671                 raise
1672             else:
1673                 if self.params.get('dump_single_json', False):
1674                     self.to_stdout(json.dumps(res))
1675
1676         return self._download_retcode
1677
1678     def download_with_info_file(self, info_filename):
1679         with contextlib.closing(fileinput.FileInput(
1680                 [info_filename], mode='r',
1681                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1682             # FileInput doesn't have a read method, we can't call json.load
1683             info = self.filter_requested_info(json.loads('\n'.join(f)))
1684         try:
1685             self.process_ie_result(info, download=True)
1686         except DownloadError:
1687             webpage_url = info.get('webpage_url')
1688             if webpage_url is not None:
1689                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1690                 return self.download([webpage_url])
1691             else:
1692                 raise
1693         return self._download_retcode
1694
1695     @staticmethod
1696     def filter_requested_info(info_dict):
1697         return dict(
1698             (k, v) for k, v in info_dict.items()
1699             if k not in ['requested_formats', 'requested_subtitles'])
1700
1701     def post_process(self, filename, ie_info):
1702         """Run all the postprocessors on the given file."""
1703         info = dict(ie_info)
1704         info['filepath'] = filename
1705         pps_chain = []
1706         if ie_info.get('__postprocessors') is not None:
1707             pps_chain.extend(ie_info['__postprocessors'])
1708         pps_chain.extend(self._pps)
1709         for pp in pps_chain:
1710             files_to_delete = []
1711             try:
1712                 files_to_delete, info = pp.run(info)
1713             except PostProcessingError as e:
1714                 self.report_error(e.msg)
1715             if files_to_delete and not self.params.get('keepvideo', False):
1716                 for old_filename in files_to_delete:
1717                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1718                     try:
1719                         os.remove(encodeFilename(old_filename))
1720                     except (IOError, OSError):
1721                         self.report_warning('Unable to remove downloaded original file')
1722
1723     def _make_archive_id(self, info_dict):
1724         # Future-proof against any change in case
1725         # and backwards compatibility with prior versions
1726         extractor = info_dict.get('extractor_key')
1727         if extractor is None:
1728             if 'id' in info_dict:
1729                 extractor = info_dict.get('ie_key')  # key in a playlist
1730         if extractor is None:
1731             return None  # Incomplete video information
1732         return extractor.lower() + ' ' + info_dict['id']
1733
1734     def in_download_archive(self, info_dict):
1735         fn = self.params.get('download_archive')
1736         if fn is None:
1737             return False
1738
1739         vid_id = self._make_archive_id(info_dict)
1740         if vid_id is None:
1741             return False  # Incomplete video information
1742
1743         try:
1744             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1745                 for line in archive_file:
1746                     if line.strip() == vid_id:
1747                         return True
1748         except IOError as ioe:
1749             if ioe.errno != errno.ENOENT:
1750                 raise
1751         return False
1752
1753     def record_download_archive(self, info_dict):
1754         fn = self.params.get('download_archive')
1755         if fn is None:
1756             return
1757         vid_id = self._make_archive_id(info_dict)
1758         assert vid_id
1759         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1760             archive_file.write(vid_id + '\n')
1761
1762     @staticmethod
1763     def format_resolution(format, default='unknown'):
1764         if format.get('vcodec') == 'none':
1765             return 'audio only'
1766         if format.get('resolution') is not None:
1767             return format['resolution']
1768         if format.get('height') is not None:
1769             if format.get('width') is not None:
1770                 res = '%sx%s' % (format['width'], format['height'])
1771             else:
1772                 res = '%sp' % format['height']
1773         elif format.get('width') is not None:
1774             res = '?x%d' % format['width']
1775         else:
1776             res = default
1777         return res
1778
1779     def _format_note(self, fdict):
1780         res = ''
1781         if fdict.get('ext') in ['f4f', 'f4m']:
1782             res += '(unsupported) '
1783         if fdict.get('format_note') is not None:
1784             res += fdict['format_note'] + ' '
1785         if fdict.get('tbr') is not None:
1786             res += '%4dk ' % fdict['tbr']
1787         if fdict.get('container') is not None:
1788             if res:
1789                 res += ', '
1790             res += '%s container' % fdict['container']
1791         if (fdict.get('vcodec') is not None and
1792                 fdict.get('vcodec') != 'none'):
1793             if res:
1794                 res += ', '
1795             res += fdict['vcodec']
1796             if fdict.get('vbr') is not None:
1797                 res += '@'
1798         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1799             res += 'video@'
1800         if fdict.get('vbr') is not None:
1801             res += '%4dk' % fdict['vbr']
1802         if fdict.get('fps') is not None:
1803             res += ', %sfps' % fdict['fps']
1804         if fdict.get('acodec') is not None:
1805             if res:
1806                 res += ', '
1807             if fdict['acodec'] == 'none':
1808                 res += 'video only'
1809             else:
1810                 res += '%-5s' % fdict['acodec']
1811         elif fdict.get('abr') is not None:
1812             if res:
1813                 res += ', '
1814             res += 'audio'
1815         if fdict.get('abr') is not None:
1816             res += '@%3dk' % fdict['abr']
1817         if fdict.get('asr') is not None:
1818             res += ' (%5dHz)' % fdict['asr']
1819         if fdict.get('filesize') is not None:
1820             if res:
1821                 res += ', '
1822             res += format_bytes(fdict['filesize'])
1823         elif fdict.get('filesize_approx') is not None:
1824             if res:
1825                 res += ', '
1826             res += '~' + format_bytes(fdict['filesize_approx'])
1827         return res
1828
1829     def list_formats(self, info_dict):
1830         formats = info_dict.get('formats', [info_dict])
1831         table = [
1832             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1833             for f in formats
1834             if f.get('preference') is None or f['preference'] >= -1000]
1835         if len(formats) > 1:
1836             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1837
1838         header_line = ['format code', 'extension', 'resolution', 'note']
1839         self.to_screen(
1840             '[info] Available formats for %s:\n%s' %
1841             (info_dict['id'], render_table(header_line, table)))
1842
1843     def list_thumbnails(self, info_dict):
1844         thumbnails = info_dict.get('thumbnails')
1845         if not thumbnails:
1846             tn_url = info_dict.get('thumbnail')
1847             if tn_url:
1848                 thumbnails = [{'id': '0', 'url': tn_url}]
1849             else:
1850                 self.to_screen(
1851                     '[info] No thumbnails present for %s' % info_dict['id'])
1852                 return
1853
1854         self.to_screen(
1855             '[info] Thumbnails for %s:' % info_dict['id'])
1856         self.to_screen(render_table(
1857             ['ID', 'width', 'height', 'URL'],
1858             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1859
1860     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1861         if not subtitles:
1862             self.to_screen('%s has no %s' % (video_id, name))
1863             return
1864         self.to_screen(
1865             'Available %s for %s:' % (name, video_id))
1866         self.to_screen(render_table(
1867             ['Language', 'formats'],
1868             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1869                 for lang, formats in subtitles.items()]))
1870
1871     def urlopen(self, req):
1872         """ Start an HTTP download """
1873         return self._opener.open(req, timeout=self._socket_timeout)
1874
1875     def print_debug_header(self):
1876         if not self.params.get('verbose'):
1877             return
1878
1879         if type('') is not compat_str:
1880             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1881             self.report_warning(
1882                 'Your Python is broken! Update to a newer and supported version')
1883
1884         stdout_encoding = getattr(
1885             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1886         encoding_str = (
1887             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1888                 locale.getpreferredencoding(),
1889                 sys.getfilesystemencoding(),
1890                 stdout_encoding,
1891                 self.get_encoding()))
1892         write_string(encoding_str, encoding=None)
1893
1894         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1895         try:
1896             sp = subprocess.Popen(
1897                 ['git', 'rev-parse', '--short', 'HEAD'],
1898                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1899                 cwd=os.path.dirname(os.path.abspath(__file__)))
1900             out, err = sp.communicate()
1901             out = out.decode().strip()
1902             if re.match('[0-9a-f]+', out):
1903                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1904         except Exception:
1905             try:
1906                 sys.exc_clear()
1907             except Exception:
1908                 pass
1909         self._write_string('[debug] Python version %s - %s\n' % (
1910             platform.python_version(), platform_name()))
1911
1912         exe_versions = FFmpegPostProcessor.get_versions(self)
1913         exe_versions['rtmpdump'] = rtmpdump_version()
1914         exe_str = ', '.join(
1915             '%s %s' % (exe, v)
1916             for exe, v in sorted(exe_versions.items())
1917             if v
1918         )
1919         if not exe_str:
1920             exe_str = 'none'
1921         self._write_string('[debug] exe versions: %s\n' % exe_str)
1922
1923         proxy_map = {}
1924         for handler in self._opener.handlers:
1925             if hasattr(handler, 'proxies'):
1926                 proxy_map.update(handler.proxies)
1927         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1928
1929         if self.params.get('call_home', False):
1930             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1931             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1932             latest_version = self.urlopen(
1933                 'https://yt-dl.org/latest/version').read().decode('utf-8')
1934             if version_tuple(latest_version) > version_tuple(__version__):
1935                 self.report_warning(
1936                     'You are using an outdated version (newest version: %s)! '
1937                     'See https://yt-dl.org/update if you need help updating.' %
1938                     latest_version)
1939
1940     def _setup_opener(self):
1941         timeout_val = self.params.get('socket_timeout')
1942         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1943
1944         opts_cookiefile = self.params.get('cookiefile')
1945         opts_proxy = self.params.get('proxy')
1946
1947         if opts_cookiefile is None:
1948             self.cookiejar = compat_cookiejar.CookieJar()
1949         else:
1950             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1951                 opts_cookiefile)
1952             if os.access(opts_cookiefile, os.R_OK):
1953                 self.cookiejar.load()
1954
1955         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
1956         if opts_proxy is not None:
1957             if opts_proxy == '':
1958                 proxies = {}
1959             else:
1960                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1961         else:
1962             proxies = compat_urllib_request.getproxies()
1963             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1964             if 'http' in proxies and 'https' not in proxies:
1965                 proxies['https'] = proxies['http']
1966         proxy_handler = PerRequestProxyHandler(proxies)
1967
1968         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1969         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1970         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1971         data_handler = compat_urllib_request_DataHandler()
1972         opener = compat_urllib_request.build_opener(
1973             proxy_handler, https_handler, cookie_processor, ydlh, data_handler)
1974
1975         # Delete the default user-agent header, which would otherwise apply in
1976         # cases where our custom HTTP handler doesn't come into play
1977         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1978         opener.addheaders = []
1979         self._opener = opener
1980
1981     def encode(self, s):
1982         if isinstance(s, bytes):
1983             return s  # Already encoded
1984
1985         try:
1986             return s.encode(self.get_encoding())
1987         except UnicodeEncodeError as err:
1988             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1989             raise
1990
1991     def get_encoding(self):
1992         encoding = self.params.get('encoding')
1993         if encoding is None:
1994             encoding = preferredencoding()
1995         return encoding
1996
1997     def _write_thumbnails(self, info_dict, filename):
1998         if self.params.get('writethumbnail', False):
1999             thumbnails = info_dict.get('thumbnails')
2000             if thumbnails:
2001                 thumbnails = [thumbnails[-1]]
2002         elif self.params.get('write_all_thumbnails', False):
2003             thumbnails = info_dict.get('thumbnails')
2004         else:
2005             return
2006
2007         if not thumbnails:
2008             # No thumbnails present, so return immediately
2009             return
2010
2011         for t in thumbnails:
2012             thumb_ext = determine_ext(t['url'], 'jpg')
2013             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2014             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2015             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
2016
2017             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2018                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2019                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2020             else:
2021                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2022                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2023                 try:
2024                     uf = self.urlopen(t['url'])
2025                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2026                         shutil.copyfileobj(uf, thumbf)
2027                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2028                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2029                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2030                     self.report_warning('Unable to download thumbnail "%s": %s' %
2031                                         (t['url'], compat_str(err)))