[ign] improve extraction and extract uploader_id
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import datetime
9 import errno
10 import fileinput
11 import io
12 import itertools
13 import json
14 import locale
15 import operator
16 import os
17 import platform
18 import re
19 import shutil
20 import subprocess
21 import socket
22 import sys
23 import time
24 import tokenize
25 import traceback
26
27 if os.name == 'nt':
28     import ctypes
29
30 from .compat import (
31     compat_cookiejar,
32     compat_expanduser,
33     compat_get_terminal_size,
34     compat_http_client,
35     compat_kwargs,
36     compat_str,
37     compat_tokenize_tokenize,
38     compat_urllib_error,
39     compat_urllib_request,
40 )
41 from .utils import (
42     ContentTooShortError,
43     date_from_str,
44     DateRange,
45     DEFAULT_OUTTMPL,
46     determine_ext,
47     DownloadError,
48     encodeFilename,
49     ExtractorError,
50     format_bytes,
51     formatSeconds,
52     locked_file,
53     make_HTTPS_handler,
54     MaxDownloadsReached,
55     PagedList,
56     parse_filesize,
57     PerRequestProxyHandler,
58     PostProcessingError,
59     platform_name,
60     preferredencoding,
61     render_table,
62     SameFileError,
63     sanitize_filename,
64     sanitize_path,
65     std_headers,
66     subtitles_filename,
67     UnavailableVideoError,
68     url_basename,
69     version_tuple,
70     write_json_file,
71     write_string,
72     YoutubeDLCookieProcessor,
73     YoutubeDLHandler,
74     prepend_extension,
75     replace_extension,
76     args_to_str,
77     age_restricted,
78 )
79 from .cache import Cache
80 from .extractor import get_info_extractor, gen_extractors
81 from .downloader import get_suitable_downloader
82 from .downloader.rtmp import rtmpdump_version
83 from .postprocessor import (
84     FFmpegFixupM4aPP,
85     FFmpegFixupStretchedPP,
86     FFmpegMergerPP,
87     FFmpegPostProcessor,
88     get_postprocessor,
89 )
90 from .version import __version__
91
92
93 class YoutubeDL(object):
94     """YoutubeDL class.
95
96     YoutubeDL objects are the ones responsible of downloading the
97     actual video file and writing it to disk if the user has requested
98     it, among some other tasks. In most cases there should be one per
99     program. As, given a video URL, the downloader doesn't know how to
100     extract all the needed information, task that InfoExtractors do, it
101     has to pass the URL to one of them.
102
103     For this, YoutubeDL objects have a method that allows
104     InfoExtractors to be registered in a given order. When it is passed
105     a URL, the YoutubeDL object handles it to the first InfoExtractor it
106     finds that reports being able to handle it. The InfoExtractor extracts
107     all the information about the video or videos the URL refers to, and
108     YoutubeDL process the extracted information, possibly using a File
109     Downloader to download the video.
110
111     YoutubeDL objects accept a lot of parameters. In order not to saturate
112     the object constructor with arguments, it receives a dictionary of
113     options instead. These options are available through the params
114     attribute for the InfoExtractors to use. The YoutubeDL also
115     registers itself as the downloader in charge for the InfoExtractors
116     that are added to it, so this is a "mutual registration".
117
118     Available options:
119
120     username:          Username for authentication purposes.
121     password:          Password for authentication purposes.
122     videopassword:     Password for accessing a video.
123     usenetrc:          Use netrc for authentication instead.
124     verbose:           Print additional info to stdout.
125     quiet:             Do not print messages to stdout.
126     no_warnings:       Do not print out anything for warnings.
127     forceurl:          Force printing final URL.
128     forcetitle:        Force printing title.
129     forceid:           Force printing ID.
130     forcethumbnail:    Force printing thumbnail URL.
131     forcedescription:  Force printing description.
132     forcefilename:     Force printing final filename.
133     forceduration:     Force printing duration.
134     forcejson:         Force printing info_dict as JSON.
135     dump_single_json:  Force printing the info_dict of the whole playlist
136                        (or video) as a single JSON line.
137     simulate:          Do not download the video files.
138     format:            Video format code. See options.py for more information.
139     outtmpl:           Template for output names.
140     restrictfilenames: Do not allow "&" and spaces in file names
141     ignoreerrors:      Do not stop on download errors.
142     force_generic_extractor: Force downloader to use the generic extractor
143     nooverwrites:      Prevent overwriting files.
144     playliststart:     Playlist item to start at.
145     playlistend:       Playlist item to end at.
146     playlist_items:    Specific indices of playlist to download.
147     playlistreverse:   Download playlist items in reverse order.
148     matchtitle:        Download only matching titles.
149     rejecttitle:       Reject downloads for matching titles.
150     logger:            Log messages to a logging.Logger instance.
151     logtostderr:       Log messages to stderr instead of stdout.
152     writedescription:  Write the video description to a .description file
153     writeinfojson:     Write the video description to a .info.json file
154     writeannotations:  Write the video annotations to a .annotations.xml file
155     writethumbnail:    Write the thumbnail image to a file
156     write_all_thumbnails:  Write all thumbnail formats to files
157     writesubtitles:    Write the video subtitles to a file
158     writeautomaticsub: Write the automatic subtitles to a file
159     allsubtitles:      Downloads all the subtitles of the video
160                        (requires writesubtitles or writeautomaticsub)
161     listsubtitles:     Lists all available subtitles for the video
162     subtitlesformat:   The format code for subtitles
163     subtitleslangs:    List of languages of the subtitles to download
164     keepvideo:         Keep the video file after post-processing
165     daterange:         A DateRange object, download only if the upload_date is in the range.
166     skip_download:     Skip the actual download of the video file
167     cachedir:          Location of the cache files in the filesystem.
168                        False to disable filesystem cache.
169     noplaylist:        Download single video instead of a playlist if in doubt.
170     age_limit:         An integer representing the user's age in years.
171                        Unsuitable videos for the given age are skipped.
172     min_views:         An integer representing the minimum view count the video
173                        must have in order to not be skipped.
174                        Videos without view count information are always
175                        downloaded. None for no limit.
176     max_views:         An integer representing the maximum view count.
177                        Videos that are more popular than that are not
178                        downloaded.
179                        Videos without view count information are always
180                        downloaded. None for no limit.
181     download_archive:  File name of a file where all downloads are recorded.
182                        Videos already present in the file are not downloaded
183                        again.
184     cookiefile:        File name where cookies should be read from and dumped to.
185     nocheckcertificate:Do not verify SSL certificates
186     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
187                        At the moment, this is only supported by YouTube.
188     proxy:             URL of the proxy server to use
189     cn_verification_proxy:  URL of the proxy to use for IP address verification
190                        on Chinese sites. (Experimental)
191     socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
194     debug_printtraffic:Print out sent and received HTTP traffic
195     include_ads:       Download ads as well
196     default_search:    Prepend this string if an input url is not valid.
197                        'auto' for elaborate guessing
198     encoding:          Use this encoding instead of the system-specified.
199     extract_flat:      Do not resolve URLs, return the immediate result.
200                        Pass in 'in_playlist' to only show this behavior for
201                        playlist items.
202     postprocessors:    A list of dictionaries, each with an entry
203                        * key:  The name of the postprocessor. See
204                                youtube_dl/postprocessor/__init__.py for a list.
205                        as well as any further keyword arguments for the
206                        postprocessor.
207     progress_hooks:    A list of functions that get called on download
208                        progress, with a dictionary with the entries
209                        * status: One of "downloading", "error", or "finished".
210                                  Check this first and ignore unknown values.
211
212                        If status is one of "downloading", or "finished", the
213                        following properties may also be present:
214                        * filename: The final filename (always present)
215                        * tmpfilename: The filename we're currently writing to
216                        * downloaded_bytes: Bytes on disk
217                        * total_bytes: Size of the whole file, None if unknown
218                        * total_bytes_estimate: Guess of the eventual file size,
219                                                None if unavailable.
220                        * elapsed: The number of seconds since download started.
221                        * eta: The estimated time in seconds, None if unknown
222                        * speed: The download speed in bytes/second, None if
223                                 unknown
224                        * fragment_index: The counter of the currently
225                                          downloaded video fragment.
226                        * fragment_count: The number of fragments (= individual
227                                          files that will be merged)
228
229                        Progress hooks are guaranteed to be called at least once
230                        (with status "finished") if the download is successful.
231     merge_output_format: Extension to use when merging formats.
232     fixup:             Automatically correct known faults of the file.
233                        One of:
234                        - "never": do nothing
235                        - "warn": only emit a warning
236                        - "detect_or_warn": check whether we can do anything
237                                            about it, warn otherwise (default)
238     source_address:    (Experimental) Client-side IP address to bind to.
239     call_home:         Boolean, true iff we are allowed to contact the
240                        youtube-dl servers for debugging.
241     sleep_interval:    Number of seconds to sleep before each download.
242     listformats:       Print an overview of available video formats and exit.
243     list_thumbnails:   Print a table of all thumbnails and exit.
244     match_filter:      A function that gets called with the info_dict of
245                        every video.
246                        If it returns a message, the video is ignored.
247                        If it returns None, the video is downloaded.
248                        match_filter_func in utils.py is one example for this.
249     no_color:          Do not emit color codes in output.
250
251     The following options determine which downloader is picked:
252     external_downloader: Executable of the external downloader to call.
253                        None or unset for standard (built-in) downloader.
254     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
255
256     The following parameters are not used by YoutubeDL itself, they are used by
257     the downloader (see youtube_dl/downloader/common.py):
258     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
259     noresizebuffer, retries, continuedl, noprogress, consoletitle,
260     xattr_set_filesize, external_downloader_args.
261
262     The following options are used by the post processors:
263     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
264                        otherwise prefer avconv.
265     postprocessor_args: A list of additional command-line arguments for the
266                         postprocessor.
267     """
268
    # Class-level defaults; each is replaced with a per-instance value in
    # __init__ (kept here mainly as documentation of the attribute set).
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None
275
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.

        params:    dict of options (see the class docstring); None is
                   treated as an empty dict.
        auto_init: when True, print the debug header and register all
                   default info extractors immediately.
        """
        if params is None:
            params = {}
        self._ies = []
        self._ies_instances = {}
        self._pps = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Screen messages go to stderr instead of stdout when 'logtostderr' is set
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = {
            # Default parameters
            'nocheckcertificate': False,
        }
        self.params.update(params)
        self.cache = Cache(self)

        if params.get('bidi_workaround', False):
            try:
                import pty
                # Route output through an external bidi filter: we write to
                # its stdin and read the reordered text back via a pty.
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    # bidiv not available; fall back to fribidi
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == 2:  # ENOENT: neither executable was found
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.version_info >= (3,) and sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
                not params.get('restrictfilenames', False)):
            # On Python 3, the Unicode filesystem API will throw errors (#1474)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if isinstance(params.get('outtmpl'), bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')

        self._setup_opener()

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        # Instantiate and register the configured postprocessors; each dict
        # entry names the PP class via 'key', remaining items are kwargs.
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            del pp_def['key']
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)
352
353     def warn_if_short_id(self, argv):
354         # short YouTube ID starting with dash?
355         idxs = [
356             i for i, a in enumerate(argv)
357             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
358         if idxs:
359             correct_argv = (
360                 ['youtube-dl'] +
361                 [a for i, a in enumerate(argv) if i not in idxs] +
362                 ['--'] + [argv[i] for i in idxs]
363             )
364             self.report_warning(
365                 'Long argument string detected. '
366                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
367                 args_to_str(correct_argv))
368
369     def add_info_extractor(self, ie):
370         """Add an InfoExtractor object to the end of the list."""
371         self._ies.append(ie)
372         self._ies_instances[ie.ie_key()] = ie
373         ie.set_downloader(self)
374
375     def get_info_extractor(self, ie_key):
376         """
377         Get an instance of an IE with name ie_key, it will try to get one from
378         the _ies list, if there's no instance it will create a new one and add
379         it to the extractor list.
380         """
381         ie = self._ies_instances.get(ie_key)
382         if ie is None:
383             ie = get_info_extractor(ie_key)()
384             self.add_info_extractor(ie)
385         return ie
386
387     def add_default_info_extractors(self):
388         """
389         Add the InfoExtractors returned by gen_extractors to the end of the list
390         """
391         for ie in gen_extractors():
392             self.add_info_extractor(ie)
393
394     def add_post_processor(self, pp):
395         """Add a PostProcessor object to the end of the chain."""
396         self._pps.append(pp)
397         pp.set_downloader(self)
398
399     def add_progress_hook(self, ph):
400         """Add the progress hook (currently only for the file downloader)"""
401         self._progress_hooks.append(ph)
402
403     def _bidi_workaround(self, message):
404         if not hasattr(self, '_output_channel'):
405             return message
406
407         assert hasattr(self, '_output_process')
408         assert isinstance(message, compat_str)
409         line_count = message.count('\n') + 1
410         self._output_process.stdin.write((message + '\n').encode('utf-8'))
411         self._output_process.stdin.flush()
412         res = ''.join(self._output_channel.readline().decode('utf-8')
413                       for _ in range(line_count))
414         return res[:-len('\n')]
415
416     def to_screen(self, message, skip_eol=False):
417         """Print message to stdout if not in quiet mode."""
418         return self.to_stdout(message, skip_eol, check_quiet=True)
419
    def _write_string(self, s, out=None):
        # Low-level write that honors the user-selected output encoding.
        write_string(s, out=out, encoding=self.params.get('encoding'))
422
423     def to_stdout(self, message, skip_eol=False, check_quiet=False):
424         """Print message to stdout if not in quiet mode."""
425         if self.params.get('logger'):
426             self.params['logger'].debug(message)
427         elif not check_quiet or not self.params.get('quiet', False):
428             message = self._bidi_workaround(message)
429             terminator = ['\n', ''][skip_eol]
430             output = message + terminator
431
432             self._write_string(output, self._screen_file)
433
434     def to_stderr(self, message):
435         """Print message to stderr."""
436         assert isinstance(message, compat_str)
437         if self.params.get('logger'):
438             self.params['logger'].error(message)
439         else:
440             message = self._bidi_workaround(message)
441             output = message + '\n'
442             self._write_string(output, self._err_file)
443
    def to_console_title(self, message):
        """Set the console/terminal window title to message, if enabled."""
        if not self.params.get('consoletitle', False):
            return
        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            # OSC 0 escape sequence sets the xterm window/icon title
            self._write_string('\033]0;%s\007' % message, self._screen_file)
453
454     def save_console_title(self):
455         if not self.params.get('consoletitle', False):
456             return
457         if 'TERM' in os.environ:
458             # Save the title on stack
459             self._write_string('\033[22;0t', self._screen_file)
460
461     def restore_console_title(self):
462         if not self.params.get('consoletitle', False):
463             return
464         if 'TERM' in os.environ:
465             # Restore the title from stack
466             self._write_string('\033[23;0t', self._screen_file)
467
    def __enter__(self):
        # Context-manager entry: remember the terminal title so that
        # __exit__ can restore it.
        self.save_console_title()
        return self
471
    def __exit__(self, *args):
        # Context-manager exit: restore the terminal title saved in __enter__.
        self.restore_console_title()

        # Persist cookies when a cookie jar file was configured
        if self.params.get('cookiefile') is not None:
            self.cookiejar.save()
477
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception
        (DownloadError) or just set the return code to 1, after printing
        the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: show where we were called from
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # If the current exception carries its own exc_info attribute
            # (presumably a wrapped root cause — see the identical check
            # above), raise DownloadError with that instead.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1
507
508     def report_warning(self, message):
509         '''
510         Print the message to stderr, it will be prefixed with 'WARNING:'
511         If stderr is a tty file the 'WARNING:' will be colored
512         '''
513         if self.params.get('logger') is not None:
514             self.params['logger'].warning(message)
515         else:
516             if self.params.get('no_warnings'):
517                 return
518             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
519                 _msg_header = '\033[0;33mWARNING:\033[0m'
520             else:
521                 _msg_header = 'WARNING:'
522             warning_message = '%s %s' % (_msg_header, message)
523             self.to_stderr(warning_message)
524
525     def report_error(self, message, tb=None):
526         '''
527         Do the same as trouble, but prefixes the message with 'ERROR:', colored
528         in red if stderr is a tty file.
529         '''
530         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
531             _msg_header = '\033[0;31mERROR:\033[0m'
532         else:
533             _msg_header = 'ERROR:'
534         error_message = '%s %s' % (_msg_header, message)
535         self.trouble(error_message, tb)
536
537     def report_file_already_downloaded(self, file_name):
538         """Report file has already been fully downloaded."""
539         try:
540             self.to_screen('[download] %s has already been downloaded' % file_name)
541         except UnicodeEncodeError:
542             self.to_screen('[download] The file has already been downloaded')
543
    def prepare_filename(self, info_dict):
        """Generate the output filename by expanding the outtmpl template.

        Returns the filename string, or None when the template is invalid
        (a ValueError during expansion is reported via report_error).
        """
        try:
            template_dict = dict(info_dict)

            template_dict['epoch'] = int(time.time())
            autonumber_size = self.params.get('autonumber_size')
            if autonumber_size is None:
                autonumber_size = 5
            autonumber_templ = '%0' + str(autonumber_size) + 'd'
            template_dict['autonumber'] = autonumber_templ % self._num_downloads
            if template_dict.get('playlist_index') is not None:
                # Zero-pad the playlist index to the width of the entry count
                template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
            if template_dict.get('resolution') is None:
                # Derive a resolution string from width/height when possible
                if template_dict.get('width') and template_dict.get('height'):
                    template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
                elif template_dict.get('height'):
                    template_dict['resolution'] = '%sp' % template_dict['height']
                elif template_dict.get('width'):
                    template_dict['resolution'] = '?x%d' % template_dict['width']

            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id'))
            # Sanitize every value and drop None entries; any field missing
            # from the dict expands to the literal string 'NA'.
            template_dict = dict((k, sanitize(k, v))
                                 for k, v in template_dict.items()
                                 if v is not None)
            template_dict = collections.defaultdict(lambda: 'NA', template_dict)

            outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
            tmpl = compat_expanduser(outtmpl)
            filename = tmpl % template_dict
            # Temporary fix for #4787
            # 'Treat' all problem characters by passing filename through preferredencoding
            # to workaround encoding issues with subprocess on python2 @ Windows
            if sys.version_info < (3, 0) and sys.platform == 'win32':
                filename = encodeFilename(filename, True).decode(preferredencoding())
            return filename
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None
586
587     def _match_entry(self, info_dict, incomplete):
588         """ Returns None iff the file should be downloaded """
589
590         video_title = info_dict.get('title', info_dict.get('id', 'video'))
591         if 'title' in info_dict:
592             # This can happen when we're just evaluating the playlist
593             title = info_dict['title']
594             matchtitle = self.params.get('matchtitle', False)
595             if matchtitle:
596                 if not re.search(matchtitle, title, re.IGNORECASE):
597                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
598             rejecttitle = self.params.get('rejecttitle', False)
599             if rejecttitle:
600                 if re.search(rejecttitle, title, re.IGNORECASE):
601                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
602         date = info_dict.get('upload_date', None)
603         if date is not None:
604             dateRange = self.params.get('daterange', DateRange())
605             if date not in dateRange:
606                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
607         view_count = info_dict.get('view_count', None)
608         if view_count is not None:
609             min_views = self.params.get('min_views')
610             if min_views is not None and view_count < min_views:
611                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
612             max_views = self.params.get('max_views')
613             if max_views is not None and view_count > max_views:
614                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
615         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
616             return 'Skipping "%s" because it is age restricted' % video_title
617         if self.in_download_archive(info_dict):
618             return '%s has already been recorded in archive' % video_title
619
620         if not incomplete:
621             match_filter = self.params.get('match_filter')
622             if match_filter is not None:
623                 ret = match_filter(info_dict)
624                 if ret is not None:
625                     return ret
626
627         return None
628
629     @staticmethod
630     def add_extra_info(info_dict, extra_info):
631         '''Set the keys from extra_info in info dict if they are missing'''
632         for key, value in extra_info.items():
633             info_dict.setdefault(key, value)
634
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
                     process=True, force_generic_extractor=False):
        '''
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result

        ie_key: only try the extractor registered under this key instead
                of probing every registered extractor in order.
        process: when False, return the raw extractor result without
                 further processing (no playlist resolution, no download).
        force_generic_extractor: shortcut for ie_key='Generic' (only when
                 no explicit ie_key is given).

        Note: extra_info has a mutable default, but it is only passed
        through to process_ie_result, never mutated here.
        '''

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        if ie_key:
            ies = [self.get_info_extractor(ie_key)]
        else:
            ies = self._ies

        for ie in ies:
            # Probe extractors in registration order; first suitable one wins
            if not ie.suitable(url):
                continue

            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            try:
                ie_result = ie.extract(url)
                if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                    break
                if isinstance(ie_result, list):
                    # Backwards compatibility: old IE result format
                    ie_result = {
                        '_type': 'compat_list',
                        'entries': ie_result,
                    }
                self.add_default_extra_info(ie_result, ie, url)
                if process:
                    return self.process_ie_result(ie_result, download, extra_info)
                else:
                    return ie_result
            except ExtractorError as de:  # An error we somewhat expected
                self.report_error(compat_str(de), de.format_traceback())
                break
            except MaxDownloadsReached:
                raise
            except Exception as e:
                if self.params.get('ignoreerrors', False):
                    self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                    break
                else:
                    raise
        else:
            # for/else: no extractor accepted the URL (loop finished
            # without break or return)
            self.report_error('no suitable InfoExtractor for URL %s' % url)
687
688     def add_default_extra_info(self, ie_result, ie, url):
689         self.add_extra_info(ie_result, {
690             'extractor': ie.IE_NAME,
691             'webpage_url': url,
692             'webpage_url_basename': url_basename(url),
693             'extractor_key': ie.ie_key(),
694         })
695
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie (may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.

        Dispatches on ie_result['_type']: 'video', 'url', 'url_transparent',
        'playlist'/'multi_video' and the legacy 'compat_list'.
        """
        # NOTE(review): extra_info={} is a shared mutable default; it is only
        # read here, never mutated, but it is fragile — confirm before relying
        # on that elsewhere.

        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            # With --flat-playlist (extract_flat), url references inside a
            # playlist are returned unresolved instead of being extracted.
            extract_flat = self.params.get('extract_flat', False)
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                    extract_flat is True):
                if self.params.get('forcejson', False):
                    self.to_stdout(json.dumps(ie_result))
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # Non-None fields of the embedding result override the target's,
            # except '_type' and 'url' which must come from the target.
            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            assert new_result.get('_type') != 'url_transparent'

            # Recurse: the merged result may itself be a video, url, playlist...
            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist' or result_type == 'multi_video':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # --playlist-start is 1-based on the command line; 0-based here.
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

            playlistitems_str = self.params.get('playlist_items', None)
            playlistitems = None
            if playlistitems_str is not None:
                # Parse "--playlist-items 1,3,5-7" into a generator of
                # 1-based indices. Note this is a generator: it is consumed
                # exactly once in the branches below.
                def iter_playlistitems(format):
                    for string_segment in format.split(','):
                        if '-' in string_segment:
                            start, end = string_segment.split('-')
                            for item in range(int(start), int(end) + 1):
                                yield int(item)
                        else:
                            yield int(string_segment)
                playlistitems = iter_playlistitems(playlistitems_str)

            # 'entries' may be a plain list, a PagedList, or any iterable
            # (e.g. a generator); each case is materialized differently.
            ie_entries = ie_result['entries']
            if isinstance(ie_entries, list):
                n_all_entries = len(ie_entries)
                if playlistitems:
                    # Out-of-range indices are silently dropped here.
                    entries = [
                        ie_entries[i - 1] for i in playlistitems
                        if -n_all_entries <= i - 1 < n_all_entries]
                else:
                    entries = ie_entries[playliststart:playlistend]
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            elif isinstance(ie_entries, PagedList):
                if playlistitems:
                    entries = []
                    for item in playlistitems:
                        entries.extend(ie_entries.getslice(
                            item - 1, item
                        ))
                else:
                    entries = ie_entries.getslice(
                        playliststart, playlistend)
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))
            else:  # iterable
                if playlistitems:
                    # NOTE(review): unlike the list branch above, this does
                    # no bounds check — an out-of-range playlist item would
                    # raise IndexError. Confirm whether that is intended.
                    entry_list = list(ie_entries)
                    entries = [entry_list[i - 1] for i in playlistitems]
                else:
                    entries = list(itertools.islice(
                        ie_entries, playliststart, playlistend))
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))

            if self.params.get('playlistreverse', False):
                entries = entries[::-1]

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                # Per-entry extra info propagated into each child result.
                extra = {
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_id': ie_result.get('id'),
                    'playlist_title': ie_result.get('title'),
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

                # Skip entries rejected by --match-title, archive, filters etc.
                reason = self._match_entry(entry, incomplete=True)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                    continue

                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            return ie_result
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            # Legacy path: a bare list of results; tag each with the parent's
            # extractor/URL info before processing.
            def _fixup(r):
                self.add_extra_info(
                    r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    }
                )
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
859
860     def _build_format_filter(self, filter_spec):
861         " Returns a function to filter the formats according to the filter_spec "
862
863         OPERATORS = {
864             '<': operator.lt,
865             '<=': operator.le,
866             '>': operator.gt,
867             '>=': operator.ge,
868             '=': operator.eq,
869             '!=': operator.ne,
870         }
871         operator_rex = re.compile(r'''(?x)\s*
872             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
873             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
874             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
875             $
876             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
877         m = operator_rex.search(filter_spec)
878         if m:
879             try:
880                 comparison_value = int(m.group('value'))
881             except ValueError:
882                 comparison_value = parse_filesize(m.group('value'))
883                 if comparison_value is None:
884                     comparison_value = parse_filesize(m.group('value') + 'B')
885                 if comparison_value is None:
886                     raise ValueError(
887                         'Invalid value %r in format specification %r' % (
888                             m.group('value'), filter_spec))
889             op = OPERATORS[m.group('op')]
890
891         if not m:
892             STR_OPERATORS = {
893                 '=': operator.eq,
894                 '!=': operator.ne,
895             }
896             str_operator_rex = re.compile(r'''(?x)
897                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
898                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
899                 \s*(?P<value>[a-zA-Z0-9_-]+)
900                 \s*$
901                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
902             m = str_operator_rex.search(filter_spec)
903             if m:
904                 comparison_value = m.group('value')
905                 op = STR_OPERATORS[m.group('op')]
906
907         if not m:
908             raise ValueError('Invalid filter specification %r' % filter_spec)
909
910         def _filter(f):
911             actual_value = f.get(m.group('key'))
912             if actual_value is None:
913                 return m.group('none_inclusive')
914             return op(actual_value, comparison_value)
915         return _filter
916
    def build_format_selector(self, format_spec):
        """
        Compile a format specification string (e.g. 'bestvideo+bestaudio/best',
        'best[height<=480]') into a selector function mapping a list of format
        dicts to the chosen format dict(s).

        Raises SyntaxError for malformed specifications.
        """
        def syntax_error(note, start):
            # Build (not raise) a SyntaxError pointing at column start[1].
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        # Selector node kinds; FormatSelector is a small AST node.
        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        def _parse_filter(tokens):
            # Consume tokens up to the closing ']' and return the raw filter
            # string (compiled later by _build_format_filter).
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the sourrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    # Accumulate adjacent name/number/other-op tokens into one
                    # NAME token (format ids may contain '-', '.', etc.).
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Recursive-descent parse of a (sub-)expression into a list of
            # FormatSelector nodes. 'tokens' is a shared TokenIterator; the
            # inside_* flags tell nested calls which delimiters end them.
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        # a/b: try a first, fall back to b.
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        # '[...]' attaches a filter; bare '[...]' means best[...].
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        # a+b: merge video stream a with audio stream b.
                        video_selector = current_selector
                        audio_selector = _parse_format_selection(tokens, inside_merge=True)
                        if not video_selector or not audio_selector:
                            raise syntax_error('"+" must be between two format selectors', start)
                        current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _build_selector_function(selector):
            # Compile an AST node (or list of nodes) into a function
            # formats -> iterable of chosen formats.
            if isinstance(selector, list):
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(formats):
                    for f in fs:
                        for format in f(formats):
                            yield format
                return selector_function
            elif selector.type == GROUP:
                selector_function = _build_selector_function(selector.selector)
            elif selector.type == PICKFIRST:
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(formats):
                    for f in fs:
                        picked_formats = list(f(formats))
                        if picked_formats:
                            return picked_formats
                    return []
            elif selector.type == SINGLE:
                format_spec = selector.selector

                def selector_function(formats):
                    formats = list(formats)
                    if not formats:
                        return
                    if format_spec == 'all':
                        for f in formats:
                            yield f
                    elif format_spec in ['best', 'worst', None]:
                        # Formats are assumed pre-sorted worst-to-best, so
                        # index -1 is best and 0 is worst.
                        format_idx = 0 if format_spec == 'worst' else -1
                        audiovideo_formats = [
                            f for f in formats
                            if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                        if audiovideo_formats:
                            yield audiovideo_formats[format_idx]
                        # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
                        elif (all(f.get('acodec') != 'none' for f in formats) or
                              all(f.get('vcodec') != 'none' for f in formats)):
                            yield formats[format_idx]
                    elif format_spec == 'bestaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[-1]
                    elif format_spec == 'worstaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[0]
                    elif format_spec == 'bestvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[-1]
                    elif format_spec == 'worstvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[0]
                    else:
                        # Otherwise treat the spec as a file extension or an
                        # exact format_id.
                        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                        if format_spec in extensions:
                            filter_f = lambda f: f['ext'] == format_spec
                        else:
                            filter_f = lambda f: f['format_id'] == format_spec
                        matches = list(filter(filter_f, formats))
                        if matches:
                            yield matches[-1]
            elif selector.type == MERGE:
                def _merge(formats_info):
                    format_1, format_2 = [f['format_id'] for f in formats_info]
                    # The first format must contain the video and the
                    # second the audio
                    if formats_info[0].get('vcodec') == 'none':
                        self.report_error('The first format must '
                                          'contain the video, try using '
                                          '"-f %s+%s"' % (format_2, format_1))
                        return
                    output_ext = (
                        formats_info[0]['ext']
                        if self.params.get('merge_output_format') is None
                        else self.params['merge_output_format'])
                    # Synthesize a combined format dict: video properties from
                    # the first stream, audio properties from the second.
                    return {
                        'requested_formats': formats_info,
                        'format': '%s+%s' % (formats_info[0].get('format'),
                                             formats_info[1].get('format')),
                        'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                formats_info[1].get('format_id')),
                        'width': formats_info[0].get('width'),
                        'height': formats_info[0].get('height'),
                        'resolution': formats_info[0].get('resolution'),
                        'fps': formats_info[0].get('fps'),
                        'vcodec': formats_info[0].get('vcodec'),
                        'vbr': formats_info[0].get('vbr'),
                        'stretched_ratio': formats_info[0].get('stretched_ratio'),
                        'acodec': formats_info[1].get('acodec'),
                        'abr': formats_info[1].get('abr'),
                        'ext': output_ext,
                    }
                video_selector, audio_selector = map(_build_selector_function, selector.selector)

                def selector_function(formats):
                    formats = list(formats)
                    for pair in itertools.product(video_selector(formats), audio_selector(formats)):
                        yield _merge(pair)

            # Apply the node's '[...]' filters before the selector proper.
            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(formats):
                for _filter in filters:
                    formats = list(filter(_filter, formats))
                return selector_function(formats)
            return final_selector

        # Tokenize the spec with Python's own tokenizer.
        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        # Token stream with one-token pushback, shared across the recursive
        # _parse_format_selection calls.
        class TokenIterator(object):
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            next = __next__

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
1173
1174     def _calc_headers(self, info_dict):
1175         res = std_headers.copy()
1176
1177         add_headers = info_dict.get('http_headers')
1178         if add_headers:
1179             res.update(add_headers)
1180
1181         cookies = self._calc_cookies(info_dict)
1182         if cookies:
1183             res['Cookie'] = cookies
1184
1185         return res
1186
1187     def _calc_cookies(self, info_dict):
1188         pr = compat_urllib_request.Request(info_dict['url'])
1189         self.cookiejar.add_cookie_header(pr)
1190         return pr.get_header('Cookie')
1191
    def process_video_result(self, info_dict, download=True):
        """
        Normalize a single-video info_dict in place (thumbnails, display_id,
        upload_date, subtitles, formats), apply the requested format
        selection, and — if 'download' — download the selected format(s).

        Returns info_dict updated with the best selected format.
        Raises ExtractorError on missing mandatory fields, missing formats,
        or when no format matches the request.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        # Normalize a lone 'thumbnail' into a one-element 'thumbnails' list,
        # sort worst-to-best and assign ids/resolutions.
        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if thumbnails:
            thumbnails.sort(key=lambda t: (
                t.get('preference'), t.get('width'), t.get('height'),
                t.get('id'), t.get('url')))
            for i, t in enumerate(thumbnails):
                if t.get('width') and t.get('height'):
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                if t.get('id') is None:
                    t['id'] = '%d' % i

        if thumbnails and 'thumbnail' not in info_dict:
            # Last entry is the best after the sort above.
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Working around out-of-range timestamp values (e.g. negative ones on Windows,
            # see http://bugs.python.org/issue1646728)
            try:
                upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
                info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
            except (ValueError, OverflowError, OSError):
                pass

        # --list-subs: print and stop here (returns None).
        if self.params.get('listsubtitles', False):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
            self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
            return
        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], info_dict.get('subtitles'),
            info_dict.get('automatic_captions'))

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            raise ExtractorError('No video formats found!')

        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)

            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        for format_id, ambiguous_formats in formats_dict.items():
            if len(ambiguous_formats) > 1:
                # Disambiguate duplicates by appending '-<index>'.
                for i, format in enumerate(ambiguous_formats):
                    format['format_id'] = '%s-%d' % (format_id, i)

        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url']).lower()
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' field if the original info_dict listed
            # them; otherwise we end up with a circular reference: the first
            # (and unique) element in the 'formats' field in info_dict is
            # info_dict itself, which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats'):
            self.list_formats(info_dict)
            return
        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            # Default format: prefer bestvideo+bestaudio when the output is a
            # real file, the extractor supports it, and ffmpeg can merge;
            # always fall back to plain 'best'.
            req_format_list = []
            if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
                    info_dict['extractor'] in ['youtube', 'ted'] and
                    not info_dict.get('is_live')):
                merger = FFmpegMergerPP(self)
                if merger.available and merger.can_merge():
                    req_format_list.append('bestvideo+bestaudio')
            req_format_list.append('best')
            req_format = '/'.join(req_format_list)
        format_selector = self.build_format_selector(req_format)
        formats_to_download = list(format_selector(formats))
        if not formats_to_download:
            raise ExtractorError('requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
1332
1333     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1334         """Select the requested subtitles and their format"""
1335         available_subs = {}
1336         if normal_subtitles and self.params.get('writesubtitles'):
1337             available_subs.update(normal_subtitles)
1338         if automatic_captions and self.params.get('writeautomaticsub'):
1339             for lang, cap_info in automatic_captions.items():
1340                 if lang not in available_subs:
1341                     available_subs[lang] = cap_info
1342
1343         if (not self.params.get('writesubtitles') and not
1344                 self.params.get('writeautomaticsub') or not
1345                 available_subs):
1346             return None
1347
1348         if self.params.get('allsubtitles', False):
1349             requested_langs = available_subs.keys()
1350         else:
1351             if self.params.get('subtitleslangs', False):
1352                 requested_langs = self.params.get('subtitleslangs')
1353             elif 'en' in available_subs:
1354                 requested_langs = ['en']
1355             else:
1356                 requested_langs = [list(available_subs.keys())[0]]
1357
1358         formats_query = self.params.get('subtitlesformat', 'best')
1359         formats_preference = formats_query.split('/') if formats_query else []
1360         subs = {}
1361         for lang in requested_langs:
1362             formats = available_subs.get(lang)
1363             if formats is None:
1364                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1365                 continue
1366             for ext in formats_preference:
1367                 if ext == 'best':
1368                     f = formats[-1]
1369                     break
1370                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1371                 if matches:
1372                     f = matches[-1]
1373                     break
1374             else:
1375                 f = formats[-1]
1376                 self.report_warning(
1377                     'No subtitle format found matching "%s" for language %s, '
1378                     'using %s' % (formats_query, lang, f['ext']))
1379             subs[lang] = f
1380         return subs
1381
1382     def process_info(self, info_dict):
1383         """Process a single resolved IE result."""
1384
1385         assert info_dict.get('_type', 'video') == 'video'
1386
1387         max_downloads = self.params.get('max_downloads')
1388         if max_downloads is not None:
1389             if self._num_downloads >= int(max_downloads):
1390                 raise MaxDownloadsReached()
1391
1392         info_dict['fulltitle'] = info_dict['title']
1393         if len(info_dict['title']) > 200:
1394             info_dict['title'] = info_dict['title'][:197] + '...'
1395
1396         if 'format' not in info_dict:
1397             info_dict['format'] = info_dict['ext']
1398
1399         reason = self._match_entry(info_dict, incomplete=False)
1400         if reason is not None:
1401             self.to_screen('[download] ' + reason)
1402             return
1403
1404         self._num_downloads += 1
1405
1406         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1407
1408         # Forced printings
1409         if self.params.get('forcetitle', False):
1410             self.to_stdout(info_dict['fulltitle'])
1411         if self.params.get('forceid', False):
1412             self.to_stdout(info_dict['id'])
1413         if self.params.get('forceurl', False):
1414             if info_dict.get('requested_formats') is not None:
1415                 for f in info_dict['requested_formats']:
1416                     self.to_stdout(f['url'] + f.get('play_path', ''))
1417             else:
1418                 # For RTMP URLs, also include the playpath
1419                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1420         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1421             self.to_stdout(info_dict['thumbnail'])
1422         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1423             self.to_stdout(info_dict['description'])
1424         if self.params.get('forcefilename', False) and filename is not None:
1425             self.to_stdout(filename)
1426         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1427             self.to_stdout(formatSeconds(info_dict['duration']))
1428         if self.params.get('forceformat', False):
1429             self.to_stdout(info_dict['format'])
1430         if self.params.get('forcejson', False):
1431             self.to_stdout(json.dumps(info_dict))
1432
1433         # Do nothing else if in simulate mode
1434         if self.params.get('simulate', False):
1435             return
1436
1437         if filename is None:
1438             return
1439
1440         try:
1441             dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1442             if dn and not os.path.exists(dn):
1443                 os.makedirs(dn)
1444         except (OSError, IOError) as err:
1445             self.report_error('unable to create directory ' + compat_str(err))
1446             return
1447
1448         if self.params.get('writedescription', False):
1449             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1450             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1451                 self.to_screen('[info] Video description is already present')
1452             elif info_dict.get('description') is None:
1453                 self.report_warning('There\'s no description to write.')
1454             else:
1455                 try:
1456                     self.to_screen('[info] Writing video description to: ' + descfn)
1457                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1458                         descfile.write(info_dict['description'])
1459                 except (OSError, IOError):
1460                     self.report_error('Cannot write description file ' + descfn)
1461                     return
1462
1463         if self.params.get('writeannotations', False):
1464             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1465             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1466                 self.to_screen('[info] Video annotations are already present')
1467             else:
1468                 try:
1469                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1470                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1471                         annofile.write(info_dict['annotations'])
1472                 except (KeyError, TypeError):
1473                     self.report_warning('There are no annotations to write.')
1474                 except (OSError, IOError):
1475                     self.report_error('Cannot write annotations file: ' + annofn)
1476                     return
1477
1478         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1479                                        self.params.get('writeautomaticsub')])
1480
1481         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1482             # subtitles download errors are already managed as troubles in relevant IE
1483             # that way it will silently go on when used with unsupporting IE
1484             subtitles = info_dict['requested_subtitles']
1485             ie = self.get_info_extractor(info_dict['extractor_key'])
1486             for sub_lang, sub_info in subtitles.items():
1487                 sub_format = sub_info['ext']
1488                 if sub_info.get('data') is not None:
1489                     sub_data = sub_info['data']
1490                 else:
1491                     try:
1492                         sub_data = ie._download_webpage(
1493                             sub_info['url'], info_dict['id'], note=False)
1494                     except ExtractorError as err:
1495                         self.report_warning('Unable to download subtitle for "%s": %s' %
1496                                             (sub_lang, compat_str(err.cause)))
1497                         continue
1498                 try:
1499                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1500                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1501                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1502                     else:
1503                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1504                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1505                             subfile.write(sub_data)
1506                 except (OSError, IOError):
1507                     self.report_error('Cannot write subtitles file ' + sub_filename)
1508                     return
1509
1510         if self.params.get('writeinfojson', False):
1511             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1512             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1513                 self.to_screen('[info] Video description metadata is already present')
1514             else:
1515                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1516                 try:
1517                     write_json_file(self.filter_requested_info(info_dict), infofn)
1518                 except (OSError, IOError):
1519                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1520                     return
1521
1522         self._write_thumbnails(info_dict, filename)
1523
1524         if not self.params.get('skip_download', False):
1525             try:
1526                 def dl(name, info):
1527                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1528                     for ph in self._progress_hooks:
1529                         fd.add_progress_hook(ph)
1530                     if self.params.get('verbose'):
1531                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1532                     return fd.download(name, info)
1533
1534                 if info_dict.get('requested_formats') is not None:
1535                     downloaded = []
1536                     success = True
1537                     merger = FFmpegMergerPP(self)
1538                     if not merger.available:
1539                         postprocessors = []
1540                         self.report_warning('You have requested multiple '
1541                                             'formats but ffmpeg or avconv are not installed.'
1542                                             ' The formats won\'t be merged.')
1543                     else:
1544                         postprocessors = [merger]
1545
1546                     def compatible_formats(formats):
1547                         video, audio = formats
1548                         # Check extension
1549                         video_ext, audio_ext = audio.get('ext'), video.get('ext')
1550                         if video_ext and audio_ext:
1551                             COMPATIBLE_EXTS = (
1552                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
1553                                 ('webm')
1554                             )
1555                             for exts in COMPATIBLE_EXTS:
1556                                 if video_ext in exts and audio_ext in exts:
1557                                     return True
1558                         # TODO: Check acodec/vcodec
1559                         return False
1560
1561                     filename_real_ext = os.path.splitext(filename)[1][1:]
1562                     filename_wo_ext = (
1563                         os.path.splitext(filename)[0]
1564                         if filename_real_ext == info_dict['ext']
1565                         else filename)
1566                     requested_formats = info_dict['requested_formats']
1567                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1568                         info_dict['ext'] = 'mkv'
1569                         self.report_warning(
1570                             'Requested formats are incompatible for merge and will be merged into mkv.')
1571                     # Ensure filename always has a correct extension for successful merge
1572                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1573                     if os.path.exists(encodeFilename(filename)):
1574                         self.to_screen(
1575                             '[download] %s has already been downloaded and '
1576                             'merged' % filename)
1577                     else:
1578                         for f in requested_formats:
1579                             new_info = dict(info_dict)
1580                             new_info.update(f)
1581                             fname = self.prepare_filename(new_info)
1582                             fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
1583                             downloaded.append(fname)
1584                             partial_success = dl(fname, new_info)
1585                             success = success and partial_success
1586                         info_dict['__postprocessors'] = postprocessors
1587                         info_dict['__files_to_merge'] = downloaded
1588                 else:
1589                     # Just a single file
1590                     success = dl(filename, info_dict)
1591             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1592                 self.report_error('unable to download video data: %s' % str(err))
1593                 return
1594             except (OSError, IOError) as err:
1595                 raise UnavailableVideoError(err)
1596             except (ContentTooShortError, ) as err:
1597                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1598                 return
1599
1600             if success:
1601                 # Fixup content
1602                 fixup_policy = self.params.get('fixup')
1603                 if fixup_policy is None:
1604                     fixup_policy = 'detect_or_warn'
1605
1606                 stretched_ratio = info_dict.get('stretched_ratio')
1607                 if stretched_ratio is not None and stretched_ratio != 1:
1608                     if fixup_policy == 'warn':
1609                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1610                             info_dict['id'], stretched_ratio))
1611                     elif fixup_policy == 'detect_or_warn':
1612                         stretched_pp = FFmpegFixupStretchedPP(self)
1613                         if stretched_pp.available:
1614                             info_dict.setdefault('__postprocessors', [])
1615                             info_dict['__postprocessors'].append(stretched_pp)
1616                         else:
1617                             self.report_warning(
1618                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1619                                     info_dict['id'], stretched_ratio))
1620                     else:
1621                         assert fixup_policy in ('ignore', 'never')
1622
1623                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1624                     if fixup_policy == 'warn':
1625                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1626                             info_dict['id']))
1627                     elif fixup_policy == 'detect_or_warn':
1628                         fixup_pp = FFmpegFixupM4aPP(self)
1629                         if fixup_pp.available:
1630                             info_dict.setdefault('__postprocessors', [])
1631                             info_dict['__postprocessors'].append(fixup_pp)
1632                         else:
1633                             self.report_warning(
1634                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1635                                     info_dict['id']))
1636                     else:
1637                         assert fixup_policy in ('ignore', 'never')
1638
1639                 try:
1640                     self.post_process(filename, info_dict)
1641                 except (PostProcessingError) as err:
1642                     self.report_error('postprocessing: %s' % str(err))
1643                     return
1644                 self.record_download_archive(info_dict)
1645
1646     def download(self, url_list):
1647         """Download a given list of URLs."""
1648         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1649         if (len(url_list) > 1 and
1650                 '%' not in outtmpl and
1651                 self.params.get('max_downloads') != 1):
1652             raise SameFileError(outtmpl)
1653
1654         for url in url_list:
1655             try:
1656                 # It also downloads the videos
1657                 res = self.extract_info(
1658                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
1659             except UnavailableVideoError:
1660                 self.report_error('unable to download video')
1661             except MaxDownloadsReached:
1662                 self.to_screen('[info] Maximum number of downloaded files reached.')
1663                 raise
1664             else:
1665                 if self.params.get('dump_single_json', False):
1666                     self.to_stdout(json.dumps(res))
1667
1668         return self._download_retcode
1669
1670     def download_with_info_file(self, info_filename):
1671         with contextlib.closing(fileinput.FileInput(
1672                 [info_filename], mode='r',
1673                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1674             # FileInput doesn't have a read method, we can't call json.load
1675             info = self.filter_requested_info(json.loads('\n'.join(f)))
1676         try:
1677             self.process_ie_result(info, download=True)
1678         except DownloadError:
1679             webpage_url = info.get('webpage_url')
1680             if webpage_url is not None:
1681                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1682                 return self.download([webpage_url])
1683             else:
1684                 raise
1685         return self._download_retcode
1686
1687     @staticmethod
1688     def filter_requested_info(info_dict):
1689         return dict(
1690             (k, v) for k, v in info_dict.items()
1691             if k not in ['requested_formats', 'requested_subtitles'])
1692
1693     def post_process(self, filename, ie_info):
1694         """Run all the postprocessors on the given file."""
1695         info = dict(ie_info)
1696         info['filepath'] = filename
1697         pps_chain = []
1698         if ie_info.get('__postprocessors') is not None:
1699             pps_chain.extend(ie_info['__postprocessors'])
1700         pps_chain.extend(self._pps)
1701         for pp in pps_chain:
1702             files_to_delete = []
1703             try:
1704                 files_to_delete, info = pp.run(info)
1705             except PostProcessingError as e:
1706                 self.report_error(e.msg)
1707             if files_to_delete and not self.params.get('keepvideo', False):
1708                 for old_filename in files_to_delete:
1709                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1710                     try:
1711                         os.remove(encodeFilename(old_filename))
1712                     except (IOError, OSError):
1713                         self.report_warning('Unable to remove downloaded original file')
1714
1715     def _make_archive_id(self, info_dict):
1716         # Future-proof against any change in case
1717         # and backwards compatibility with prior versions
1718         extractor = info_dict.get('extractor_key')
1719         if extractor is None:
1720             if 'id' in info_dict:
1721                 extractor = info_dict.get('ie_key')  # key in a playlist
1722         if extractor is None:
1723             return None  # Incomplete video information
1724         return extractor.lower() + ' ' + info_dict['id']
1725
1726     def in_download_archive(self, info_dict):
1727         fn = self.params.get('download_archive')
1728         if fn is None:
1729             return False
1730
1731         vid_id = self._make_archive_id(info_dict)
1732         if vid_id is None:
1733             return False  # Incomplete video information
1734
1735         try:
1736             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1737                 for line in archive_file:
1738                     if line.strip() == vid_id:
1739                         return True
1740         except IOError as ioe:
1741             if ioe.errno != errno.ENOENT:
1742                 raise
1743         return False
1744
1745     def record_download_archive(self, info_dict):
1746         fn = self.params.get('download_archive')
1747         if fn is None:
1748             return
1749         vid_id = self._make_archive_id(info_dict)
1750         assert vid_id
1751         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1752             archive_file.write(vid_id + '\n')
1753
1754     @staticmethod
1755     def format_resolution(format, default='unknown'):
1756         if format.get('vcodec') == 'none':
1757             return 'audio only'
1758         if format.get('resolution') is not None:
1759             return format['resolution']
1760         if format.get('height') is not None:
1761             if format.get('width') is not None:
1762                 res = '%sx%s' % (format['width'], format['height'])
1763             else:
1764                 res = '%sp' % format['height']
1765         elif format.get('width') is not None:
1766             res = '?x%d' % format['width']
1767         else:
1768             res = default
1769         return res
1770
    def _format_note(self, fdict):
        """Build the short human-readable note column for --list-formats:
        codecs, bitrates, fps, sample rate and filesize.  The pieces are
        appended strictly in this order, comma-separated once non-empty."""
        res = ''
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            if res:
                res += ', '
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            if res:
                res += ', '
            res += fdict['vcodec']
            if fdict.get('vbr') is not None:
                # Bitrate is appended right after, producing e.g. "h264@1000k"
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            # Video bitrate known but codec is not
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            res += ', %sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            if fdict['acodec'] == 'none':
                res += 'video only'
            else:
                # Pad to align the audio bitrate column
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            if res:
                res += ', '
            # '~' marks the size as an estimate
            res += '~' + format_bytes(fdict['filesize_approx'])
        return res
1820
1821     def list_formats(self, info_dict):
1822         formats = info_dict.get('formats', [info_dict])
1823         table = [
1824             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1825             for f in formats
1826             if f.get('preference') is None or f['preference'] >= -1000]
1827         if len(formats) > 1:
1828             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1829
1830         header_line = ['format code', 'extension', 'resolution', 'note']
1831         self.to_screen(
1832             '[info] Available formats for %s:\n%s' %
1833             (info_dict['id'], render_table(header_line, table)))
1834
1835     def list_thumbnails(self, info_dict):
1836         thumbnails = info_dict.get('thumbnails')
1837         if not thumbnails:
1838             tn_url = info_dict.get('thumbnail')
1839             if tn_url:
1840                 thumbnails = [{'id': '0', 'url': tn_url}]
1841             else:
1842                 self.to_screen(
1843                     '[info] No thumbnails present for %s' % info_dict['id'])
1844                 return
1845
1846         self.to_screen(
1847             '[info] Thumbnails for %s:' % info_dict['id'])
1848         self.to_screen(render_table(
1849             ['ID', 'width', 'height', 'URL'],
1850             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1851
1852     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1853         if not subtitles:
1854             self.to_screen('%s has no %s' % (video_id, name))
1855             return
1856         self.to_screen(
1857             'Available %s for %s:' % (name, video_id))
1858         self.to_screen(render_table(
1859             ['Language', 'formats'],
1860             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1861                 for lang, formats in subtitles.items()]))
1862
    def urlopen(self, req):
        """ Start an HTTP download """
        # All network traffic goes through the opener built in _setup_opener
        # (proxies, cookies, custom handlers), with the user-configured
        # socket timeout applied per request.
        return self._opener.open(req, timeout=self._socket_timeout)
1866
    def print_debug_header(self):
        """Write version/encoding/environment diagnostics; no-op unless
        the 'verbose' option is set."""
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        # sys.stdout may lack an 'encoding' attribute (e.g. when replaced)
        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
        try:
            # Best effort: report the git revision when running from a checkout
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        except Exception:
            try:
                # Python 2 only: clear the current exception state
                sys.exc_clear()
            except Exception:
                pass
        self._write_string('[debug] Python version %s - %s\n' % (
            platform.python_version(), platform_name()))

        # Versions of the external programs we may invoke (ffmpeg/avconv etc.)
        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Collect the effective proxy configuration from the opener's handlers
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            # Opt-in: contact yt-dl.org to report the public IP and check
            # whether a newer release is available
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
1931
1932     def _setup_opener(self):
1933         timeout_val = self.params.get('socket_timeout')
1934         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1935
1936         opts_cookiefile = self.params.get('cookiefile')
1937         opts_proxy = self.params.get('proxy')
1938
1939         if opts_cookiefile is None:
1940             self.cookiejar = compat_cookiejar.CookieJar()
1941         else:
1942             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1943                 opts_cookiefile)
1944             if os.access(opts_cookiefile, os.R_OK):
1945                 self.cookiejar.load()
1946
1947         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
1948         if opts_proxy is not None:
1949             if opts_proxy == '':
1950                 proxies = {}
1951             else:
1952                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1953         else:
1954             proxies = compat_urllib_request.getproxies()
1955             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1956             if 'http' in proxies and 'https' not in proxies:
1957                 proxies['https'] = proxies['http']
1958         proxy_handler = PerRequestProxyHandler(proxies)
1959
1960         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1961         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1962         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1963         opener = compat_urllib_request.build_opener(
1964             proxy_handler, https_handler, cookie_processor, ydlh)
1965
1966         # Delete the default user-agent header, which would otherwise apply in
1967         # cases where our custom HTTP handler doesn't come into play
1968         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1969         opener.addheaders = []
1970         self._opener = opener
1971
1972     def encode(self, s):
1973         if isinstance(s, bytes):
1974             return s  # Already encoded
1975
1976         try:
1977             return s.encode(self.get_encoding())
1978         except UnicodeEncodeError as err:
1979             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1980             raise
1981
1982     def get_encoding(self):
1983         encoding = self.params.get('encoding')
1984         if encoding is None:
1985             encoding = preferredencoding()
1986         return encoding
1987
1988     def _write_thumbnails(self, info_dict, filename):
1989         if self.params.get('writethumbnail', False):
1990             thumbnails = info_dict.get('thumbnails')
1991             if thumbnails:
1992                 thumbnails = [thumbnails[-1]]
1993         elif self.params.get('write_all_thumbnails', False):
1994             thumbnails = info_dict.get('thumbnails')
1995         else:
1996             return
1997
1998         if not thumbnails:
1999             # No thumbnails present, so return immediately
2000             return
2001
2002         for t in thumbnails:
2003             thumb_ext = determine_ext(t['url'], 'jpg')
2004             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2005             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2006             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
2007
2008             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2009                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2010                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2011             else:
2012                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2013                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2014                 try:
2015                     uf = self.urlopen(t['url'])
2016                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2017                         shutil.copyfileobj(uf, thumbf)
2018                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2019                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2020                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2021                     self.report_warning('Unable to download thumbnail "%s": %s' %
2022                                         (t['url'], compat_str(err)))