]> git.bitcoin.ninja Git - youtube-dl/blob - youtube_dl/YoutubeDL.py
[xhamster] flake8
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import datetime
9 import errno
10 import fileinput
11 import io
12 import itertools
13 import json
14 import locale
15 import operator
16 import os
17 import platform
18 import re
19 import shutil
20 import subprocess
21 import socket
22 import sys
23 import time
24 import tokenize
25 import traceback
26
27 if os.name == 'nt':
28     import ctypes
29
30 from .compat import (
31     compat_basestring,
32     compat_cookiejar,
33     compat_expanduser,
34     compat_get_terminal_size,
35     compat_http_client,
36     compat_kwargs,
37     compat_str,
38     compat_tokenize_tokenize,
39     compat_urllib_error,
40     compat_urllib_request,
41 )
42 from .utils import (
43     escape_url,
44     ContentTooShortError,
45     date_from_str,
46     DateRange,
47     DEFAULT_OUTTMPL,
48     determine_ext,
49     DownloadError,
50     encodeFilename,
51     ExtractorError,
52     format_bytes,
53     formatSeconds,
54     HEADRequest,
55     locked_file,
56     make_HTTPS_handler,
57     MaxDownloadsReached,
58     PagedList,
59     parse_filesize,
60     PerRequestProxyHandler,
61     PostProcessingError,
62     platform_name,
63     preferredencoding,
64     render_table,
65     SameFileError,
66     sanitize_filename,
67     sanitize_path,
68     std_headers,
69     subtitles_filename,
70     UnavailableVideoError,
71     url_basename,
72     version_tuple,
73     write_json_file,
74     write_string,
75     YoutubeDLHandler,
76     prepend_extension,
77     replace_extension,
78     args_to_str,
79     age_restricted,
80 )
81 from .cache import Cache
82 from .extractor import get_info_extractor, gen_extractors
83 from .downloader import get_suitable_downloader
84 from .downloader.rtmp import rtmpdump_version
85 from .postprocessor import (
86     FFmpegFixupM4aPP,
87     FFmpegFixupStretchedPP,
88     FFmpegMergerPP,
89     FFmpegPostProcessor,
90     get_postprocessor,
91 )
92 from .version import __version__
93
94
95 class YoutubeDL(object):
96     """YoutubeDL class.
97
98     YoutubeDL objects are the ones responsible for downloading the
99     actual video file and writing it to disk if the user has requested
100     it, among some other tasks. In most cases there should be one per
101     program. As, given a video URL, the downloader doesn't know how to
102     extract all the needed information, task that InfoExtractors do, it
103     has to pass the URL to one of them.
104
105     For this, YoutubeDL objects have a method that allows
106     InfoExtractors to be registered in a given order. When it is passed
107     a URL, the YoutubeDL object hands it to the first InfoExtractor it
108     finds that reports being able to handle it. The InfoExtractor extracts
109     all the information about the video or videos the URL refers to, and
110     YoutubeDL processes the extracted information, possibly using a File
111     Downloader to download the video.
112
113     YoutubeDL objects accept a lot of parameters. In order not to saturate
114     the object constructor with arguments, it receives a dictionary of
115     options instead. These options are available through the params
116     attribute for the InfoExtractors to use. The YoutubeDL also
117     registers itself as the downloader in charge for the InfoExtractors
118     that are added to it, so this is a "mutual registration".
119
120     Available options:
121
122     username:          Username for authentication purposes.
123     password:          Password for authentication purposes.
124     videopassword:     Password for accessing a video.
125     usenetrc:          Use netrc for authentication instead.
126     verbose:           Print additional info to stdout.
127     quiet:             Do not print messages to stdout.
128     no_warnings:       Do not print out anything for warnings.
129     forceurl:          Force printing final URL.
130     forcetitle:        Force printing title.
131     forceid:           Force printing ID.
132     forcethumbnail:    Force printing thumbnail URL.
133     forcedescription:  Force printing description.
134     forcefilename:     Force printing final filename.
135     forceduration:     Force printing duration.
136     forcejson:         Force printing info_dict as JSON.
137     dump_single_json:  Force printing the info_dict of the whole playlist
138                        (or video) as a single JSON line.
139     simulate:          Do not download the video files.
140     format:            Video format code. See options.py for more information.
141     outtmpl:           Template for output names.
142     restrictfilenames: Do not allow "&" and spaces in file names
143     ignoreerrors:      Do not stop on download errors.
144     force_generic_extractor: Force downloader to use the generic extractor
145     nooverwrites:      Prevent overwriting files.
146     playliststart:     Playlist item to start at.
147     playlistend:       Playlist item to end at.
148     playlist_items:    Specific indices of playlist to download.
149     playlistreverse:   Download playlist items in reverse order.
150     matchtitle:        Download only matching titles.
151     rejecttitle:       Reject downloads for matching titles.
152     logger:            Log messages to a logging.Logger instance.
153     logtostderr:       Log messages to stderr instead of stdout.
154     writedescription:  Write the video description to a .description file
155     writeinfojson:     Write the video metadata to a .info.json file
156     writeannotations:  Write the video annotations to a .annotations.xml file
157     writethumbnail:    Write the thumbnail image to a file
158     write_all_thumbnails:  Write all thumbnail formats to files
159     writesubtitles:    Write the video subtitles to a file
160     writeautomaticsub: Write the automatic subtitles to a file
161     allsubtitles:      Downloads all the subtitles of the video
162                        (requires writesubtitles or writeautomaticsub)
163     listsubtitles:     Lists all available subtitles for the video
164     subtitlesformat:   The format code for subtitles
165     subtitleslangs:    List of languages of the subtitles to download
166     keepvideo:         Keep the video file after post-processing
167     daterange:         A DateRange object, download only if the upload_date is in the range.
168     skip_download:     Skip the actual download of the video file
169     cachedir:          Location of the cache files in the filesystem.
170                        False to disable filesystem cache.
171     noplaylist:        Download single video instead of a playlist if in doubt.
172     age_limit:         An integer representing the user's age in years.
173                        Unsuitable videos for the given age are skipped.
174     min_views:         An integer representing the minimum view count the video
175                        must have in order to not be skipped.
176                        Videos without view count information are always
177                        downloaded. None for no limit.
178     max_views:         An integer representing the maximum view count.
179                        Videos that are more popular than that are not
180                        downloaded.
181                        Videos without view count information are always
182                        downloaded. None for no limit.
183     download_archive:  File name of a file where all downloads are recorded.
184                        Videos already present in the file are not downloaded
185                        again.
186     cookiefile:        File name where cookies should be read from and dumped to.
187     nocheckcertificate:Do not verify SSL certificates
188     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
189                        At the moment, this is only supported by YouTube.
190     proxy:             URL of the proxy server to use
191     cn_verification_proxy:  URL of the proxy to use for IP address verification
192                        on Chinese sites. (Experimental)
193     socket_timeout:    Time to wait for unresponsive hosts, in seconds
194     bidi_workaround:   Work around buggy terminals without bidirectional text
195                        support, using fribidi
196     debug_printtraffic:Print out sent and received HTTP traffic
197     include_ads:       Download ads as well
198     default_search:    Prepend this string if an input url is not valid.
199                        'auto' for elaborate guessing
200     encoding:          Use this encoding instead of the system-specified.
201     extract_flat:      Do not resolve URLs, return the immediate result.
202                        Pass in 'in_playlist' to only show this behavior for
203                        playlist items.
204     postprocessors:    A list of dictionaries, each with an entry
205                        * key:  The name of the postprocessor. See
206                                youtube_dl/postprocessor/__init__.py for a list.
207                        as well as any further keyword arguments for the
208                        postprocessor.
209     progress_hooks:    A list of functions that get called on download
210                        progress, with a dictionary with the entries
211                        * status: One of "downloading", "error", or "finished".
212                                  Check this first and ignore unknown values.
213
214                        If status is one of "downloading", or "finished", the
215                        following properties may also be present:
216                        * filename: The final filename (always present)
217                        * tmpfilename: The filename we're currently writing to
218                        * downloaded_bytes: Bytes on disk
219                        * total_bytes: Size of the whole file, None if unknown
220                        * total_bytes_estimate: Guess of the eventual file size,
221                                                None if unavailable.
222                        * elapsed: The number of seconds since download started.
223                        * eta: The estimated time in seconds, None if unknown
224                        * speed: The download speed in bytes/second, None if
225                                 unknown
226                        * fragment_index: The counter of the currently
227                                          downloaded video fragment.
228                        * fragment_count: The number of fragments (= individual
229                                          files that will be merged)
230
231                        Progress hooks are guaranteed to be called at least once
232                        (with status "finished") if the download is successful.
233     merge_output_format: Extension to use when merging formats.
234     fixup:             Automatically correct known faults of the file.
235                        One of:
236                        - "never": do nothing
237                        - "warn": only emit a warning
238                        - "detect_or_warn": check whether we can do anything
239                                            about it, warn otherwise (default)
240     source_address:    (Experimental) Client-side IP address to bind to.
241     call_home:         Boolean, true iff we are allowed to contact the
242                        youtube-dl servers for debugging.
243     sleep_interval:    Number of seconds to sleep before each download.
244     listformats:       Print an overview of available video formats and exit.
245     list_thumbnails:   Print a table of all thumbnails and exit.
246     match_filter:      A function that gets called with the info_dict of
247                        every video.
248                        If it returns a message, the video is ignored.
249                        If it returns None, the video is downloaded.
250                        match_filter_func in utils.py is one example for this.
251     no_color:          Do not emit color codes in output.
252
253     The following options determine which downloader is picked:
254     external_downloader: Executable of the external downloader to call.
255                        None or unset for standard (built-in) downloader.
256     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
257
258     The following parameters are not used by YoutubeDL itself, they are used by
259     the downloader (see youtube_dl/downloader/common.py):
260     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
261     noresizebuffer, retries, continuedl, noprogress, consoletitle,
262     xattr_set_filesize, external_downloader_args.
263
264     The following options are used by the post processors:
265     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
266                        otherwise prefer avconv.
267     postprocessor_args: A list of additional command-line arguments for the
268                         postprocessor.
269     """
270
    # Class-level placeholders. __init__ rebinds every one of these names on
    # the instance, so the values below are only visible on the class itself.
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None
277
278     def __init__(self, params=None, auto_init=True):
279         """Create a FileDownloader object with the given options."""
280         if params is None:
281             params = {}
282         self._ies = []
283         self._ies_instances = {}
284         self._pps = []
285         self._progress_hooks = []
286         self._download_retcode = 0
287         self._num_downloads = 0
288         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
289         self._err_file = sys.stderr
290         self.params = params
291         self.cache = Cache(self)
292
293         if params.get('bidi_workaround', False):
294             try:
295                 import pty
296                 master, slave = pty.openpty()
297                 width = compat_get_terminal_size().columns
298                 if width is None:
299                     width_args = []
300                 else:
301                     width_args = ['-w', str(width)]
302                 sp_kwargs = dict(
303                     stdin=subprocess.PIPE,
304                     stdout=slave,
305                     stderr=self._err_file)
306                 try:
307                     self._output_process = subprocess.Popen(
308                         ['bidiv'] + width_args, **sp_kwargs
309                     )
310                 except OSError:
311                     self._output_process = subprocess.Popen(
312                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
313                 self._output_channel = os.fdopen(master, 'rb')
314             except OSError as ose:
315                 if ose.errno == 2:
316                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
317                 else:
318                     raise
319
320         if (sys.version_info >= (3,) and sys.platform != 'win32' and
321                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
322                 not params.get('restrictfilenames', False)):
323             # On Python 3, the Unicode filesystem API will throw errors (#1474)
324             self.report_warning(
325                 'Assuming --restrict-filenames since file system encoding '
326                 'cannot encode all characters. '
327                 'Set the LC_ALL environment variable to fix this.')
328             self.params['restrictfilenames'] = True
329
330         if isinstance(params.get('outtmpl'), bytes):
331             self.report_warning(
332                 'Parameter outtmpl is bytes, but should be a unicode string. '
333                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
334
335         self._setup_opener()
336
337         if auto_init:
338             self.print_debug_header()
339             self.add_default_info_extractors()
340
341         for pp_def_raw in self.params.get('postprocessors', []):
342             pp_class = get_postprocessor(pp_def_raw['key'])
343             pp_def = dict(pp_def_raw)
344             del pp_def['key']
345             pp = pp_class(self, **compat_kwargs(pp_def))
346             self.add_post_processor(pp)
347
348         for ph in self.params.get('progress_hooks', []):
349             self.add_progress_hook(ph)
350
351     def warn_if_short_id(self, argv):
352         # short YouTube ID starting with dash?
353         idxs = [
354             i for i, a in enumerate(argv)
355             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
356         if idxs:
357             correct_argv = (
358                 ['youtube-dl'] +
359                 [a for i, a in enumerate(argv) if i not in idxs] +
360                 ['--'] + [argv[i] for i in idxs]
361             )
362             self.report_warning(
363                 'Long argument string detected. '
364                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
365                 args_to_str(correct_argv))
366
367     def add_info_extractor(self, ie):
368         """Add an InfoExtractor object to the end of the list."""
369         self._ies.append(ie)
370         self._ies_instances[ie.ie_key()] = ie
371         ie.set_downloader(self)
372
373     def get_info_extractor(self, ie_key):
374         """
375         Get an instance of an IE with name ie_key, it will try to get one from
376         the _ies list, if there's no instance it will create a new one and add
377         it to the extractor list.
378         """
379         ie = self._ies_instances.get(ie_key)
380         if ie is None:
381             ie = get_info_extractor(ie_key)()
382             self.add_info_extractor(ie)
383         return ie
384
385     def add_default_info_extractors(self):
386         """
387         Add the InfoExtractors returned by gen_extractors to the end of the list
388         """
389         for ie in gen_extractors():
390             self.add_info_extractor(ie)
391
392     def add_post_processor(self, pp):
393         """Add a PostProcessor object to the end of the chain."""
394         self._pps.append(pp)
395         pp.set_downloader(self)
396
397     def add_progress_hook(self, ph):
398         """Add the progress hook (currently only for the file downloader)"""
399         self._progress_hooks.append(ph)
400
401     def _bidi_workaround(self, message):
402         if not hasattr(self, '_output_channel'):
403             return message
404
405         assert hasattr(self, '_output_process')
406         assert isinstance(message, compat_str)
407         line_count = message.count('\n') + 1
408         self._output_process.stdin.write((message + '\n').encode('utf-8'))
409         self._output_process.stdin.flush()
410         res = ''.join(self._output_channel.readline().decode('utf-8')
411                       for _ in range(line_count))
412         return res[:-len('\n')]
413
414     def to_screen(self, message, skip_eol=False):
415         """Print message to stdout if not in quiet mode."""
416         return self.to_stdout(message, skip_eol, check_quiet=True)
417
418     def _write_string(self, s, out=None):
419         write_string(s, out=out, encoding=self.params.get('encoding'))
420
421     def to_stdout(self, message, skip_eol=False, check_quiet=False):
422         """Print message to stdout if not in quiet mode."""
423         if self.params.get('logger'):
424             self.params['logger'].debug(message)
425         elif not check_quiet or not self.params.get('quiet', False):
426             message = self._bidi_workaround(message)
427             terminator = ['\n', ''][skip_eol]
428             output = message + terminator
429
430             self._write_string(output, self._screen_file)
431
432     def to_stderr(self, message):
433         """Print message to stderr."""
434         assert isinstance(message, compat_str)
435         if self.params.get('logger'):
436             self.params['logger'].error(message)
437         else:
438             message = self._bidi_workaround(message)
439             output = message + '\n'
440             self._write_string(output, self._err_file)
441
442     def to_console_title(self, message):
443         if not self.params.get('consoletitle', False):
444             return
445         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
446             # c_wchar_p() might not be necessary if `message` is
447             # already of type unicode()
448             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
449         elif 'TERM' in os.environ:
450             self._write_string('\033]0;%s\007' % message, self._screen_file)
451
452     def save_console_title(self):
453         if not self.params.get('consoletitle', False):
454             return
455         if 'TERM' in os.environ:
456             # Save the title on stack
457             self._write_string('\033[22;0t', self._screen_file)
458
459     def restore_console_title(self):
460         if not self.params.get('consoletitle', False):
461             return
462         if 'TERM' in os.environ:
463             # Restore the title from stack
464             self._write_string('\033[23;0t', self._screen_file)
465
466     def __enter__(self):
467         self.save_console_title()
468         return self
469
470     def __exit__(self, *args):
471         self.restore_console_title()
472
473         if self.params.get('cookiefile') is not None:
474             self.cookiejar.save()
475
476     def trouble(self, message=None, tb=None):
477         """Determine action to take when a download problem appears.
478
479         Depending on if the downloader has been configured to ignore
480         download errors or not, this method may throw an exception or
481         not when errors are found, after printing the message.
482
483         tb, if given, is additional traceback information.
484         """
485         if message is not None:
486             self.to_stderr(message)
487         if self.params.get('verbose'):
488             if tb is None:
489                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
490                     tb = ''
491                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
492                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
493                     tb += compat_str(traceback.format_exc())
494                 else:
495                     tb_data = traceback.format_list(traceback.extract_stack())
496                     tb = ''.join(tb_data)
497             self.to_stderr(tb)
498         if not self.params.get('ignoreerrors', False):
499             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
500                 exc_info = sys.exc_info()[1].exc_info
501             else:
502                 exc_info = sys.exc_info()
503             raise DownloadError(message, exc_info)
504         self._download_retcode = 1
505
506     def report_warning(self, message):
507         '''
508         Print the message to stderr, it will be prefixed with 'WARNING:'
509         If stderr is a tty file the 'WARNING:' will be colored
510         '''
511         if self.params.get('logger') is not None:
512             self.params['logger'].warning(message)
513         else:
514             if self.params.get('no_warnings'):
515                 return
516             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
517                 _msg_header = '\033[0;33mWARNING:\033[0m'
518             else:
519                 _msg_header = 'WARNING:'
520             warning_message = '%s %s' % (_msg_header, message)
521             self.to_stderr(warning_message)
522
523     def report_error(self, message, tb=None):
524         '''
525         Do the same as trouble, but prefixes the message with 'ERROR:', colored
526         in red if stderr is a tty file.
527         '''
528         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
529             _msg_header = '\033[0;31mERROR:\033[0m'
530         else:
531             _msg_header = 'ERROR:'
532         error_message = '%s %s' % (_msg_header, message)
533         self.trouble(error_message, tb)
534
535     def report_file_already_downloaded(self, file_name):
536         """Report file has already been fully downloaded."""
537         try:
538             self.to_screen('[download] %s has already been downloaded' % file_name)
539         except UnicodeEncodeError:
540             self.to_screen('[download] The file has already been downloaded')
541
542     def prepare_filename(self, info_dict):
543         """Generate the output filename."""
544         try:
545             template_dict = dict(info_dict)
546
547             template_dict['epoch'] = int(time.time())
548             autonumber_size = self.params.get('autonumber_size')
549             if autonumber_size is None:
550                 autonumber_size = 5
551             autonumber_templ = '%0' + str(autonumber_size) + 'd'
552             template_dict['autonumber'] = autonumber_templ % self._num_downloads
553             if template_dict.get('playlist_index') is not None:
554                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
555             if template_dict.get('resolution') is None:
556                 if template_dict.get('width') and template_dict.get('height'):
557                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
558                 elif template_dict.get('height'):
559                     template_dict['resolution'] = '%sp' % template_dict['height']
560                 elif template_dict.get('width'):
561                     template_dict['resolution'] = '?x%d' % template_dict['width']
562
563             sanitize = lambda k, v: sanitize_filename(
564                 compat_str(v),
565                 restricted=self.params.get('restrictfilenames'),
566                 is_id=(k == 'id'))
567             template_dict = dict((k, sanitize(k, v))
568                                  for k, v in template_dict.items()
569                                  if v is not None)
570             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
571
572             outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
573             tmpl = compat_expanduser(outtmpl)
574             filename = tmpl % template_dict
575             # Temporary fix for #4787
576             # 'Treat' all problem characters by passing filename through preferredencoding
577             # to workaround encoding issues with subprocess on python2 @ Windows
578             if sys.version_info < (3, 0) and sys.platform == 'win32':
579                 filename = encodeFilename(filename, True).decode(preferredencoding())
580             return filename
581         except ValueError as err:
582             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
583             return None
584
585     def _match_entry(self, info_dict, incomplete):
586         """ Returns None iff the file should be downloaded """
587
588         video_title = info_dict.get('title', info_dict.get('id', 'video'))
589         if 'title' in info_dict:
590             # This can happen when we're just evaluating the playlist
591             title = info_dict['title']
592             matchtitle = self.params.get('matchtitle', False)
593             if matchtitle:
594                 if not re.search(matchtitle, title, re.IGNORECASE):
595                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
596             rejecttitle = self.params.get('rejecttitle', False)
597             if rejecttitle:
598                 if re.search(rejecttitle, title, re.IGNORECASE):
599                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
600         date = info_dict.get('upload_date', None)
601         if date is not None:
602             dateRange = self.params.get('daterange', DateRange())
603             if date not in dateRange:
604                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
605         view_count = info_dict.get('view_count', None)
606         if view_count is not None:
607             min_views = self.params.get('min_views')
608             if min_views is not None and view_count < min_views:
609                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
610             max_views = self.params.get('max_views')
611             if max_views is not None and view_count > max_views:
612                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
613         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
614             return 'Skipping "%s" because it is age restricted' % video_title
615         if self.in_download_archive(info_dict):
616             return '%s has already been recorded in archive' % video_title
617
618         if not incomplete:
619             match_filter = self.params.get('match_filter')
620             if match_filter is not None:
621                 ret = match_filter(info_dict)
622                 if ret is not None:
623                     return ret
624
625         return None
626
627     @staticmethod
628     def add_extra_info(info_dict, extra_info):
629         '''Set the keys from extra_info in info dict if they are missing'''
630         for key, value in extra_info.items():
631             info_dict.setdefault(key, value)
632
633     def extract_info(self, url, download=True, ie_key=None, extra_info={},
634                      process=True, force_generic_extractor=False):
635         '''
636         Returns a list with a dictionary for each video we find.
637         If 'download', also downloads the videos.
638         extra_info is a dict containing the extra values to add to each result
639         '''
640
641         if not ie_key and force_generic_extractor:
642             ie_key = 'Generic'
643
644         if ie_key:
645             ies = [self.get_info_extractor(ie_key)]
646         else:
647             ies = self._ies
648
649         for ie in ies:
650             if not ie.suitable(url):
651                 continue
652
653             if not ie.working():
654                 self.report_warning('The program functionality for this site has been marked as broken, '
655                                     'and will probably not work.')
656
657             try:
658                 ie_result = ie.extract(url)
659                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
660                     break
661                 if isinstance(ie_result, list):
662                     # Backwards compatibility: old IE result format
663                     ie_result = {
664                         '_type': 'compat_list',
665                         'entries': ie_result,
666                     }
667                 self.add_default_extra_info(ie_result, ie, url)
668                 if process:
669                     return self.process_ie_result(ie_result, download, extra_info)
670                 else:
671                     return ie_result
672             except ExtractorError as de:  # An error we somewhat expected
673                 self.report_error(compat_str(de), de.format_traceback())
674                 break
675             except MaxDownloadsReached:
676                 raise
677             except Exception as e:
678                 if self.params.get('ignoreerrors', False):
679                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
680                     break
681                 else:
682                     raise
683         else:
684             self.report_error('no suitable InfoExtractor for URL %s' % url)
685
686     def add_default_extra_info(self, ie_result, ie, url):
687         self.add_extra_info(ie_result, {
688             'extractor': ie.IE_NAME,
689             'webpage_url': url,
690             'webpage_url_basename': url_basename(url),
691             'extractor_key': ie.ie_key(),
692         })
693
694     def process_ie_result(self, ie_result, download=True, extra_info={}):
695         """
696         Take the result of the ie(may be modified) and resolve all unresolved
697         references (URLs, playlist items).
698
699         It will also download the videos if 'download'.
700         Returns the resolved ie_result.
701         """
702
703         result_type = ie_result.get('_type', 'video')
704
705         if result_type in ('url', 'url_transparent'):
706             extract_flat = self.params.get('extract_flat', False)
707             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
708                     extract_flat is True):
709                 if self.params.get('forcejson', False):
710                     self.to_stdout(json.dumps(ie_result))
711                 return ie_result
712
713         if result_type == 'video':
714             self.add_extra_info(ie_result, extra_info)
715             return self.process_video_result(ie_result, download=download)
716         elif result_type == 'url':
717             # We have to add extra_info to the results because it may be
718             # contained in a playlist
719             return self.extract_info(ie_result['url'],
720                                      download,
721                                      ie_key=ie_result.get('ie_key'),
722                                      extra_info=extra_info)
723         elif result_type == 'url_transparent':
724             # Use the information from the embedding page
725             info = self.extract_info(
726                 ie_result['url'], ie_key=ie_result.get('ie_key'),
727                 extra_info=extra_info, download=False, process=False)
728
729             force_properties = dict(
730                 (k, v) for k, v in ie_result.items() if v is not None)
731             for f in ('_type', 'url'):
732                 if f in force_properties:
733                     del force_properties[f]
734             new_result = info.copy()
735             new_result.update(force_properties)
736
737             assert new_result.get('_type') != 'url_transparent'
738
739             return self.process_ie_result(
740                 new_result, download=download, extra_info=extra_info)
741         elif result_type == 'playlist' or result_type == 'multi_video':
742             # We process each entry in the playlist
743             playlist = ie_result.get('title', None) or ie_result.get('id', None)
744             self.to_screen('[download] Downloading playlist: %s' % playlist)
745
746             playlist_results = []
747
748             playliststart = self.params.get('playliststart', 1) - 1
749             playlistend = self.params.get('playlistend', None)
750             # For backwards compatibility, interpret -1 as whole list
751             if playlistend == -1:
752                 playlistend = None
753
754             playlistitems_str = self.params.get('playlist_items', None)
755             playlistitems = None
756             if playlistitems_str is not None:
757                 def iter_playlistitems(format):
758                     for string_segment in format.split(','):
759                         if '-' in string_segment:
760                             start, end = string_segment.split('-')
761                             for item in range(int(start), int(end) + 1):
762                                 yield int(item)
763                         else:
764                             yield int(string_segment)
765                 playlistitems = iter_playlistitems(playlistitems_str)
766
767             ie_entries = ie_result['entries']
768             if isinstance(ie_entries, list):
769                 n_all_entries = len(ie_entries)
770                 if playlistitems:
771                     entries = [
772                         ie_entries[i - 1] for i in playlistitems
773                         if -n_all_entries <= i - 1 < n_all_entries]
774                 else:
775                     entries = ie_entries[playliststart:playlistend]
776                 n_entries = len(entries)
777                 self.to_screen(
778                     "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
779                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
780             elif isinstance(ie_entries, PagedList):
781                 if playlistitems:
782                     entries = []
783                     for item in playlistitems:
784                         entries.extend(ie_entries.getslice(
785                             item - 1, item
786                         ))
787                 else:
788                     entries = ie_entries.getslice(
789                         playliststart, playlistend)
790                 n_entries = len(entries)
791                 self.to_screen(
792                     "[%s] playlist %s: Downloading %d videos" %
793                     (ie_result['extractor'], playlist, n_entries))
794             else:  # iterable
795                 if playlistitems:
796                     entry_list = list(ie_entries)
797                     entries = [entry_list[i - 1] for i in playlistitems]
798                 else:
799                     entries = list(itertools.islice(
800                         ie_entries, playliststart, playlistend))
801                 n_entries = len(entries)
802                 self.to_screen(
803                     "[%s] playlist %s: Downloading %d videos" %
804                     (ie_result['extractor'], playlist, n_entries))
805
806             if self.params.get('playlistreverse', False):
807                 entries = entries[::-1]
808
809             for i, entry in enumerate(entries, 1):
810                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
811                 extra = {
812                     'n_entries': n_entries,
813                     'playlist': playlist,
814                     'playlist_id': ie_result.get('id'),
815                     'playlist_title': ie_result.get('title'),
816                     'playlist_index': i + playliststart,
817                     'extractor': ie_result['extractor'],
818                     'webpage_url': ie_result['webpage_url'],
819                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
820                     'extractor_key': ie_result['extractor_key'],
821                 }
822
823                 reason = self._match_entry(entry, incomplete=True)
824                 if reason is not None:
825                     self.to_screen('[download] ' + reason)
826                     continue
827
828                 entry_result = self.process_ie_result(entry,
829                                                       download=download,
830                                                       extra_info=extra)
831                 playlist_results.append(entry_result)
832             ie_result['entries'] = playlist_results
833             return ie_result
834         elif result_type == 'compat_list':
835             self.report_warning(
836                 'Extractor %s returned a compat_list result. '
837                 'It needs to be updated.' % ie_result.get('extractor'))
838
839             def _fixup(r):
840                 self.add_extra_info(
841                     r,
842                     {
843                         'extractor': ie_result['extractor'],
844                         'webpage_url': ie_result['webpage_url'],
845                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
846                         'extractor_key': ie_result['extractor_key'],
847                     }
848                 )
849                 return r
850             ie_result['entries'] = [
851                 self.process_ie_result(_fixup(r), download, extra_info)
852                 for r in ie_result['entries']
853             ]
854             return ie_result
855         else:
856             raise Exception('Invalid result type: %s' % result_type)
857
858     def _build_format_filter(self, filter_spec):
859         " Returns a function to filter the formats according to the filter_spec "
860
861         OPERATORS = {
862             '<': operator.lt,
863             '<=': operator.le,
864             '>': operator.gt,
865             '>=': operator.ge,
866             '=': operator.eq,
867             '!=': operator.ne,
868         }
869         operator_rex = re.compile(r'''(?x)\s*
870             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
871             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
872             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
873             $
874             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
875         m = operator_rex.search(filter_spec)
876         if m:
877             try:
878                 comparison_value = int(m.group('value'))
879             except ValueError:
880                 comparison_value = parse_filesize(m.group('value'))
881                 if comparison_value is None:
882                     comparison_value = parse_filesize(m.group('value') + 'B')
883                 if comparison_value is None:
884                     raise ValueError(
885                         'Invalid value %r in format specification %r' % (
886                             m.group('value'), filter_spec))
887             op = OPERATORS[m.group('op')]
888
889         if not m:
890             STR_OPERATORS = {
891                 '=': operator.eq,
892                 '!=': operator.ne,
893             }
894             str_operator_rex = re.compile(r'''(?x)
895                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
896                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
897                 \s*(?P<value>[a-zA-Z0-9_-]+)
898                 \s*$
899                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
900             m = str_operator_rex.search(filter_spec)
901             if m:
902                 comparison_value = m.group('value')
903                 op = STR_OPERATORS[m.group('op')]
904
905         if not m:
906             raise ValueError('Invalid filter specification %r' % filter_spec)
907
908         def _filter(f):
909             actual_value = f.get(m.group('key'))
910             if actual_value is None:
911                 return m.group('none_inclusive')
912             return op(actual_value, comparison_value)
913         return _filter
914
    def build_format_selector(self, format_spec):
        """
        Compile format_spec into a selector function.

        format_spec is tokenized with compat_tokenize_tokenize and parsed into
        FormatSelector tuples by _parse_format_selection; the returned
        callable takes the available formats and returns an iterable of the
        selected ones.  A SyntaxError built by syntax_error is raised when
        the specification cannot be tokenized or parsed.
        """
        def syntax_error(note, start):
            # Builds (does not raise) a SyntaxError whose message shows the
            # spec with a caret under column start[1]
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        # Kinds of node in the parsed selector tree
        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        def _parse_filter(tokens):
            # Joins the token strings up to the closing ']' into the filter
            # text; falls off the end (returning None) if tokens run out first
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Recursive parser returning a list of FormatSelector nodes.
            # The inside_* flags tell a nested call which operators end it;
            # such an operator is pushed back for the caller to handle.
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        # The right-hand side of '+' ends at '/' or ','
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        # The right-hand side of '/' ends at ','
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        # A filter with no selector before it applies to 'best'
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        video_selector = current_selector
                        audio_selector = _parse_format_selection(tokens, inside_merge=True)
                        if not video_selector or not audio_selector:
                            raise syntax_error('"+" must be between two format selectors', start)
                        current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _build_selector_function(selector):
            # Turns a parsed node (or a list of nodes) into a callable that
            # maps the available formats to the selected ones
            if isinstance(selector, list):
                fs = [_build_selector_function(s) for s in selector]

                # A list selects the results of all of its members in order
                def selector_function(formats):
                    for f in fs:
                        for format in f(formats):
                            yield format
                return selector_function
            elif selector.type == GROUP:
                selector_function = _build_selector_function(selector.selector)
            elif selector.type == PICKFIRST:
                fs = [_build_selector_function(s) for s in selector.selector]

                # Returns the result of the first alternative selecting anything
                def selector_function(formats):
                    for f in fs:
                        picked_formats = list(f(formats))
                        if picked_formats:
                            return picked_formats
                    return []
            elif selector.type == SINGLE:
                format_spec = selector.selector

                # NOTE(review): indexing with -1 for the best and 0 for the
                # worst presumably relies on formats being ordered from worst
                # to best; confirm against the caller
                def selector_function(formats):
                    formats = list(formats)
                    if not formats:
                        return
                    if format_spec == 'all':
                        for f in formats:
                            yield f
                    elif format_spec in ['best', 'worst', None]:
                        format_idx = 0 if format_spec == 'worst' else -1
                        # Prefer formats with neither codec marked 'none'
                        audiovideo_formats = [
                            f for f in formats
                            if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                        if audiovideo_formats:
                            yield audiovideo_formats[format_idx]
                        # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
                        elif (all(f.get('acodec') != 'none' for f in formats) or
                              all(f.get('vcodec') != 'none' for f in formats)):
                            yield formats[format_idx]
                    elif format_spec == 'bestaudio':
                        # Audio-only formats are those whose vcodec is 'none'
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[-1]
                    elif format_spec == 'worstaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[0]
                    elif format_spec == 'bestvideo':
                        # Video-only formats are those whose acodec is 'none'
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[-1]
                    elif format_spec == 'worstvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[0]
                    else:
                        # A known extension selects by 'ext', anything else
                        # is treated as a format_id; the last match wins
                        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                        if format_spec in extensions:
                            filter_f = lambda f: f['ext'] == format_spec
                        else:
                            filter_f = lambda f: f['format_id'] == format_spec
                        matches = list(filter(filter_f, formats))
                        if matches:
                            yield matches[-1]
            elif selector.type == MERGE:
                def _merge(formats_info):
                    # Combines a (video, audio) pair into one format dict.
                    # Returns None after report_error when the first format
                    # has no video.
                    format_1, format_2 = [f['format_id'] for f in formats_info]
                    # The first format must contain the video and the
                    # second the audio
                    if formats_info[0].get('vcodec') == 'none':
                        self.report_error('The first format must '
                                          'contain the video, try using '
                                          '"-f %s+%s"' % (format_2, format_1))
                        return
                    # The merge_output_format parameter, when set, overrides
                    # the video format's extension
                    output_ext = (
                        formats_info[0]['ext']
                        if self.params.get('merge_output_format') is None
                        else self.params['merge_output_format'])
                    return {
                        'requested_formats': formats_info,
                        'format': '%s+%s' % (formats_info[0].get('format'),
                                             formats_info[1].get('format')),
                        'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                formats_info[1].get('format_id')),
                        'width': formats_info[0].get('width'),
                        'height': formats_info[0].get('height'),
                        'resolution': formats_info[0].get('resolution'),
                        'fps': formats_info[0].get('fps'),
                        'vcodec': formats_info[0].get('vcodec'),
                        'vbr': formats_info[0].get('vbr'),
                        'stretched_ratio': formats_info[0].get('stretched_ratio'),
                        'acodec': formats_info[1].get('acodec'),
                        'abr': formats_info[1].get('abr'),
                        'ext': output_ext,
                    }
                video_selector, audio_selector = map(_build_selector_function, selector.selector)

                # Yields one merged format per (video, audio) combination
                def selector_function(formats):
                    formats = list(formats)
                    for pair in itertools.product(video_selector(formats), audio_selector(formats)):
                        yield _merge(pair)

            filters = [self._build_format_filter(f) for f in selector.filters]

            # The node's [..] filters narrow the formats before selection
            def final_selector(formats):
                for _filter in filters:
                    formats = list(filter(_filter, formats))
                return selector_function(formats)
            return final_selector

        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(compat_tokenize_tokenize(stream.readline))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator(object):
            # Iterator over the token list that can step back by one token
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            # Python 2 spelling of the iterator protocol method
            next = __next__

            def restore_last_token(self):
                # Makes the next iteration return the latest token again
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
1140
1141     def _calc_headers(self, info_dict):
1142         res = std_headers.copy()
1143
1144         add_headers = info_dict.get('http_headers')
1145         if add_headers:
1146             res.update(add_headers)
1147
1148         cookies = self._calc_cookies(info_dict)
1149         if cookies:
1150             res['Cookie'] = cookies
1151
1152         return res
1153
1154     def _calc_cookies(self, info_dict):
1155         pr = compat_urllib_request.Request(info_dict['url'])
1156         self.cookiejar.add_cookie_header(pr)
1157         return pr.get_header('Cookie')
1158
1159     def process_video_result(self, info_dict, download=True):
1160         assert info_dict.get('_type', 'video') == 'video'
1161
1162         if 'id' not in info_dict:
1163             raise ExtractorError('Missing "id" field in extractor result')
1164         if 'title' not in info_dict:
1165             raise ExtractorError('Missing "title" field in extractor result')
1166
1167         if 'playlist' not in info_dict:
1168             # It isn't part of a playlist
1169             info_dict['playlist'] = None
1170             info_dict['playlist_index'] = None
1171
1172         thumbnails = info_dict.get('thumbnails')
1173         if thumbnails is None:
1174             thumbnail = info_dict.get('thumbnail')
1175             if thumbnail:
1176                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1177         if thumbnails:
1178             thumbnails.sort(key=lambda t: (
1179                 t.get('preference'), t.get('width'), t.get('height'),
1180                 t.get('id'), t.get('url')))
1181             for i, t in enumerate(thumbnails):
1182                 if t.get('width') and t.get('height'):
1183                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1184                 if t.get('id') is None:
1185                     t['id'] = '%d' % i
1186
1187         if thumbnails and 'thumbnail' not in info_dict:
1188             info_dict['thumbnail'] = thumbnails[-1]['url']
1189
1190         if 'display_id' not in info_dict and 'id' in info_dict:
1191             info_dict['display_id'] = info_dict['id']
1192
1193         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1194             # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1195             # see http://bugs.python.org/issue1646728)
1196             try:
1197                 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1198                 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1199             except (ValueError, OverflowError, OSError):
1200                 pass
1201
1202         if self.params.get('listsubtitles', False):
1203             if 'automatic_captions' in info_dict:
1204                 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
1205             self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
1206             return
1207         info_dict['requested_subtitles'] = self.process_subtitles(
1208             info_dict['id'], info_dict.get('subtitles'),
1209             info_dict.get('automatic_captions'))
1210
1211         # We now pick which formats have to be downloaded
1212         if info_dict.get('formats') is None:
1213             # There's only one format available
1214             formats = [info_dict]
1215         else:
1216             formats = info_dict['formats']
1217
1218         if not formats:
1219             raise ExtractorError('No video formats found!')
1220
1221         formats_dict = {}
1222
1223         # We check that all the formats have the format and format_id fields
1224         for i, format in enumerate(formats):
1225             if 'url' not in format:
1226                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1227
1228             if format.get('format_id') is None:
1229                 format['format_id'] = compat_str(i)
1230             format_id = format['format_id']
1231             if format_id not in formats_dict:
1232                 formats_dict[format_id] = []
1233             formats_dict[format_id].append(format)
1234
1235         # Make sure all formats have unique format_id
1236         for format_id, ambiguous_formats in formats_dict.items():
1237             if len(ambiguous_formats) > 1:
1238                 for i, format in enumerate(ambiguous_formats):
1239                     format['format_id'] = '%s-%d' % (format_id, i)
1240
1241         for i, format in enumerate(formats):
1242             if format.get('format') is None:
1243                 format['format'] = '{id} - {res}{note}'.format(
1244                     id=format['format_id'],
1245                     res=self.format_resolution(format),
1246                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1247                 )
1248             # Automatically determine file extension if missing
1249             if 'ext' not in format:
1250                 format['ext'] = determine_ext(format['url']).lower()
1251             # Add HTTP headers, so that external programs can use them from the
1252             # json output
1253             full_format_info = info_dict.copy()
1254             full_format_info.update(format)
1255             format['http_headers'] = self._calc_headers(full_format_info)
1256
1257         # TODO Central sorting goes here
1258
1259         if formats[0] is not info_dict:
1260             # only set the 'formats' fields if the original info_dict list them
1261             # otherwise we end up with a circular reference, the first (and unique)
1262             # element in the 'formats' field in info_dict is info_dict itself,
1263             # wich can't be exported to json
1264             info_dict['formats'] = formats
1265         if self.params.get('listformats'):
1266             self.list_formats(info_dict)
1267             return
1268         if self.params.get('list_thumbnails'):
1269             self.list_thumbnails(info_dict)
1270             return
1271
1272         req_format = self.params.get('format')
1273         if req_format is None:
1274             req_format_list = []
1275             if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
1276                     info_dict['extractor'] in ['youtube', 'ted'] and
1277                     not info_dict.get('is_live')):
1278                 merger = FFmpegMergerPP(self)
1279                 if merger.available and merger.can_merge():
1280                     req_format_list.append('bestvideo+bestaudio')
1281             req_format_list.append('best')
1282             req_format = '/'.join(req_format_list)
1283         format_selector = self.build_format_selector(req_format)
1284         formats_to_download = list(format_selector(formats))
1285         if not formats_to_download:
1286             raise ExtractorError('requested format not available',
1287                                  expected=True)
1288
1289         if download:
1290             if len(formats_to_download) > 1:
1291                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1292             for format in formats_to_download:
1293                 new_info = dict(info_dict)
1294                 new_info.update(format)
1295                 self.process_info(new_info)
1296         # We update the info dict with the best quality format (backwards compatibility)
1297         info_dict.update(formats_to_download[-1])
1298         return info_dict
1299
1300     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1301         """Select the requested subtitles and their format"""
1302         available_subs = {}
1303         if normal_subtitles and self.params.get('writesubtitles'):
1304             available_subs.update(normal_subtitles)
1305         if automatic_captions and self.params.get('writeautomaticsub'):
1306             for lang, cap_info in automatic_captions.items():
1307                 if lang not in available_subs:
1308                     available_subs[lang] = cap_info
1309
1310         if (not self.params.get('writesubtitles') and not
1311                 self.params.get('writeautomaticsub') or not
1312                 available_subs):
1313             return None
1314
1315         if self.params.get('allsubtitles', False):
1316             requested_langs = available_subs.keys()
1317         else:
1318             if self.params.get('subtitleslangs', False):
1319                 requested_langs = self.params.get('subtitleslangs')
1320             elif 'en' in available_subs:
1321                 requested_langs = ['en']
1322             else:
1323                 requested_langs = [list(available_subs.keys())[0]]
1324
1325         formats_query = self.params.get('subtitlesformat', 'best')
1326         formats_preference = formats_query.split('/') if formats_query else []
1327         subs = {}
1328         for lang in requested_langs:
1329             formats = available_subs.get(lang)
1330             if formats is None:
1331                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1332                 continue
1333             for ext in formats_preference:
1334                 if ext == 'best':
1335                     f = formats[-1]
1336                     break
1337                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1338                 if matches:
1339                     f = matches[-1]
1340                     break
1341             else:
1342                 f = formats[-1]
1343                 self.report_warning(
1344                     'No subtitle format found matching "%s" for language %s, '
1345                     'using %s' % (formats_query, lang, f['ext']))
1346             subs[lang] = f
1347         return subs
1348
1349     def process_info(self, info_dict):
1350         """Process a single resolved IE result."""
1351
1352         assert info_dict.get('_type', 'video') == 'video'
1353
1354         max_downloads = self.params.get('max_downloads')
1355         if max_downloads is not None:
1356             if self._num_downloads >= int(max_downloads):
1357                 raise MaxDownloadsReached()
1358
1359         info_dict['fulltitle'] = info_dict['title']
1360         if len(info_dict['title']) > 200:
1361             info_dict['title'] = info_dict['title'][:197] + '...'
1362
1363         if 'format' not in info_dict:
1364             info_dict['format'] = info_dict['ext']
1365
1366         reason = self._match_entry(info_dict, incomplete=False)
1367         if reason is not None:
1368             self.to_screen('[download] ' + reason)
1369             return
1370
1371         self._num_downloads += 1
1372
1373         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1374
1375         # Forced printings
1376         if self.params.get('forcetitle', False):
1377             self.to_stdout(info_dict['fulltitle'])
1378         if self.params.get('forceid', False):
1379             self.to_stdout(info_dict['id'])
1380         if self.params.get('forceurl', False):
1381             if info_dict.get('requested_formats') is not None:
1382                 for f in info_dict['requested_formats']:
1383                     self.to_stdout(f['url'] + f.get('play_path', ''))
1384             else:
1385                 # For RTMP URLs, also include the playpath
1386                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1387         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1388             self.to_stdout(info_dict['thumbnail'])
1389         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1390             self.to_stdout(info_dict['description'])
1391         if self.params.get('forcefilename', False) and filename is not None:
1392             self.to_stdout(filename)
1393         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1394             self.to_stdout(formatSeconds(info_dict['duration']))
1395         if self.params.get('forceformat', False):
1396             self.to_stdout(info_dict['format'])
1397         if self.params.get('forcejson', False):
1398             self.to_stdout(json.dumps(info_dict))
1399
1400         # Do nothing else if in simulate mode
1401         if self.params.get('simulate', False):
1402             return
1403
1404         if filename is None:
1405             return
1406
1407         try:
1408             dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1409             if dn and not os.path.exists(dn):
1410                 os.makedirs(dn)
1411         except (OSError, IOError) as err:
1412             self.report_error('unable to create directory ' + compat_str(err))
1413             return
1414
1415         if self.params.get('writedescription', False):
1416             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1417             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1418                 self.to_screen('[info] Video description is already present')
1419             elif info_dict.get('description') is None:
1420                 self.report_warning('There\'s no description to write.')
1421             else:
1422                 try:
1423                     self.to_screen('[info] Writing video description to: ' + descfn)
1424                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1425                         descfile.write(info_dict['description'])
1426                 except (OSError, IOError):
1427                     self.report_error('Cannot write description file ' + descfn)
1428                     return
1429
1430         if self.params.get('writeannotations', False):
1431             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1432             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1433                 self.to_screen('[info] Video annotations are already present')
1434             else:
1435                 try:
1436                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1437                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1438                         annofile.write(info_dict['annotations'])
1439                 except (KeyError, TypeError):
1440                     self.report_warning('There are no annotations to write.')
1441                 except (OSError, IOError):
1442                     self.report_error('Cannot write annotations file: ' + annofn)
1443                     return
1444
1445         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1446                                        self.params.get('writeautomaticsub')])
1447
1448         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1449             # subtitles download errors are already managed as troubles in relevant IE
1450             # that way it will silently go on when used with unsupporting IE
1451             subtitles = info_dict['requested_subtitles']
1452             ie = self.get_info_extractor(info_dict['extractor_key'])
1453             for sub_lang, sub_info in subtitles.items():
1454                 sub_format = sub_info['ext']
1455                 if sub_info.get('data') is not None:
1456                     sub_data = sub_info['data']
1457                 else:
1458                     try:
1459                         sub_data = ie._download_webpage(
1460                             sub_info['url'], info_dict['id'], note=False)
1461                     except ExtractorError as err:
1462                         self.report_warning('Unable to download subtitle for "%s": %s' %
1463                                             (sub_lang, compat_str(err.cause)))
1464                         continue
1465                 try:
1466                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1467                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1468                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1469                     else:
1470                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1471                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1472                             subfile.write(sub_data)
1473                 except (OSError, IOError):
1474                     self.report_error('Cannot write subtitles file ' + sub_filename)
1475                     return
1476
1477         if self.params.get('writeinfojson', False):
1478             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1479             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1480                 self.to_screen('[info] Video description metadata is already present')
1481             else:
1482                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1483                 try:
1484                     write_json_file(self.filter_requested_info(info_dict), infofn)
1485                 except (OSError, IOError):
1486                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1487                     return
1488
1489         self._write_thumbnails(info_dict, filename)
1490
1491         if not self.params.get('skip_download', False):
1492             try:
1493                 def dl(name, info):
1494                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1495                     for ph in self._progress_hooks:
1496                         fd.add_progress_hook(ph)
1497                     if self.params.get('verbose'):
1498                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1499                     return fd.download(name, info)
1500
1501                 if info_dict.get('requested_formats') is not None:
1502                     downloaded = []
1503                     success = True
1504                     merger = FFmpegMergerPP(self)
1505                     if not merger.available:
1506                         postprocessors = []
1507                         self.report_warning('You have requested multiple '
1508                                             'formats but ffmpeg or avconv are not installed.'
1509                                             ' The formats won\'t be merged.')
1510                     else:
1511                         postprocessors = [merger]
1512
1513                     def compatible_formats(formats):
1514                         video, audio = formats
1515                         # Check extension
1516                         video_ext, audio_ext = audio.get('ext'), video.get('ext')
1517                         if video_ext and audio_ext:
1518                             COMPATIBLE_EXTS = (
1519                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
1520                                 ('webm')
1521                             )
1522                             for exts in COMPATIBLE_EXTS:
1523                                 if video_ext in exts and audio_ext in exts:
1524                                     return True
1525                         # TODO: Check acodec/vcodec
1526                         return False
1527
1528                     filename_real_ext = os.path.splitext(filename)[1][1:]
1529                     filename_wo_ext = (
1530                         os.path.splitext(filename)[0]
1531                         if filename_real_ext == info_dict['ext']
1532                         else filename)
1533                     requested_formats = info_dict['requested_formats']
1534                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1535                         info_dict['ext'] = 'mkv'
1536                         self.report_warning(
1537                             'Requested formats are incompatible for merge and will be merged into mkv.')
1538                     # Ensure filename always has a correct extension for successful merge
1539                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1540                     if os.path.exists(encodeFilename(filename)):
1541                         self.to_screen(
1542                             '[download] %s has already been downloaded and '
1543                             'merged' % filename)
1544                     else:
1545                         for f in requested_formats:
1546                             new_info = dict(info_dict)
1547                             new_info.update(f)
1548                             fname = self.prepare_filename(new_info)
1549                             fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
1550                             downloaded.append(fname)
1551                             partial_success = dl(fname, new_info)
1552                             success = success and partial_success
1553                         info_dict['__postprocessors'] = postprocessors
1554                         info_dict['__files_to_merge'] = downloaded
1555                 else:
1556                     # Just a single file
1557                     success = dl(filename, info_dict)
1558             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1559                 self.report_error('unable to download video data: %s' % str(err))
1560                 return
1561             except (OSError, IOError) as err:
1562                 raise UnavailableVideoError(err)
1563             except (ContentTooShortError, ) as err:
1564                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1565                 return
1566
1567             if success:
1568                 # Fixup content
1569                 fixup_policy = self.params.get('fixup')
1570                 if fixup_policy is None:
1571                     fixup_policy = 'detect_or_warn'
1572
1573                 stretched_ratio = info_dict.get('stretched_ratio')
1574                 if stretched_ratio is not None and stretched_ratio != 1:
1575                     if fixup_policy == 'warn':
1576                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1577                             info_dict['id'], stretched_ratio))
1578                     elif fixup_policy == 'detect_or_warn':
1579                         stretched_pp = FFmpegFixupStretchedPP(self)
1580                         if stretched_pp.available:
1581                             info_dict.setdefault('__postprocessors', [])
1582                             info_dict['__postprocessors'].append(stretched_pp)
1583                         else:
1584                             self.report_warning(
1585                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1586                                     info_dict['id'], stretched_ratio))
1587                     else:
1588                         assert fixup_policy in ('ignore', 'never')
1589
1590                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1591                     if fixup_policy == 'warn':
1592                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1593                             info_dict['id']))
1594                     elif fixup_policy == 'detect_or_warn':
1595                         fixup_pp = FFmpegFixupM4aPP(self)
1596                         if fixup_pp.available:
1597                             info_dict.setdefault('__postprocessors', [])
1598                             info_dict['__postprocessors'].append(fixup_pp)
1599                         else:
1600                             self.report_warning(
1601                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1602                                     info_dict['id']))
1603                     else:
1604                         assert fixup_policy in ('ignore', 'never')
1605
1606                 try:
1607                     self.post_process(filename, info_dict)
1608                 except (PostProcessingError) as err:
1609                     self.report_error('postprocessing: %s' % str(err))
1610                     return
1611                 self.record_download_archive(info_dict)
1612
1613     def download(self, url_list):
1614         """Download a given list of URLs."""
1615         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1616         if (len(url_list) > 1 and
1617                 '%' not in outtmpl and
1618                 self.params.get('max_downloads') != 1):
1619             raise SameFileError(outtmpl)
1620
1621         for url in url_list:
1622             try:
1623                 # It also downloads the videos
1624                 res = self.extract_info(
1625                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
1626             except UnavailableVideoError:
1627                 self.report_error('unable to download video')
1628             except MaxDownloadsReached:
1629                 self.to_screen('[info] Maximum number of downloaded files reached.')
1630                 raise
1631             else:
1632                 if self.params.get('dump_single_json', False):
1633                     self.to_stdout(json.dumps(res))
1634
1635         return self._download_retcode
1636
1637     def download_with_info_file(self, info_filename):
1638         with contextlib.closing(fileinput.FileInput(
1639                 [info_filename], mode='r',
1640                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1641             # FileInput doesn't have a read method, we can't call json.load
1642             info = self.filter_requested_info(json.loads('\n'.join(f)))
1643         try:
1644             self.process_ie_result(info, download=True)
1645         except DownloadError:
1646             webpage_url = info.get('webpage_url')
1647             if webpage_url is not None:
1648                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1649                 return self.download([webpage_url])
1650             else:
1651                 raise
1652         return self._download_retcode
1653
1654     @staticmethod
1655     def filter_requested_info(info_dict):
1656         return dict(
1657             (k, v) for k, v in info_dict.items()
1658             if k not in ['requested_formats', 'requested_subtitles'])
1659
1660     def post_process(self, filename, ie_info):
1661         """Run all the postprocessors on the given file."""
1662         info = dict(ie_info)
1663         info['filepath'] = filename
1664         pps_chain = []
1665         if ie_info.get('__postprocessors') is not None:
1666             pps_chain.extend(ie_info['__postprocessors'])
1667         pps_chain.extend(self._pps)
1668         for pp in pps_chain:
1669             files_to_delete = []
1670             try:
1671                 files_to_delete, info = pp.run(info)
1672             except PostProcessingError as e:
1673                 self.report_error(e.msg)
1674             if files_to_delete and not self.params.get('keepvideo', False):
1675                 for old_filename in files_to_delete:
1676                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1677                     try:
1678                         os.remove(encodeFilename(old_filename))
1679                     except (IOError, OSError):
1680                         self.report_warning('Unable to remove downloaded original file')
1681
1682     def _make_archive_id(self, info_dict):
1683         # Future-proof against any change in case
1684         # and backwards compatibility with prior versions
1685         extractor = info_dict.get('extractor_key')
1686         if extractor is None:
1687             if 'id' in info_dict:
1688                 extractor = info_dict.get('ie_key')  # key in a playlist
1689         if extractor is None:
1690             return None  # Incomplete video information
1691         return extractor.lower() + ' ' + info_dict['id']
1692
1693     def in_download_archive(self, info_dict):
1694         fn = self.params.get('download_archive')
1695         if fn is None:
1696             return False
1697
1698         vid_id = self._make_archive_id(info_dict)
1699         if vid_id is None:
1700             return False  # Incomplete video information
1701
1702         try:
1703             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1704                 for line in archive_file:
1705                     if line.strip() == vid_id:
1706                         return True
1707         except IOError as ioe:
1708             if ioe.errno != errno.ENOENT:
1709                 raise
1710         return False
1711
1712     def record_download_archive(self, info_dict):
1713         fn = self.params.get('download_archive')
1714         if fn is None:
1715             return
1716         vid_id = self._make_archive_id(info_dict)
1717         assert vid_id
1718         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1719             archive_file.write(vid_id + '\n')
1720
1721     @staticmethod
1722     def format_resolution(format, default='unknown'):
1723         if format.get('vcodec') == 'none':
1724             return 'audio only'
1725         if format.get('resolution') is not None:
1726             return format['resolution']
1727         if format.get('height') is not None:
1728             if format.get('width') is not None:
1729                 res = '%sx%s' % (format['width'], format['height'])
1730             else:
1731                 res = '%sp' % format['height']
1732         elif format.get('width') is not None:
1733             res = '?x%d' % format['width']
1734         else:
1735             res = default
1736         return res
1737
1738     def _format_note(self, fdict):
1739         res = ''
1740         if fdict.get('ext') in ['f4f', 'f4m']:
1741             res += '(unsupported) '
1742         if fdict.get('format_note') is not None:
1743             res += fdict['format_note'] + ' '
1744         if fdict.get('tbr') is not None:
1745             res += '%4dk ' % fdict['tbr']
1746         if fdict.get('container') is not None:
1747             if res:
1748                 res += ', '
1749             res += '%s container' % fdict['container']
1750         if (fdict.get('vcodec') is not None and
1751                 fdict.get('vcodec') != 'none'):
1752             if res:
1753                 res += ', '
1754             res += fdict['vcodec']
1755             if fdict.get('vbr') is not None:
1756                 res += '@'
1757         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1758             res += 'video@'
1759         if fdict.get('vbr') is not None:
1760             res += '%4dk' % fdict['vbr']
1761         if fdict.get('fps') is not None:
1762             res += ', %sfps' % fdict['fps']
1763         if fdict.get('acodec') is not None:
1764             if res:
1765                 res += ', '
1766             if fdict['acodec'] == 'none':
1767                 res += 'video only'
1768             else:
1769                 res += '%-5s' % fdict['acodec']
1770         elif fdict.get('abr') is not None:
1771             if res:
1772                 res += ', '
1773             res += 'audio'
1774         if fdict.get('abr') is not None:
1775             res += '@%3dk' % fdict['abr']
1776         if fdict.get('asr') is not None:
1777             res += ' (%5dHz)' % fdict['asr']
1778         if fdict.get('filesize') is not None:
1779             if res:
1780                 res += ', '
1781             res += format_bytes(fdict['filesize'])
1782         elif fdict.get('filesize_approx') is not None:
1783             if res:
1784                 res += ', '
1785             res += '~' + format_bytes(fdict['filesize_approx'])
1786         return res
1787
1788     def list_formats(self, info_dict):
1789         formats = info_dict.get('formats', [info_dict])
1790         table = [
1791             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1792             for f in formats
1793             if f.get('preference') is None or f['preference'] >= -1000]
1794         if len(formats) > 1:
1795             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1796
1797         header_line = ['format code', 'extension', 'resolution', 'note']
1798         self.to_screen(
1799             '[info] Available formats for %s:\n%s' %
1800             (info_dict['id'], render_table(header_line, table)))
1801
1802     def list_thumbnails(self, info_dict):
1803         thumbnails = info_dict.get('thumbnails')
1804         if not thumbnails:
1805             tn_url = info_dict.get('thumbnail')
1806             if tn_url:
1807                 thumbnails = [{'id': '0', 'url': tn_url}]
1808             else:
1809                 self.to_screen(
1810                     '[info] No thumbnails present for %s' % info_dict['id'])
1811                 return
1812
1813         self.to_screen(
1814             '[info] Thumbnails for %s:' % info_dict['id'])
1815         self.to_screen(render_table(
1816             ['ID', 'width', 'height', 'URL'],
1817             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1818
1819     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1820         if not subtitles:
1821             self.to_screen('%s has no %s' % (video_id, name))
1822             return
1823         self.to_screen(
1824             'Available %s for %s:' % (name, video_id))
1825         self.to_screen(render_table(
1826             ['Language', 'formats'],
1827             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1828                 for lang, formats in subtitles.items()]))
1829
1830     def urlopen(self, req):
1831         """ Start an HTTP download """
1832
1833         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1834         # always respected by websites, some tend to give out URLs with non percent-encoded
1835         # non-ASCII characters (see telemb.py, ard.py [#3412])
1836         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1837         # To work around aforementioned issue we will replace request's original URL with
1838         # percent-encoded one
1839         req_is_string = isinstance(req, compat_basestring)
1840         url = req if req_is_string else req.get_full_url()
1841         url_escaped = escape_url(url)
1842
1843         # Substitute URL if any change after escaping
1844         if url != url_escaped:
1845             if req_is_string:
1846                 req = url_escaped
1847             else:
1848                 req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
1849                 req = req_type(
1850                     url_escaped, data=req.data, headers=req.headers,
1851                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1852
1853         return self._opener.open(req, timeout=self._socket_timeout)
1854
1855     def print_debug_header(self):
1856         if not self.params.get('verbose'):
1857             return
1858
1859         if type('') is not compat_str:
1860             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1861             self.report_warning(
1862                 'Your Python is broken! Update to a newer and supported version')
1863
1864         stdout_encoding = getattr(
1865             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1866         encoding_str = (
1867             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1868                 locale.getpreferredencoding(),
1869                 sys.getfilesystemencoding(),
1870                 stdout_encoding,
1871                 self.get_encoding()))
1872         write_string(encoding_str, encoding=None)
1873
1874         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1875         try:
1876             sp = subprocess.Popen(
1877                 ['git', 'rev-parse', '--short', 'HEAD'],
1878                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1879                 cwd=os.path.dirname(os.path.abspath(__file__)))
1880             out, err = sp.communicate()
1881             out = out.decode().strip()
1882             if re.match('[0-9a-f]+', out):
1883                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1884         except Exception:
1885             try:
1886                 sys.exc_clear()
1887             except Exception:
1888                 pass
1889         self._write_string('[debug] Python version %s - %s\n' % (
1890             platform.python_version(), platform_name()))
1891
1892         exe_versions = FFmpegPostProcessor.get_versions(self)
1893         exe_versions['rtmpdump'] = rtmpdump_version()
1894         exe_str = ', '.join(
1895             '%s %s' % (exe, v)
1896             for exe, v in sorted(exe_versions.items())
1897             if v
1898         )
1899         if not exe_str:
1900             exe_str = 'none'
1901         self._write_string('[debug] exe versions: %s\n' % exe_str)
1902
1903         proxy_map = {}
1904         for handler in self._opener.handlers:
1905             if hasattr(handler, 'proxies'):
1906                 proxy_map.update(handler.proxies)
1907         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1908
1909         if self.params.get('call_home', False):
1910             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1911             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1912             latest_version = self.urlopen(
1913                 'https://yt-dl.org/latest/version').read().decode('utf-8')
1914             if version_tuple(latest_version) > version_tuple(__version__):
1915                 self.report_warning(
1916                     'You are using an outdated version (newest version: %s)! '
1917                     'See https://yt-dl.org/update if you need help updating.' %
1918                     latest_version)
1919
1920     def _setup_opener(self):
1921         timeout_val = self.params.get('socket_timeout')
1922         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1923
1924         opts_cookiefile = self.params.get('cookiefile')
1925         opts_proxy = self.params.get('proxy')
1926
1927         if opts_cookiefile is None:
1928             self.cookiejar = compat_cookiejar.CookieJar()
1929         else:
1930             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1931                 opts_cookiefile)
1932             if os.access(opts_cookiefile, os.R_OK):
1933                 self.cookiejar.load()
1934
1935         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1936             self.cookiejar)
1937         if opts_proxy is not None:
1938             if opts_proxy == '':
1939                 proxies = {}
1940             else:
1941                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1942         else:
1943             proxies = compat_urllib_request.getproxies()
1944             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1945             if 'http' in proxies and 'https' not in proxies:
1946                 proxies['https'] = proxies['http']
1947         proxy_handler = PerRequestProxyHandler(proxies)
1948
1949         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1950         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1951         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1952         opener = compat_urllib_request.build_opener(
1953             proxy_handler, https_handler, cookie_processor, ydlh)
1954
1955         # Delete the default user-agent header, which would otherwise apply in
1956         # cases where our custom HTTP handler doesn't come into play
1957         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1958         opener.addheaders = []
1959         self._opener = opener
1960
1961     def encode(self, s):
1962         if isinstance(s, bytes):
1963             return s  # Already encoded
1964
1965         try:
1966             return s.encode(self.get_encoding())
1967         except UnicodeEncodeError as err:
1968             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1969             raise
1970
1971     def get_encoding(self):
1972         encoding = self.params.get('encoding')
1973         if encoding is None:
1974             encoding = preferredencoding()
1975         return encoding
1976
1977     def _write_thumbnails(self, info_dict, filename):
1978         if self.params.get('writethumbnail', False):
1979             thumbnails = info_dict.get('thumbnails')
1980             if thumbnails:
1981                 thumbnails = [thumbnails[-1]]
1982         elif self.params.get('write_all_thumbnails', False):
1983             thumbnails = info_dict.get('thumbnails')
1984         else:
1985             return
1986
1987         if not thumbnails:
1988             # No thumbnails present, so return immediately
1989             return
1990
1991         for t in thumbnails:
1992             thumb_ext = determine_ext(t['url'], 'jpg')
1993             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
1994             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
1995             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
1996
1997             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1998                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
1999                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2000             else:
2001                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2002                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2003                 try:
2004                     uf = self.urlopen(t['url'])
2005                     with open(thumb_filename, 'wb') as thumbf:
2006                         shutil.copyfileobj(uf, thumbf)
2007                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2008                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2009                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2010                     self.report_warning('Unable to download thumbnail "%s": %s' %
2011                                         (t['url'], compat_str(err)))