Merge remote-tracking branch 'jaimemf/format_spec_groups' (closes #6124)
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import datetime
9 import errno
10 import fileinput
11 import io
12 import itertools
13 import json
14 import locale
15 import operator
16 import os
17 import platform
18 import re
19 import shutil
20 import subprocess
21 import socket
22 import sys
23 import time
24 import tokenize
25 import traceback
26
27 if os.name == 'nt':
28     import ctypes
29
30 from .compat import (
31     compat_basestring,
32     compat_cookiejar,
33     compat_expanduser,
34     compat_get_terminal_size,
35     compat_http_client,
36     compat_kwargs,
37     compat_str,
38     compat_tokenize_tokenize,
39     compat_urllib_error,
40     compat_urllib_request,
41 )
42 from .utils import (
43     escape_url,
44     ContentTooShortError,
45     date_from_str,
46     DateRange,
47     DEFAULT_OUTTMPL,
48     determine_ext,
49     DownloadError,
50     encodeFilename,
51     ExtractorError,
52     format_bytes,
53     formatSeconds,
54     HEADRequest,
55     locked_file,
56     make_HTTPS_handler,
57     MaxDownloadsReached,
58     PagedList,
59     parse_filesize,
60     PerRequestProxyHandler,
61     PostProcessingError,
62     platform_name,
63     preferredencoding,
64     render_table,
65     SameFileError,
66     sanitize_filename,
67     sanitize_path,
68     std_headers,
69     subtitles_filename,
70     UnavailableVideoError,
71     url_basename,
72     version_tuple,
73     write_json_file,
74     write_string,
75     YoutubeDLHandler,
76     prepend_extension,
77     replace_extension,
78     args_to_str,
79     age_restricted,
80 )
81 from .cache import Cache
82 from .extractor import get_info_extractor, gen_extractors
83 from .downloader import get_suitable_downloader
84 from .downloader.rtmp import rtmpdump_version
85 from .postprocessor import (
86     FFmpegFixupM4aPP,
87     FFmpegFixupStretchedPP,
88     FFmpegMergerPP,
89     FFmpegPostProcessor,
90     get_postprocessor,
91 )
92 from .version import __version__
93
94
95 class YoutubeDL(object):
96     """YoutubeDL class.
97
98     YoutubeDL objects are the ones responsible of downloading the
99     actual video file and writing it to disk if the user has requested
100     it, among some other tasks. In most cases there should be one per
101     program. As, given a video URL, the downloader doesn't know how to
102     extract all the needed information, task that InfoExtractors do, it
103     has to pass the URL to one of them.
104
105     For this, YoutubeDL objects have a method that allows
106     InfoExtractors to be registered in a given order. When it is passed
107     a URL, the YoutubeDL object handles it to the first InfoExtractor it
108     finds that reports being able to handle it. The InfoExtractor extracts
109     all the information about the video or videos the URL refers to, and
110     YoutubeDL process the extracted information, possibly using a File
111     Downloader to download the video.
112
113     YoutubeDL objects accept a lot of parameters. In order not to saturate
114     the object constructor with arguments, it receives a dictionary of
115     options instead. These options are available through the params
116     attribute for the InfoExtractors to use. The YoutubeDL also
117     registers itself as the downloader in charge for the InfoExtractors
118     that are added to it, so this is a "mutual registration".
119
120     Available options:
121
122     username:          Username for authentication purposes.
123     password:          Password for authentication purposes.
124     videopassword:     Password for accessing a video.
125     usenetrc:          Use netrc for authentication instead.
126     verbose:           Print additional info to stdout.
127     quiet:             Do not print messages to stdout.
128     no_warnings:       Do not print out anything for warnings.
129     forceurl:          Force printing final URL.
130     forcetitle:        Force printing title.
131     forceid:           Force printing ID.
132     forcethumbnail:    Force printing thumbnail URL.
133     forcedescription:  Force printing description.
134     forcefilename:     Force printing final filename.
135     forceduration:     Force printing duration.
136     forcejson:         Force printing info_dict as JSON.
137     dump_single_json:  Force printing the info_dict of the whole playlist
138                        (or video) as a single JSON line.
139     simulate:          Do not download the video files.
140     format:            Video format code. See options.py for more information.
141     outtmpl:           Template for output names.
142     restrictfilenames: Do not allow "&" and spaces in file names
143     ignoreerrors:      Do not stop on download errors.
144     force_generic_extractor: Force downloader to use the generic extractor
145     nooverwrites:      Prevent overwriting files.
146     playliststart:     Playlist item to start at.
147     playlistend:       Playlist item to end at.
148     playlist_items:    Specific indices of playlist to download.
149     playlistreverse:   Download playlist items in reverse order.
150     matchtitle:        Download only matching titles.
151     rejecttitle:       Reject downloads for matching titles.
152     logger:            Log messages to a logging.Logger instance.
153     logtostderr:       Log messages to stderr instead of stdout.
154     writedescription:  Write the video description to a .description file
155     writeinfojson:     Write the video description to a .info.json file
156     writeannotations:  Write the video annotations to a .annotations.xml file
157     writethumbnail:    Write the thumbnail image to a file
158     write_all_thumbnails:  Write all thumbnail formats to files
159     writesubtitles:    Write the video subtitles to a file
160     writeautomaticsub: Write the automatic subtitles to a file
161     allsubtitles:      Downloads all the subtitles of the video
162                        (requires writesubtitles or writeautomaticsub)
163     listsubtitles:     Lists all available subtitles for the video
164     subtitlesformat:   The format code for subtitles
165     subtitleslangs:    List of languages of the subtitles to download
166     keepvideo:         Keep the video file after post-processing
167     daterange:         A DateRange object, download only if the upload_date is in the range.
168     skip_download:     Skip the actual download of the video file
169     cachedir:          Location of the cache files in the filesystem.
170                        False to disable filesystem cache.
171     noplaylist:        Download single video instead of a playlist if in doubt.
172     age_limit:         An integer representing the user's age in years.
173                        Unsuitable videos for the given age are skipped.
174     min_views:         An integer representing the minimum view count the video
175                        must have in order to not be skipped.
176                        Videos without view count information are always
177                        downloaded. None for no limit.
178     max_views:         An integer representing the maximum view count.
179                        Videos that are more popular than that are not
180                        downloaded.
181                        Videos without view count information are always
182                        downloaded. None for no limit.
183     download_archive:  File name of a file where all downloads are recorded.
184                        Videos already present in the file are not downloaded
185                        again.
186     cookiefile:        File name where cookies should be read from and dumped to.
187     nocheckcertificate:Do not verify SSL certificates
188     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
189                        At the moment, this is only supported by YouTube.
190     proxy:             URL of the proxy server to use
191     cn_verification_proxy:  URL of the proxy to use for IP address verification
192                        on Chinese sites. (Experimental)
193     socket_timeout:    Time to wait for unresponsive hosts, in seconds
194     bidi_workaround:   Work around buggy terminals without bidirectional text
195                        support, using fribidi
196     debug_printtraffic:Print out sent and received HTTP traffic
197     include_ads:       Download ads as well
198     default_search:    Prepend this string if an input url is not valid.
199                        'auto' for elaborate guessing
200     encoding:          Use this encoding instead of the system-specified.
201     extract_flat:      Do not resolve URLs, return the immediate result.
202                        Pass in 'in_playlist' to only show this behavior for
203                        playlist items.
204     postprocessors:    A list of dictionaries, each with an entry
205                        * key:  The name of the postprocessor. See
206                                youtube_dl/postprocessor/__init__.py for a list.
207                        as well as any further keyword arguments for the
208                        postprocessor.
209     progress_hooks:    A list of functions that get called on download
210                        progress, with a dictionary with the entries
211                        * status: One of "downloading", "error", or "finished".
212                                  Check this first and ignore unknown values.
213
214                        If status is one of "downloading", or "finished", the
215                        following properties may also be present:
216                        * filename: The final filename (always present)
217                        * tmpfilename: The filename we're currently writing to
218                        * downloaded_bytes: Bytes on disk
219                        * total_bytes: Size of the whole file, None if unknown
220                        * total_bytes_estimate: Guess of the eventual file size,
221                                                None if unavailable.
222                        * elapsed: The number of seconds since download started.
223                        * eta: The estimated time in seconds, None if unknown
224                        * speed: The download speed in bytes/second, None if
225                                 unknown
226                        * fragment_index: The counter of the currently
227                                          downloaded video fragment.
228                        * fragment_count: The number of fragments (= individual
229                                          files that will be merged)
230
231                        Progress hooks are guaranteed to be called at least once
232                        (with status "finished") if the download is successful.
233     merge_output_format: Extension to use when merging formats.
234     fixup:             Automatically correct known faults of the file.
235                        One of:
236                        - "never": do nothing
237                        - "warn": only emit a warning
238                        - "detect_or_warn": check whether we can do anything
239                                            about it, warn otherwise (default)
240     source_address:    (Experimental) Client-side IP address to bind to.
241     call_home:         Boolean, true iff we are allowed to contact the
242                        youtube-dl servers for debugging.
243     sleep_interval:    Number of seconds to sleep before each download.
244     listformats:       Print an overview of available video formats and exit.
245     list_thumbnails:   Print a table of all thumbnails and exit.
246     match_filter:      A function that gets called with the info_dict of
247                        every video.
248                        If it returns a message, the video is ignored.
249                        If it returns None, the video is downloaded.
250                        match_filter_func in utils.py is one example for this.
251     no_color:          Do not emit color codes in output.
252
253     The following options determine which downloader is picked:
254     external_downloader: Executable of the external downloader to call.
255                        None or unset for standard (built-in) downloader.
256     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
257
258     The following parameters are not used by YoutubeDL itself, they are used by
259     the downloader (see youtube_dl/downloader/common.py):
260     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
261     noresizebuffer, retries, continuedl, noprogress, consoletitle,
262     xattr_set_filesize, external_downloader_args.
263
264     The following options are used by the post processors:
265     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
266                        otherwise prefer avconv.
267     postprocessor_args: A list of additional command-line arguments for the
268                         postprocessor.
269     """
270
    # Class-level defaults; every one of these is replaced with a per-instance
    # value in __init__ (they exist so the attributes are always present).
    params = None             # dict of downloader options (see class docstring)
    _ies = []                 # registered InfoExtractor instances, in order
    _pps = []                 # registered PostProcessor instances, in order
    _download_retcode = None  # process exit code reflecting download errors
    _num_downloads = None     # count of files downloaded in this session
    _screen_file = None       # stream for screen output (stdout or stderr)
278     def __init__(self, params=None, auto_init=True):
279         """Create a FileDownloader object with the given options."""
280         if params is None:
281             params = {}
282         self._ies = []
283         self._ies_instances = {}
284         self._pps = []
285         self._progress_hooks = []
286         self._download_retcode = 0
287         self._num_downloads = 0
288         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
289         self._err_file = sys.stderr
290         self.params = params
291         self.cache = Cache(self)
292
293         if params.get('bidi_workaround', False):
294             try:
295                 import pty
296                 master, slave = pty.openpty()
297                 width = compat_get_terminal_size().columns
298                 if width is None:
299                     width_args = []
300                 else:
301                     width_args = ['-w', str(width)]
302                 sp_kwargs = dict(
303                     stdin=subprocess.PIPE,
304                     stdout=slave,
305                     stderr=self._err_file)
306                 try:
307                     self._output_process = subprocess.Popen(
308                         ['bidiv'] + width_args, **sp_kwargs
309                     )
310                 except OSError:
311                     self._output_process = subprocess.Popen(
312                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
313                 self._output_channel = os.fdopen(master, 'rb')
314             except OSError as ose:
315                 if ose.errno == 2:
316                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
317                 else:
318                     raise
319
320         if (sys.version_info >= (3,) and sys.platform != 'win32' and
321                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
322                 not params.get('restrictfilenames', False)):
323             # On Python 3, the Unicode filesystem API will throw errors (#1474)
324             self.report_warning(
325                 'Assuming --restrict-filenames since file system encoding '
326                 'cannot encode all characters. '
327                 'Set the LC_ALL environment variable to fix this.')
328             self.params['restrictfilenames'] = True
329
330         if isinstance(params.get('outtmpl'), bytes):
331             self.report_warning(
332                 'Parameter outtmpl is bytes, but should be a unicode string. '
333                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
334
335         self._setup_opener()
336
337         if auto_init:
338             self.print_debug_header()
339             self.add_default_info_extractors()
340
341         for pp_def_raw in self.params.get('postprocessors', []):
342             pp_class = get_postprocessor(pp_def_raw['key'])
343             pp_def = dict(pp_def_raw)
344             del pp_def['key']
345             pp = pp_class(self, **compat_kwargs(pp_def))
346             self.add_post_processor(pp)
347
348         for ph in self.params.get('progress_hooks', []):
349             self.add_progress_hook(ph)
350
351     def warn_if_short_id(self, argv):
352         # short YouTube ID starting with dash?
353         idxs = [
354             i for i, a in enumerate(argv)
355             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
356         if idxs:
357             correct_argv = (
358                 ['youtube-dl'] +
359                 [a for i, a in enumerate(argv) if i not in idxs] +
360                 ['--'] + [argv[i] for i in idxs]
361             )
362             self.report_warning(
363                 'Long argument string detected. '
364                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
365                 args_to_str(correct_argv))
366
367     def add_info_extractor(self, ie):
368         """Add an InfoExtractor object to the end of the list."""
369         self._ies.append(ie)
370         self._ies_instances[ie.ie_key()] = ie
371         ie.set_downloader(self)
372
373     def get_info_extractor(self, ie_key):
374         """
375         Get an instance of an IE with name ie_key, it will try to get one from
376         the _ies list, if there's no instance it will create a new one and add
377         it to the extractor list.
378         """
379         ie = self._ies_instances.get(ie_key)
380         if ie is None:
381             ie = get_info_extractor(ie_key)()
382             self.add_info_extractor(ie)
383         return ie
384
385     def add_default_info_extractors(self):
386         """
387         Add the InfoExtractors returned by gen_extractors to the end of the list
388         """
389         for ie in gen_extractors():
390             self.add_info_extractor(ie)
391
392     def add_post_processor(self, pp):
393         """Add a PostProcessor object to the end of the chain."""
394         self._pps.append(pp)
395         pp.set_downloader(self)
396
397     def add_progress_hook(self, ph):
398         """Add the progress hook (currently only for the file downloader)"""
399         self._progress_hooks.append(ph)
400
401     def _bidi_workaround(self, message):
402         if not hasattr(self, '_output_channel'):
403             return message
404
405         assert hasattr(self, '_output_process')
406         assert isinstance(message, compat_str)
407         line_count = message.count('\n') + 1
408         self._output_process.stdin.write((message + '\n').encode('utf-8'))
409         self._output_process.stdin.flush()
410         res = ''.join(self._output_channel.readline().decode('utf-8')
411                       for _ in range(line_count))
412         return res[:-len('\n')]
413
414     def to_screen(self, message, skip_eol=False):
415         """Print message to stdout if not in quiet mode."""
416         return self.to_stdout(message, skip_eol, check_quiet=True)
417
418     def _write_string(self, s, out=None):
419         write_string(s, out=out, encoding=self.params.get('encoding'))
420
421     def to_stdout(self, message, skip_eol=False, check_quiet=False):
422         """Print message to stdout if not in quiet mode."""
423         if self.params.get('logger'):
424             self.params['logger'].debug(message)
425         elif not check_quiet or not self.params.get('quiet', False):
426             message = self._bidi_workaround(message)
427             terminator = ['\n', ''][skip_eol]
428             output = message + terminator
429
430             self._write_string(output, self._screen_file)
431
432     def to_stderr(self, message):
433         """Print message to stderr."""
434         assert isinstance(message, compat_str)
435         if self.params.get('logger'):
436             self.params['logger'].error(message)
437         else:
438             message = self._bidi_workaround(message)
439             output = message + '\n'
440             self._write_string(output, self._err_file)
441
442     def to_console_title(self, message):
443         if not self.params.get('consoletitle', False):
444             return
445         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
446             # c_wchar_p() might not be necessary if `message` is
447             # already of type unicode()
448             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
449         elif 'TERM' in os.environ:
450             self._write_string('\033]0;%s\007' % message, self._screen_file)
451
452     def save_console_title(self):
453         if not self.params.get('consoletitle', False):
454             return
455         if 'TERM' in os.environ:
456             # Save the title on stack
457             self._write_string('\033[22;0t', self._screen_file)
458
459     def restore_console_title(self):
460         if not self.params.get('consoletitle', False):
461             return
462         if 'TERM' in os.environ:
463             # Restore the title from stack
464             self._write_string('\033[23;0t', self._screen_file)
465
466     def __enter__(self):
467         self.save_console_title()
468         return self
469
470     def __exit__(self, *args):
471         self.restore_console_title()
472
473         if self.params.get('cookiefile') is not None:
474             self.cookiejar.save()
475
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.

        Raises DownloadError unless 'ignoreerrors' is set, in which case
        only the return code is set to 1.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # Prefer the traceback carried inside the current exception
                    # (e.g. an ExtractorError wrapping the original error).
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: show the current call stack.
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Propagate the inner exc_info of a wrapping exception when present,
            # otherwise the current one.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1
505
506     def report_warning(self, message):
507         '''
508         Print the message to stderr, it will be prefixed with 'WARNING:'
509         If stderr is a tty file the 'WARNING:' will be colored
510         '''
511         if self.params.get('logger') is not None:
512             self.params['logger'].warning(message)
513         else:
514             if self.params.get('no_warnings'):
515                 return
516             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
517                 _msg_header = '\033[0;33mWARNING:\033[0m'
518             else:
519                 _msg_header = 'WARNING:'
520             warning_message = '%s %s' % (_msg_header, message)
521             self.to_stderr(warning_message)
522
523     def report_error(self, message, tb=None):
524         '''
525         Do the same as trouble, but prefixes the message with 'ERROR:', colored
526         in red if stderr is a tty file.
527         '''
528         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
529             _msg_header = '\033[0;31mERROR:\033[0m'
530         else:
531             _msg_header = 'ERROR:'
532         error_message = '%s %s' % (_msg_header, message)
533         self.trouble(error_message, tb)
534
535     def report_file_already_downloaded(self, file_name):
536         """Report file has already been fully downloaded."""
537         try:
538             self.to_screen('[download] %s has already been downloaded' % file_name)
539         except UnicodeEncodeError:
540             self.to_screen('[download] The file has already been downloaded')
541
542     def prepare_filename(self, info_dict):
543         """Generate the output filename."""
544         try:
545             template_dict = dict(info_dict)
546
547             template_dict['epoch'] = int(time.time())
548             autonumber_size = self.params.get('autonumber_size')
549             if autonumber_size is None:
550                 autonumber_size = 5
551             autonumber_templ = '%0' + str(autonumber_size) + 'd'
552             template_dict['autonumber'] = autonumber_templ % self._num_downloads
553             if template_dict.get('playlist_index') is not None:
554                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
555             if template_dict.get('resolution') is None:
556                 if template_dict.get('width') and template_dict.get('height'):
557                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
558                 elif template_dict.get('height'):
559                     template_dict['resolution'] = '%sp' % template_dict['height']
560                 elif template_dict.get('width'):
561                     template_dict['resolution'] = '?x%d' % template_dict['width']
562
563             sanitize = lambda k, v: sanitize_filename(
564                 compat_str(v),
565                 restricted=self.params.get('restrictfilenames'),
566                 is_id=(k == 'id'))
567             template_dict = dict((k, sanitize(k, v))
568                                  for k, v in template_dict.items()
569                                  if v is not None)
570             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
571
572             outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL))
573             tmpl = compat_expanduser(outtmpl)
574             filename = tmpl % template_dict
575             # Temporary fix for #4787
576             # 'Treat' all problem characters by passing filename through preferredencoding
577             # to workaround encoding issues with subprocess on python2 @ Windows
578             if sys.version_info < (3, 0) and sys.platform == 'win32':
579                 filename = encodeFilename(filename, True).decode(preferredencoding())
580             return filename
581         except ValueError as err:
582             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
583             return None
584
585     def _match_entry(self, info_dict, incomplete):
586         """ Returns None iff the file should be downloaded """
587
588         video_title = info_dict.get('title', info_dict.get('id', 'video'))
589         if 'title' in info_dict:
590             # This can happen when we're just evaluating the playlist
591             title = info_dict['title']
592             matchtitle = self.params.get('matchtitle', False)
593             if matchtitle:
594                 if not re.search(matchtitle, title, re.IGNORECASE):
595                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
596             rejecttitle = self.params.get('rejecttitle', False)
597             if rejecttitle:
598                 if re.search(rejecttitle, title, re.IGNORECASE):
599                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
600         date = info_dict.get('upload_date', None)
601         if date is not None:
602             dateRange = self.params.get('daterange', DateRange())
603             if date not in dateRange:
604                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
605         view_count = info_dict.get('view_count', None)
606         if view_count is not None:
607             min_views = self.params.get('min_views')
608             if min_views is not None and view_count < min_views:
609                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
610             max_views = self.params.get('max_views')
611             if max_views is not None and view_count > max_views:
612                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
613         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
614             return 'Skipping "%s" because it is age restricted' % video_title
615         if self.in_download_archive(info_dict):
616             return '%s has already been recorded in archive' % video_title
617
618         if not incomplete:
619             match_filter = self.params.get('match_filter')
620             if match_filter is not None:
621                 ret = match_filter(info_dict)
622                 if ret is not None:
623                     return ret
624
625         return None
626
627     @staticmethod
628     def add_extra_info(info_dict, extra_info):
629         '''Set the keys from extra_info in info dict if they are missing'''
630         for key, value in extra_info.items():
631             info_dict.setdefault(key, value)
632
633     def extract_info(self, url, download=True, ie_key=None, extra_info={},
634                      process=True, force_generic_extractor=False):
635         '''
636         Returns a list with a dictionary for each video we find.
637         If 'download', also downloads the videos.
638         extra_info is a dict containing the extra values to add to each result
639         '''
640
641         if not ie_key and force_generic_extractor:
642             ie_key = 'Generic'
643
644         if ie_key:
645             ies = [self.get_info_extractor(ie_key)]
646         else:
647             ies = self._ies
648
649         for ie in ies:
650             if not ie.suitable(url):
651                 continue
652
653             if not ie.working():
654                 self.report_warning('The program functionality for this site has been marked as broken, '
655                                     'and will probably not work.')
656
657             try:
658                 ie_result = ie.extract(url)
659                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
660                     break
661                 if isinstance(ie_result, list):
662                     # Backwards compatibility: old IE result format
663                     ie_result = {
664                         '_type': 'compat_list',
665                         'entries': ie_result,
666                     }
667                 self.add_default_extra_info(ie_result, ie, url)
668                 if process:
669                     return self.process_ie_result(ie_result, download, extra_info)
670                 else:
671                     return ie_result
672             except ExtractorError as de:  # An error we somewhat expected
673                 self.report_error(compat_str(de), de.format_traceback())
674                 break
675             except MaxDownloadsReached:
676                 raise
677             except Exception as e:
678                 if self.params.get('ignoreerrors', False):
679                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
680                     break
681                 else:
682                     raise
683         else:
684             self.report_error('no suitable InfoExtractor for URL %s' % url)
685
686     def add_default_extra_info(self, ie_result, ie, url):
687         self.add_extra_info(ie_result, {
688             'extractor': ie.IE_NAME,
689             'webpage_url': url,
690             'webpage_url_basename': url_basename(url),
691             'extractor_key': ie.ie_key(),
692         })
693
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """

        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            # With flat extraction (extract_flat is True, or 'in_playlist'
            # while processing a playlist entry) the bare URL reference is
            # returned as-is instead of being resolved further.
            extract_flat = self.params.get('extract_flat', False)
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                    extract_flat is True):
                if self.params.get('forcejson', False):
                    self.to_stdout(json.dumps(ie_result))
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # Non-None fields from the embedding page override the embedded
            # extraction, except '_type' and 'url' which must come from the
            # new result (otherwise we would recurse forever).
            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            assert new_result.get('_type') != 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist' or result_type == 'multi_video':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # 'playliststart' is 1-based in the params; convert to 0-based
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

            # 'playlist_items' is a comma-separated list of 1-based indices
            # and inclusive ranges, e.g. '1-3,7'; when given it takes
            # precedence over playliststart/playlistend below.
            playlistitems_str = self.params.get('playlist_items', None)
            playlistitems = None
            if playlistitems_str is not None:
                def iter_playlistitems(format):
                    for string_segment in format.split(','):
                        if '-' in string_segment:
                            start, end = string_segment.split('-')
                            for item in range(int(start), int(end) + 1):
                                yield int(item)
                        else:
                            yield int(string_segment)
                playlistitems = iter_playlistitems(playlistitems_str)

            ie_entries = ie_result['entries']
            if isinstance(ie_entries, list):
                # Entries are fully materialized: index/slice directly
                n_all_entries = len(ie_entries)
                if playlistitems:
                    entries = [
                        ie_entries[i - 1] for i in playlistitems
                        if -n_all_entries <= i - 1 < n_all_entries]
                else:
                    entries = ie_entries[playliststart:playlistend]
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            elif isinstance(ie_entries, PagedList):
                # Paged results: fetch only the requested slices
                if playlistitems:
                    entries = []
                    for item in playlistitems:
                        entries.extend(ie_entries.getslice(
                            item - 1, item
                        ))
                else:
                    entries = ie_entries.getslice(
                        playliststart, playlistend)
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))
            else:  # iterable
                if playlistitems:
                    # NOTE(review): this materializes the whole iterable just
                    # to pick items — acceptable for typical playlist sizes
                    entry_list = list(ie_entries)
                    entries = [entry_list[i - 1] for i in playlistitems]
                else:
                    entries = list(itertools.islice(
                        ie_entries, playliststart, playlistend))
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))

            if self.params.get('playlistreverse', False):
                entries = entries[::-1]

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                extra = {
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_id': ie_result.get('id'),
                    'playlist_title': ie_result.get('title'),
                    # NOTE(review): with 'playlist_items' this index is offset
                    # from playliststart and may not match the entry's original
                    # position in the playlist — confirm intended behavior
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

                # incomplete=True: the entry only has partial metadata here,
                # so only the always-available filters are applied
                reason = self._match_entry(entry, incomplete=True)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                    continue

                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            return ie_result
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            def _fixup(r):
                # Propagate playlist-level bookkeeping fields to each entry
                self.add_extra_info(
                    r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    }
                )
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
857
858     def _build_format_filter(self, filter_spec):
859         " Returns a function to filter the formats according to the filter_spec "
860
861         OPERATORS = {
862             '<': operator.lt,
863             '<=': operator.le,
864             '>': operator.gt,
865             '>=': operator.ge,
866             '=': operator.eq,
867             '!=': operator.ne,
868         }
869         operator_rex = re.compile(r'''(?x)\s*
870             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
871             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
872             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
873             $
874             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
875         m = operator_rex.search(filter_spec)
876         if m:
877             try:
878                 comparison_value = int(m.group('value'))
879             except ValueError:
880                 comparison_value = parse_filesize(m.group('value'))
881                 if comparison_value is None:
882                     comparison_value = parse_filesize(m.group('value') + 'B')
883                 if comparison_value is None:
884                     raise ValueError(
885                         'Invalid value %r in format specification %r' % (
886                             m.group('value'), filter_spec))
887             op = OPERATORS[m.group('op')]
888
889         if not m:
890             STR_OPERATORS = {
891                 '=': operator.eq,
892                 '!=': operator.ne,
893             }
894             str_operator_rex = re.compile(r'''(?x)
895                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
896                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
897                 \s*(?P<value>[a-zA-Z0-9_-]+)
898                 \s*$
899                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
900             m = str_operator_rex.search(filter_spec)
901             if m:
902                 comparison_value = m.group('value')
903                 op = STR_OPERATORS[m.group('op')]
904
905         if not m:
906             raise ValueError('Invalid filter specification %r' % filter_spec)
907
908         def _filter(f):
909             actual_value = f.get(m.group('key'))
910             if actual_value is None:
911                 return m.group('none_inclusive')
912             return op(actual_value, comparison_value)
913         return _filter
914
    def build_format_selector(self, format_spec):
        """Compile a --format specification string into a selector function
        mapping a list of format dicts to the chosen format(s).

        Informal grammar: 'a/b' picks the first alternative that yields
        formats, 'a+b' merges a video and an audio format, '(...)' groups,
        ',' selects several formats, and '[key<=value]' attaches a filter
        (see _build_format_filter).
        """
        def syntax_error(note, start):
            # start is a (row, col) token position; the caret line points at col
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        # Selector tree node types
        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        def _parse_filter(tokens):
            # Consume tokens up to the matching ']' and return the raw
            # filter string between the brackets
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Recursive-descent parser over the token stream. The inside_*
            # flags tell which operators end the current subexpression; the
            # stream supports a one-token pushback via restore_last_token().
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        # A filter with no preceding selector applies to 'best'
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        video_selector = current_selector
                        audio_selector = _parse_format_selection(tokens, inside_merge=True)
                        if not video_selector or not audio_selector:
                            raise syntax_error('"+" must be between two format selectors', start)
                        current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _build_selector_function(selector):
            # Turn a parsed selector (tree or list of trees) into a function
            # formats -> iterable of selected format dicts
            if isinstance(selector, list):
                # Comma-separated selectors: chain the results of each one
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(formats):
                    for f in fs:
                        for format in f(formats):
                            yield format
                return selector_function
            elif selector.type == GROUP:
                selector_function = _build_selector_function(selector.selector)
            elif selector.type == PICKFIRST:
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(formats):
                    # Return the results of the first alternative that matches
                    for f in fs:
                        picked_formats = list(f(formats))
                        if picked_formats:
                            return picked_formats
                    return []
            elif selector.type == SINGLE:
                format_spec = selector.selector  # note: shadows the outer argument

                def selector_function(formats):
                    formats = list(formats)
                    if not formats:
                        return
                    if format_spec == 'all':
                        for f in formats:
                            yield f
                    elif format_spec in ['best', 'worst', None]:
                        # [-1] is treated as best, [0] as worst
                        format_idx = 0 if format_spec == 'worst' else -1
                        audiovideo_formats = [
                            f for f in formats
                            if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                        if audiovideo_formats:
                            yield audiovideo_formats[format_idx]
                        # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
                        elif (all(f.get('acodec') != 'none' for f in formats) or
                              all(f.get('vcodec') != 'none' for f in formats)):
                            yield formats[format_idx]
                    elif format_spec == 'bestaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[-1]
                    elif format_spec == 'worstaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[0]
                    elif format_spec == 'bestvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[-1]
                    elif format_spec == 'worstvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[0]
                    else:
                        # Otherwise the spec is a literal extension or a
                        # format_id; pick the last (best) match
                        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                        if format_spec in extensions:
                            filter_f = lambda f: f['ext'] == format_spec
                        else:
                            filter_f = lambda f: f['format_id'] == format_spec
                        matches = list(filter(filter_f, formats))
                        if matches:
                            yield matches[-1]
            elif selector.type == MERGE:
                def _merge(formats_info):
                    format_1, format_2 = [f['format_id'] for f in formats_info]
                    # The first format must contain the video and the
                    # second the audio
                    if formats_info[0].get('vcodec') == 'none':
                        self.report_error('The first format must '
                                          'contain the video, try using '
                                          '"-f %s+%s"' % (format_2, format_1))
                        return
                    # The container extension comes from the video format
                    # unless overridden by merge_output_format
                    output_ext = (
                        formats_info[0]['ext']
                        if self.params.get('merge_output_format') is None
                        else self.params['merge_output_format'])
                    return {
                        'requested_formats': formats_info,
                        'format': '%s+%s' % (formats_info[0].get('format'),
                                             formats_info[1].get('format')),
                        'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                formats_info[1].get('format_id')),
                        'width': formats_info[0].get('width'),
                        'height': formats_info[0].get('height'),
                        'resolution': formats_info[0].get('resolution'),
                        'fps': formats_info[0].get('fps'),
                        'vcodec': formats_info[0].get('vcodec'),
                        'vbr': formats_info[0].get('vbr'),
                        'stretched_ratio': formats_info[0].get('stretched_ratio'),
                        'acodec': formats_info[1].get('acodec'),
                        'abr': formats_info[1].get('abr'),
                        'ext': output_ext,
                    }
                video_selector, audio_selector = map(_build_selector_function, selector.selector)

                def selector_function(formats):
                    # Yield a merged entry for every video/audio combination
                    formats = list(formats)
                    for pair in itertools.product(video_selector(formats), audio_selector(formats)):
                        yield _merge(pair)

            # Apply the '[...]' filters attached to this selector node
            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(formats):
                for _filter in filters:
                    formats = list(filter(_filter, formats))
                return selector_function(formats)
            return final_selector

        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(compat_tokenize_tokenize(stream.readline))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator(object):
            # A token stream supporting a one-token pushback, as needed by
            # _parse_format_selection above
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            next = __next__  # Python 2 iterator protocol

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
1138
1139     def _calc_headers(self, info_dict):
1140         res = std_headers.copy()
1141
1142         add_headers = info_dict.get('http_headers')
1143         if add_headers:
1144             res.update(add_headers)
1145
1146         cookies = self._calc_cookies(info_dict)
1147         if cookies:
1148             res['Cookie'] = cookies
1149
1150         return res
1151
1152     def _calc_cookies(self, info_dict):
1153         pr = compat_urllib_request.Request(info_dict['url'])
1154         self.cookiejar.add_cookie_header(pr)
1155         return pr.get_header('Cookie')
1156
    def process_video_result(self, info_dict, download=True):
        """Resolve a single-video result: validate required fields, normalize
        thumbnails/date/subtitles metadata, select the requested formats and,
        if 'download', fetch each of them.

        Returns info_dict updated with the last selected format (backwards
        compatibility). Returns None early when only listing subtitles,
        formats or thumbnails was requested.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            # Promote a single 'thumbnail' value into the 'thumbnails' list
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if thumbnails:
            # Sort so the last entry is the preferred thumbnail, and give
            # every thumbnail a resolution string and an id
            thumbnails.sort(key=lambda t: (
                t.get('preference'), t.get('width'), t.get('height'),
                t.get('id'), t.get('url')))
            for i, t in enumerate(thumbnails):
                if t.get('width') and t.get('height'):
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                if t.get('id') is None:
                    t['id'] = '%d' % i

        if thumbnails and 'thumbnail' not in info_dict:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Working around out-of-range timestamp values (e.g. negative ones on Windows,
            # see http://bugs.python.org/issue1646728)
            try:
                upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
                info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
            except (ValueError, OverflowError, OSError):
                pass

        if self.params.get('listsubtitles', False):
            # Listing mode: print the available subtitles and stop processing
            if 'automatic_captions' in info_dict:
                self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
            self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
            return
        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], info_dict.get('subtitles'),
            info_dict.get('automatic_captions'))

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            raise ExtractorError('No video formats found!')

        # Maps each format_id to the list of formats carrying it, to detect
        # and disambiguate duplicates below
        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)

            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        for format_id, ambiguous_formats in formats_dict.items():
            if len(ambiguous_formats) > 1:
                for i, format in enumerate(ambiguous_formats):
                    format['format_id'] = '%s-%d' % (format_id, i)

        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url']).lower()
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats'):
            self.list_formats(info_dict)
            return
        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            # Default format selection: prefer merged bestvideo+bestaudio
            # for sites known to split streams, when merging is possible
            # and output is not a pipe / live stream
            req_format_list = []
            if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
                    info_dict['extractor'] in ['youtube', 'ted'] and
                    not info_dict.get('is_live')):
                # NOTE(review): FFmpegMergerPP must be imported elsewhere in
                # this module — not visible in this section
                merger = FFmpegMergerPP(self)
                if merger.available and merger.can_merge():
                    req_format_list.append('bestvideo+bestaudio')
            req_format_list.append('best')
            req_format = '/'.join(req_format_list)
        format_selector = self.build_format_selector(req_format)
        formats_to_download = list(format_selector(formats))
        if not formats_to_download:
            raise ExtractorError('requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
1297
1298     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1299         """Select the requested subtitles and their format"""
1300         available_subs = {}
1301         if normal_subtitles and self.params.get('writesubtitles'):
1302             available_subs.update(normal_subtitles)
1303         if automatic_captions and self.params.get('writeautomaticsub'):
1304             for lang, cap_info in automatic_captions.items():
1305                 if lang not in available_subs:
1306                     available_subs[lang] = cap_info
1307
1308         if (not self.params.get('writesubtitles') and not
1309                 self.params.get('writeautomaticsub') or not
1310                 available_subs):
1311             return None
1312
1313         if self.params.get('allsubtitles', False):
1314             requested_langs = available_subs.keys()
1315         else:
1316             if self.params.get('subtitleslangs', False):
1317                 requested_langs = self.params.get('subtitleslangs')
1318             elif 'en' in available_subs:
1319                 requested_langs = ['en']
1320             else:
1321                 requested_langs = [list(available_subs.keys())[0]]
1322
1323         formats_query = self.params.get('subtitlesformat', 'best')
1324         formats_preference = formats_query.split('/') if formats_query else []
1325         subs = {}
1326         for lang in requested_langs:
1327             formats = available_subs.get(lang)
1328             if formats is None:
1329                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1330                 continue
1331             for ext in formats_preference:
1332                 if ext == 'best':
1333                     f = formats[-1]
1334                     break
1335                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1336                 if matches:
1337                     f = matches[-1]
1338                     break
1339             else:
1340                 f = formats[-1]
1341                 self.report_warning(
1342                     'No subtitle format found matching "%s" for language %s, '
1343                     'using %s' % (formats_query, lang, f['ext']))
1344             subs[lang] = f
1345         return subs
1346
1347     def process_info(self, info_dict):
1348         """Process a single resolved IE result."""
1349
1350         assert info_dict.get('_type', 'video') == 'video'
1351
1352         max_downloads = self.params.get('max_downloads')
1353         if max_downloads is not None:
1354             if self._num_downloads >= int(max_downloads):
1355                 raise MaxDownloadsReached()
1356
1357         info_dict['fulltitle'] = info_dict['title']
1358         if len(info_dict['title']) > 200:
1359             info_dict['title'] = info_dict['title'][:197] + '...'
1360
1361         if 'format' not in info_dict:
1362             info_dict['format'] = info_dict['ext']
1363
1364         reason = self._match_entry(info_dict, incomplete=False)
1365         if reason is not None:
1366             self.to_screen('[download] ' + reason)
1367             return
1368
1369         self._num_downloads += 1
1370
1371         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1372
1373         # Forced printings
1374         if self.params.get('forcetitle', False):
1375             self.to_stdout(info_dict['fulltitle'])
1376         if self.params.get('forceid', False):
1377             self.to_stdout(info_dict['id'])
1378         if self.params.get('forceurl', False):
1379             if info_dict.get('requested_formats') is not None:
1380                 for f in info_dict['requested_formats']:
1381                     self.to_stdout(f['url'] + f.get('play_path', ''))
1382             else:
1383                 # For RTMP URLs, also include the playpath
1384                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1385         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1386             self.to_stdout(info_dict['thumbnail'])
1387         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1388             self.to_stdout(info_dict['description'])
1389         if self.params.get('forcefilename', False) and filename is not None:
1390             self.to_stdout(filename)
1391         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1392             self.to_stdout(formatSeconds(info_dict['duration']))
1393         if self.params.get('forceformat', False):
1394             self.to_stdout(info_dict['format'])
1395         if self.params.get('forcejson', False):
1396             self.to_stdout(json.dumps(info_dict))
1397
1398         # Do nothing else if in simulate mode
1399         if self.params.get('simulate', False):
1400             return
1401
1402         if filename is None:
1403             return
1404
1405         try:
1406             dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1407             if dn and not os.path.exists(dn):
1408                 os.makedirs(dn)
1409         except (OSError, IOError) as err:
1410             self.report_error('unable to create directory ' + compat_str(err))
1411             return
1412
1413         if self.params.get('writedescription', False):
1414             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1415             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1416                 self.to_screen('[info] Video description is already present')
1417             elif info_dict.get('description') is None:
1418                 self.report_warning('There\'s no description to write.')
1419             else:
1420                 try:
1421                     self.to_screen('[info] Writing video description to: ' + descfn)
1422                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1423                         descfile.write(info_dict['description'])
1424                 except (OSError, IOError):
1425                     self.report_error('Cannot write description file ' + descfn)
1426                     return
1427
1428         if self.params.get('writeannotations', False):
1429             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1430             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1431                 self.to_screen('[info] Video annotations are already present')
1432             else:
1433                 try:
1434                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1435                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1436                         annofile.write(info_dict['annotations'])
1437                 except (KeyError, TypeError):
1438                     self.report_warning('There are no annotations to write.')
1439                 except (OSError, IOError):
1440                     self.report_error('Cannot write annotations file: ' + annofn)
1441                     return
1442
1443         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1444                                        self.params.get('writeautomaticsub')])
1445
1446         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1447             # subtitles download errors are already managed as troubles in relevant IE
1448             # that way it will silently go on when used with unsupporting IE
1449             subtitles = info_dict['requested_subtitles']
1450             ie = self.get_info_extractor(info_dict['extractor_key'])
1451             for sub_lang, sub_info in subtitles.items():
1452                 sub_format = sub_info['ext']
1453                 if sub_info.get('data') is not None:
1454                     sub_data = sub_info['data']
1455                 else:
1456                     try:
1457                         sub_data = ie._download_webpage(
1458                             sub_info['url'], info_dict['id'], note=False)
1459                     except ExtractorError as err:
1460                         self.report_warning('Unable to download subtitle for "%s": %s' %
1461                                             (sub_lang, compat_str(err.cause)))
1462                         continue
1463                 try:
1464                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1465                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1466                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1467                     else:
1468                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1469                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1470                             subfile.write(sub_data)
1471                 except (OSError, IOError):
1472                     self.report_error('Cannot write subtitles file ' + sub_filename)
1473                     return
1474
1475         if self.params.get('writeinfojson', False):
1476             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1477             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1478                 self.to_screen('[info] Video description metadata is already present')
1479             else:
1480                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1481                 try:
1482                     write_json_file(self.filter_requested_info(info_dict), infofn)
1483                 except (OSError, IOError):
1484                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1485                     return
1486
1487         self._write_thumbnails(info_dict, filename)
1488
1489         if not self.params.get('skip_download', False):
1490             try:
1491                 def dl(name, info):
1492                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1493                     for ph in self._progress_hooks:
1494                         fd.add_progress_hook(ph)
1495                     if self.params.get('verbose'):
1496                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1497                     return fd.download(name, info)
1498
1499                 if info_dict.get('requested_formats') is not None:
1500                     downloaded = []
1501                     success = True
1502                     merger = FFmpegMergerPP(self)
1503                     if not merger.available:
1504                         postprocessors = []
1505                         self.report_warning('You have requested multiple '
1506                                             'formats but ffmpeg or avconv are not installed.'
1507                                             ' The formats won\'t be merged.')
1508                     else:
1509                         postprocessors = [merger]
1510
1511                     def compatible_formats(formats):
1512                         video, audio = formats
1513                         # Check extension
1514                         video_ext, audio_ext = audio.get('ext'), video.get('ext')
1515                         if video_ext and audio_ext:
1516                             COMPATIBLE_EXTS = (
1517                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
1518                                 ('webm')
1519                             )
1520                             for exts in COMPATIBLE_EXTS:
1521                                 if video_ext in exts and audio_ext in exts:
1522                                     return True
1523                         # TODO: Check acodec/vcodec
1524                         return False
1525
1526                     filename_real_ext = os.path.splitext(filename)[1][1:]
1527                     filename_wo_ext = (
1528                         os.path.splitext(filename)[0]
1529                         if filename_real_ext == info_dict['ext']
1530                         else filename)
1531                     requested_formats = info_dict['requested_formats']
1532                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1533                         info_dict['ext'] = 'mkv'
1534                         self.report_warning(
1535                             'Requested formats are incompatible for merge and will be merged into mkv.')
1536                     # Ensure filename always has a correct extension for successful merge
1537                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1538                     if os.path.exists(encodeFilename(filename)):
1539                         self.to_screen(
1540                             '[download] %s has already been downloaded and '
1541                             'merged' % filename)
1542                     else:
1543                         for f in requested_formats:
1544                             new_info = dict(info_dict)
1545                             new_info.update(f)
1546                             fname = self.prepare_filename(new_info)
1547                             fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
1548                             downloaded.append(fname)
1549                             partial_success = dl(fname, new_info)
1550                             success = success and partial_success
1551                         info_dict['__postprocessors'] = postprocessors
1552                         info_dict['__files_to_merge'] = downloaded
1553                 else:
1554                     # Just a single file
1555                     success = dl(filename, info_dict)
1556             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1557                 self.report_error('unable to download video data: %s' % str(err))
1558                 return
1559             except (OSError, IOError) as err:
1560                 raise UnavailableVideoError(err)
1561             except (ContentTooShortError, ) as err:
1562                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1563                 return
1564
1565             if success:
1566                 # Fixup content
1567                 fixup_policy = self.params.get('fixup')
1568                 if fixup_policy is None:
1569                     fixup_policy = 'detect_or_warn'
1570
1571                 stretched_ratio = info_dict.get('stretched_ratio')
1572                 if stretched_ratio is not None and stretched_ratio != 1:
1573                     if fixup_policy == 'warn':
1574                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1575                             info_dict['id'], stretched_ratio))
1576                     elif fixup_policy == 'detect_or_warn':
1577                         stretched_pp = FFmpegFixupStretchedPP(self)
1578                         if stretched_pp.available:
1579                             info_dict.setdefault('__postprocessors', [])
1580                             info_dict['__postprocessors'].append(stretched_pp)
1581                         else:
1582                             self.report_warning(
1583                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1584                                     info_dict['id'], stretched_ratio))
1585                     else:
1586                         assert fixup_policy in ('ignore', 'never')
1587
1588                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1589                     if fixup_policy == 'warn':
1590                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1591                             info_dict['id']))
1592                     elif fixup_policy == 'detect_or_warn':
1593                         fixup_pp = FFmpegFixupM4aPP(self)
1594                         if fixup_pp.available:
1595                             info_dict.setdefault('__postprocessors', [])
1596                             info_dict['__postprocessors'].append(fixup_pp)
1597                         else:
1598                             self.report_warning(
1599                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1600                                     info_dict['id']))
1601                     else:
1602                         assert fixup_policy in ('ignore', 'never')
1603
1604                 try:
1605                     self.post_process(filename, info_dict)
1606                 except (PostProcessingError) as err:
1607                     self.report_error('postprocessing: %s' % str(err))
1608                     return
1609                 self.record_download_archive(info_dict)
1610
1611     def download(self, url_list):
1612         """Download a given list of URLs."""
1613         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1614         if (len(url_list) > 1 and
1615                 '%' not in outtmpl and
1616                 self.params.get('max_downloads') != 1):
1617             raise SameFileError(outtmpl)
1618
1619         for url in url_list:
1620             try:
1621                 # It also downloads the videos
1622                 res = self.extract_info(
1623                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
1624             except UnavailableVideoError:
1625                 self.report_error('unable to download video')
1626             except MaxDownloadsReached:
1627                 self.to_screen('[info] Maximum number of downloaded files reached.')
1628                 raise
1629             else:
1630                 if self.params.get('dump_single_json', False):
1631                     self.to_stdout(json.dumps(res))
1632
1633         return self._download_retcode
1634
1635     def download_with_info_file(self, info_filename):
1636         with contextlib.closing(fileinput.FileInput(
1637                 [info_filename], mode='r',
1638                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1639             # FileInput doesn't have a read method, we can't call json.load
1640             info = self.filter_requested_info(json.loads('\n'.join(f)))
1641         try:
1642             self.process_ie_result(info, download=True)
1643         except DownloadError:
1644             webpage_url = info.get('webpage_url')
1645             if webpage_url is not None:
1646                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1647                 return self.download([webpage_url])
1648             else:
1649                 raise
1650         return self._download_retcode
1651
1652     @staticmethod
1653     def filter_requested_info(info_dict):
1654         return dict(
1655             (k, v) for k, v in info_dict.items()
1656             if k not in ['requested_formats', 'requested_subtitles'])
1657
1658     def post_process(self, filename, ie_info):
1659         """Run all the postprocessors on the given file."""
1660         info = dict(ie_info)
1661         info['filepath'] = filename
1662         pps_chain = []
1663         if ie_info.get('__postprocessors') is not None:
1664             pps_chain.extend(ie_info['__postprocessors'])
1665         pps_chain.extend(self._pps)
1666         for pp in pps_chain:
1667             files_to_delete = []
1668             try:
1669                 files_to_delete, info = pp.run(info)
1670             except PostProcessingError as e:
1671                 self.report_error(e.msg)
1672             if files_to_delete and not self.params.get('keepvideo', False):
1673                 for old_filename in files_to_delete:
1674                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1675                     try:
1676                         os.remove(encodeFilename(old_filename))
1677                     except (IOError, OSError):
1678                         self.report_warning('Unable to remove downloaded original file')
1679
1680     def _make_archive_id(self, info_dict):
1681         # Future-proof against any change in case
1682         # and backwards compatibility with prior versions
1683         extractor = info_dict.get('extractor_key')
1684         if extractor is None:
1685             if 'id' in info_dict:
1686                 extractor = info_dict.get('ie_key')  # key in a playlist
1687         if extractor is None:
1688             return None  # Incomplete video information
1689         return extractor.lower() + ' ' + info_dict['id']
1690
1691     def in_download_archive(self, info_dict):
1692         fn = self.params.get('download_archive')
1693         if fn is None:
1694             return False
1695
1696         vid_id = self._make_archive_id(info_dict)
1697         if vid_id is None:
1698             return False  # Incomplete video information
1699
1700         try:
1701             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1702                 for line in archive_file:
1703                     if line.strip() == vid_id:
1704                         return True
1705         except IOError as ioe:
1706             if ioe.errno != errno.ENOENT:
1707                 raise
1708         return False
1709
1710     def record_download_archive(self, info_dict):
1711         fn = self.params.get('download_archive')
1712         if fn is None:
1713             return
1714         vid_id = self._make_archive_id(info_dict)
1715         assert vid_id
1716         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1717             archive_file.write(vid_id + '\n')
1718
1719     @staticmethod
1720     def format_resolution(format, default='unknown'):
1721         if format.get('vcodec') == 'none':
1722             return 'audio only'
1723         if format.get('resolution') is not None:
1724             return format['resolution']
1725         if format.get('height') is not None:
1726             if format.get('width') is not None:
1727                 res = '%sx%s' % (format['width'], format['height'])
1728             else:
1729                 res = '%sp' % format['height']
1730         elif format.get('width') is not None:
1731             res = '?x%d' % format['width']
1732         else:
1733             res = default
1734         return res
1735
    def _format_note(self, fdict):
        """Build a short human-readable note for a format dict.

        Pieces (bitrates, container, codecs, fps, sample rate, filesize) are
        appended in a fixed order, separated by ', ' where a piece already
        precedes them.
        """
        res = ''
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            if res:
                res += ', '
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            if res:
                res += ', '
            res += fdict['vcodec']
            if fdict.get('vbr') is not None:
                # The video bitrate appended below follows directly,
                # producing e.g. 'avc1@1500k'
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            # Both bitrates known but no video codec: label the number
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            res += ', %sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            if fdict['acodec'] == 'none':
                res += 'video only'
            else:
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            if res:
                res += ', '
            # '~' marks the size as an estimate
            res += '~' + format_bytes(fdict['filesize_approx'])
        return res
1785
1786     def list_formats(self, info_dict):
1787         formats = info_dict.get('formats', [info_dict])
1788         table = [
1789             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1790             for f in formats
1791             if f.get('preference') is None or f['preference'] >= -1000]
1792         if len(formats) > 1:
1793             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1794
1795         header_line = ['format code', 'extension', 'resolution', 'note']
1796         self.to_screen(
1797             '[info] Available formats for %s:\n%s' %
1798             (info_dict['id'], render_table(header_line, table)))
1799
1800     def list_thumbnails(self, info_dict):
1801         thumbnails = info_dict.get('thumbnails')
1802         if not thumbnails:
1803             tn_url = info_dict.get('thumbnail')
1804             if tn_url:
1805                 thumbnails = [{'id': '0', 'url': tn_url}]
1806             else:
1807                 self.to_screen(
1808                     '[info] No thumbnails present for %s' % info_dict['id'])
1809                 return
1810
1811         self.to_screen(
1812             '[info] Thumbnails for %s:' % info_dict['id'])
1813         self.to_screen(render_table(
1814             ['ID', 'width', 'height', 'URL'],
1815             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1816
1817     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1818         if not subtitles:
1819             self.to_screen('%s has no %s' % (video_id, name))
1820             return
1821         self.to_screen(
1822             'Available %s for %s:' % (name, video_id))
1823         self.to_screen(render_table(
1824             ['Language', 'formats'],
1825             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1826                 for lang, formats in subtitles.items()]))
1827
1828     def urlopen(self, req):
1829         """ Start an HTTP download """
1830
1831         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1832         # always respected by websites, some tend to give out URLs with non percent-encoded
1833         # non-ASCII characters (see telemb.py, ard.py [#3412])
1834         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1835         # To work around aforementioned issue we will replace request's original URL with
1836         # percent-encoded one
1837         req_is_string = isinstance(req, compat_basestring)
1838         url = req if req_is_string else req.get_full_url()
1839         url_escaped = escape_url(url)
1840
1841         # Substitute URL if any change after escaping
1842         if url != url_escaped:
1843             if req_is_string:
1844                 req = url_escaped
1845             else:
1846                 req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
1847                 req = req_type(
1848                     url_escaped, data=req.data, headers=req.headers,
1849                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1850
1851         return self._opener.open(req, timeout=self._socket_timeout)
1852
    def print_debug_header(self):
        """Write verbose debug information (encodings, versions, external
        program versions, proxies) to the output; no-op unless the
        'verbose' option is set."""
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
        try:
            # Best effort: also report the git commit when running from a
            # source checkout
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        except Exception:
            try:
                # Python 2.x only: clear the exception state left behind
                sys.exc_clear()
            except Exception:
                pass
        self._write_string('[debug] Python version %s - %s\n' % (
            platform.python_version(), platform_name()))

        # Versions of external programs (ffmpeg/avconv, rtmpdump, ...)
        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Collect the proxies configured on the opener's handlers
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            # Opt-in: report the public IP and check for a newer release
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
1917
    def _setup_opener(self):
        """Build the urllib opener (cookies, proxies, HTTPS handling) used
        for all HTTP requests and store it in self._opener."""
        timeout_val = self.params.get('socket_timeout')
        # Default socket timeout: 600 seconds
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            self.cookiejar = compat_cookiejar.CookieJar()
        else:
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
                opts_cookiefile)
            # Only load when the file is readable; a missing cookie file is
            # simply created on save
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load()

        cookie_processor = compat_urllib_request.HTTPCookieProcessor(
            self.cookiejar)
        if opts_proxy is not None:
            if opts_proxy == '':
                # An explicit empty --proxy disables all proxies
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        opener = compat_urllib_request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh)

        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
1958
1959     def encode(self, s):
1960         if isinstance(s, bytes):
1961             return s  # Already encoded
1962
1963         try:
1964             return s.encode(self.get_encoding())
1965         except UnicodeEncodeError as err:
1966             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1967             raise
1968
1969     def get_encoding(self):
1970         encoding = self.params.get('encoding')
1971         if encoding is None:
1972             encoding = preferredencoding()
1973         return encoding
1974
1975     def _write_thumbnails(self, info_dict, filename):
1976         if self.params.get('writethumbnail', False):
1977             thumbnails = info_dict.get('thumbnails')
1978             if thumbnails:
1979                 thumbnails = [thumbnails[-1]]
1980         elif self.params.get('write_all_thumbnails', False):
1981             thumbnails = info_dict.get('thumbnails')
1982         else:
1983             return
1984
1985         if not thumbnails:
1986             # No thumbnails present, so return immediately
1987             return
1988
1989         for t in thumbnails:
1990             thumb_ext = determine_ext(t['url'], 'jpg')
1991             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
1992             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
1993             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
1994
1995             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
1996                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
1997                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
1998             else:
1999                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2000                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2001                 try:
2002                     uf = self.urlopen(t['url'])
2003                     with open(thumb_filename, 'wb') as thumbf:
2004                         shutil.copyfileobj(uf, thumbf)
2005                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2006                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2007                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2008                     self.report_warning('Unable to download thumbnail "%s": %s' %
2009                                         (t['url'], compat_str(err)))