eedab37a7e1baad92b79547cbd4e5dbccc5b560b
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import datetime
9 import errno
10 import fileinput
11 import io
12 import itertools
13 import json
14 import locale
15 import operator
16 import os
17 import platform
18 import re
19 import shutil
20 import subprocess
21 import socket
22 import sys
23 import time
24 import tokenize
25 import traceback
26
27 if os.name == 'nt':
28     import ctypes
29
30 from .compat import (
31     compat_basestring,
32     compat_cookiejar,
33     compat_expanduser,
34     compat_get_terminal_size,
35     compat_http_client,
36     compat_kwargs,
37     compat_str,
38     compat_tokenize_tokenize,
39     compat_urllib_error,
40     compat_urllib_request,
41     compat_urllib_request_DataHandler,
42 )
43 from .utils import (
44     ContentTooShortError,
45     date_from_str,
46     DateRange,
47     DEFAULT_OUTTMPL,
48     determine_ext,
49     DownloadError,
50     encodeFilename,
51     ExtractorError,
52     format_bytes,
53     formatSeconds,
54     locked_file,
55     make_HTTPS_handler,
56     MaxDownloadsReached,
57     PagedList,
58     parse_filesize,
59     PerRequestProxyHandler,
60     PostProcessingError,
61     platform_name,
62     preferredencoding,
63     render_table,
64     SameFileError,
65     sanitize_filename,
66     sanitize_path,
67     sanitized_Request,
68     std_headers,
69     subtitles_filename,
70     UnavailableVideoError,
71     url_basename,
72     version_tuple,
73     write_json_file,
74     write_string,
75     YoutubeDLCookieProcessor,
76     YoutubeDLHandler,
77     prepend_extension,
78     replace_extension,
79     args_to_str,
80     age_restricted,
81 )
82 from .cache import Cache
83 from .extractor import get_info_extractor, gen_extractors
84 from .downloader import get_suitable_downloader
85 from .downloader.rtmp import rtmpdump_version
86 from .postprocessor import (
87     FFmpegFixupM4aPP,
88     FFmpegFixupStretchedPP,
89     FFmpegMergerPP,
90     FFmpegPostProcessor,
91     get_postprocessor,
92 )
93 from .version import __version__
94
95
96 class YoutubeDL(object):
97     """YoutubeDL class.
98
99     YoutubeDL objects are the ones responsible of downloading the
100     actual video file and writing it to disk if the user has requested
101     it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information (a task that InfoExtractors handle),
    it has to pass the URL to one of them.
105
106     For this, YoutubeDL objects have a method that allows
107     InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it over to the first InfoExtractor it
109     finds that reports being able to handle it. The InfoExtractor extracts
110     all the information about the video or videos the URL refers to, and
111     YoutubeDL process the extracted information, possibly using a File
112     Downloader to download the video.
113
114     YoutubeDL objects accept a lot of parameters. In order not to saturate
115     the object constructor with arguments, it receives a dictionary of
116     options instead. These options are available through the params
117     attribute for the InfoExtractors to use. The YoutubeDL also
118     registers itself as the downloader in charge for the InfoExtractors
119     that are added to it, so this is a "mutual registration".
120
121     Available options:
122
123     username:          Username for authentication purposes.
124     password:          Password for authentication purposes.
125     videopassword:     Password for accessing a video.
126     usenetrc:          Use netrc for authentication instead.
127     verbose:           Print additional info to stdout.
128     quiet:             Do not print messages to stdout.
129     no_warnings:       Do not print out anything for warnings.
130     forceurl:          Force printing final URL.
131     forcetitle:        Force printing title.
132     forceid:           Force printing ID.
133     forcethumbnail:    Force printing thumbnail URL.
134     forcedescription:  Force printing description.
135     forcefilename:     Force printing final filename.
136     forceduration:     Force printing duration.
137     forcejson:         Force printing info_dict as JSON.
138     dump_single_json:  Force printing the info_dict of the whole playlist
139                        (or video) as a single JSON line.
140     simulate:          Do not download the video files.
141     format:            Video format code. See options.py for more information.
142     outtmpl:           Template for output names.
143     restrictfilenames: Do not allow "&" and spaces in file names
144     ignoreerrors:      Do not stop on download errors.
145     force_generic_extractor: Force downloader to use the generic extractor
146     nooverwrites:      Prevent overwriting files.
147     playliststart:     Playlist item to start at.
148     playlistend:       Playlist item to end at.
149     playlist_items:    Specific indices of playlist to download.
150     playlistreverse:   Download playlist items in reverse order.
151     matchtitle:        Download only matching titles.
152     rejecttitle:       Reject downloads for matching titles.
153     logger:            Log messages to a logging.Logger instance.
154     logtostderr:       Log messages to stderr instead of stdout.
155     writedescription:  Write the video description to a .description file
156     writeinfojson:     Write the video description to a .info.json file
157     writeannotations:  Write the video annotations to a .annotations.xml file
158     writethumbnail:    Write the thumbnail image to a file
159     write_all_thumbnails:  Write all thumbnail formats to files
160     writesubtitles:    Write the video subtitles to a file
161     writeautomaticsub: Write the automatically generated subtitles to a file
162     allsubtitles:      Downloads all the subtitles of the video
163                        (requires writesubtitles or writeautomaticsub)
164     listsubtitles:     Lists all available subtitles for the video
165     subtitlesformat:   The format code for subtitles
166     subtitleslangs:    List of languages of the subtitles to download
167     keepvideo:         Keep the video file after post-processing
168     daterange:         A DateRange object, download only if the upload_date is in the range.
169     skip_download:     Skip the actual download of the video file
170     cachedir:          Location of the cache files in the filesystem.
171                        False to disable filesystem cache.
172     noplaylist:        Download single video instead of a playlist if in doubt.
173     age_limit:         An integer representing the user's age in years.
174                        Unsuitable videos for the given age are skipped.
175     min_views:         An integer representing the minimum view count the video
176                        must have in order to not be skipped.
177                        Videos without view count information are always
178                        downloaded. None for no limit.
179     max_views:         An integer representing the maximum view count.
180                        Videos that are more popular than that are not
181                        downloaded.
182                        Videos without view count information are always
183                        downloaded. None for no limit.
184     download_archive:  File name of a file where all downloads are recorded.
185                        Videos already present in the file are not downloaded
186                        again.
187     cookiefile:        File name where cookies should be read from and dumped to.
188     nocheckcertificate:Do not verify SSL certificates
189     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
190                        At the moment, this is only supported by YouTube.
191     proxy:             URL of the proxy server to use
192     cn_verification_proxy:  URL of the proxy to use for IP address verification
193                        on Chinese sites. (Experimental)
194     socket_timeout:    Time to wait for unresponsive hosts, in seconds
195     bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
197     debug_printtraffic:Print out sent and received HTTP traffic
198     include_ads:       Download ads as well
199     default_search:    Prepend this string if an input url is not valid.
200                        'auto' for elaborate guessing
201     encoding:          Use this encoding instead of the system-specified.
202     extract_flat:      Do not resolve URLs, return the immediate result.
203                        Pass in 'in_playlist' to only show this behavior for
204                        playlist items.
205     postprocessors:    A list of dictionaries, each with an entry
206                        * key:  The name of the postprocessor. See
207                                youtube_dl/postprocessor/__init__.py for a list.
208                        as well as any further keyword arguments for the
209                        postprocessor.
210     progress_hooks:    A list of functions that get called on download
211                        progress, with a dictionary with the entries
212                        * status: One of "downloading", "error", or "finished".
213                                  Check this first and ignore unknown values.
214
215                        If status is one of "downloading", or "finished", the
216                        following properties may also be present:
217                        * filename: The final filename (always present)
218                        * tmpfilename: The filename we're currently writing to
219                        * downloaded_bytes: Bytes on disk
220                        * total_bytes: Size of the whole file, None if unknown
221                        * total_bytes_estimate: Guess of the eventual file size,
222                                                None if unavailable.
223                        * elapsed: The number of seconds since download started.
224                        * eta: The estimated time in seconds, None if unknown
225                        * speed: The download speed in bytes/second, None if
226                                 unknown
227                        * fragment_index: The counter of the currently
228                                          downloaded video fragment.
229                        * fragment_count: The number of fragments (= individual
230                                          files that will be merged)
231
232                        Progress hooks are guaranteed to be called at least once
233                        (with status "finished") if the download is successful.
234     merge_output_format: Extension to use when merging formats.
235     fixup:             Automatically correct known faults of the file.
236                        One of:
237                        - "never": do nothing
238                        - "warn": only emit a warning
239                        - "detect_or_warn": check whether we can do anything
240                                            about it, warn otherwise (default)
241     source_address:    (Experimental) Client-side IP address to bind to.
242     call_home:         Boolean, true iff we are allowed to contact the
243                        youtube-dl servers for debugging.
244     sleep_interval:    Number of seconds to sleep before each download.
245     listformats:       Print an overview of available video formats and exit.
246     list_thumbnails:   Print a table of all thumbnails and exit.
247     match_filter:      A function that gets called with the info_dict of
248                        every video.
249                        If it returns a message, the video is ignored.
250                        If it returns None, the video is downloaded.
251                        match_filter_func in utils.py is one example for this.
252     no_color:          Do not emit color codes in output.
253
254     The following options determine which downloader is picked:
255     external_downloader: Executable of the external downloader to call.
256                        None or unset for standard (built-in) downloader.
257     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
258
259     The following parameters are not used by YoutubeDL itself, they are used by
260     the downloader (see youtube_dl/downloader/common.py):
261     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
262     noresizebuffer, retries, continuedl, noprogress, consoletitle,
263     xattr_set_filesize, external_downloader_args.
264
265     The following options are used by the post processors:
266     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
267                        otherwise prefer avconv.
268     postprocessor_args: A list of additional command-line arguments for the
269                         postprocessor.
270     """
271
272     params = None
273     _ies = []
274     _pps = []
275     _download_retcode = None
276     _num_downloads = None
277     _screen_file = None
278
279     def __init__(self, params=None, auto_init=True):
280         """Create a FileDownloader object with the given options."""
281         if params is None:
282             params = {}
283         self._ies = []
284         self._ies_instances = {}
285         self._pps = []
286         self._progress_hooks = []
287         self._download_retcode = 0
288         self._num_downloads = 0
289         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
290         self._err_file = sys.stderr
291         self.params = {
292             # Default parameters
293             'nocheckcertificate': False,
294         }
295         self.params.update(params)
296         self.cache = Cache(self)
297
298         if params.get('bidi_workaround', False):
299             try:
300                 import pty
301                 master, slave = pty.openpty()
302                 width = compat_get_terminal_size().columns
303                 if width is None:
304                     width_args = []
305                 else:
306                     width_args = ['-w', str(width)]
307                 sp_kwargs = dict(
308                     stdin=subprocess.PIPE,
309                     stdout=slave,
310                     stderr=self._err_file)
311                 try:
312                     self._output_process = subprocess.Popen(
313                         ['bidiv'] + width_args, **sp_kwargs
314                     )
315                 except OSError:
316                     self._output_process = subprocess.Popen(
317                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
318                 self._output_channel = os.fdopen(master, 'rb')
319             except OSError as ose:
320                 if ose.errno == 2:
321                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
322                 else:
323                     raise
324
325         if (sys.version_info >= (3,) and sys.platform != 'win32' and
326                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
327                 not params.get('restrictfilenames', False)):
328             # On Python 3, the Unicode filesystem API will throw errors (#1474)
329             self.report_warning(
330                 'Assuming --restrict-filenames since file system encoding '
331                 'cannot encode all characters. '
332                 'Set the LC_ALL environment variable to fix this.')
333             self.params['restrictfilenames'] = True
334
335         if isinstance(params.get('outtmpl'), bytes):
336             self.report_warning(
337                 'Parameter outtmpl is bytes, but should be a unicode string. '
338                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
339
340         self._setup_opener()
341
342         if auto_init:
343             self.print_debug_header()
344             self.add_default_info_extractors()
345
346         for pp_def_raw in self.params.get('postprocessors', []):
347             pp_class = get_postprocessor(pp_def_raw['key'])
348             pp_def = dict(pp_def_raw)
349             del pp_def['key']
350             pp = pp_class(self, **compat_kwargs(pp_def))
351             self.add_post_processor(pp)
352
353         for ph in self.params.get('progress_hooks', []):
354             self.add_progress_hook(ph)
355
356     def warn_if_short_id(self, argv):
357         # short YouTube ID starting with dash?
358         idxs = [
359             i for i, a in enumerate(argv)
360             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
361         if idxs:
362             correct_argv = (
363                 ['youtube-dl'] +
364                 [a for i, a in enumerate(argv) if i not in idxs] +
365                 ['--'] + [argv[i] for i in idxs]
366             )
367             self.report_warning(
368                 'Long argument string detected. '
369                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
370                 args_to_str(correct_argv))
371
372     def add_info_extractor(self, ie):
373         """Add an InfoExtractor object to the end of the list."""
374         self._ies.append(ie)
375         self._ies_instances[ie.ie_key()] = ie
376         ie.set_downloader(self)
377
378     def get_info_extractor(self, ie_key):
379         """
380         Get an instance of an IE with name ie_key, it will try to get one from
381         the _ies list, if there's no instance it will create a new one and add
382         it to the extractor list.
383         """
384         ie = self._ies_instances.get(ie_key)
385         if ie is None:
386             ie = get_info_extractor(ie_key)()
387             self.add_info_extractor(ie)
388         return ie
389
390     def add_default_info_extractors(self):
391         """
392         Add the InfoExtractors returned by gen_extractors to the end of the list
393         """
394         for ie in gen_extractors():
395             self.add_info_extractor(ie)
396
397     def add_post_processor(self, pp):
398         """Add a PostProcessor object to the end of the chain."""
399         self._pps.append(pp)
400         pp.set_downloader(self)
401
402     def add_progress_hook(self, ph):
403         """Add the progress hook (currently only for the file downloader)"""
404         self._progress_hooks.append(ph)
405
406     def _bidi_workaround(self, message):
407         if not hasattr(self, '_output_channel'):
408             return message
409
410         assert hasattr(self, '_output_process')
411         assert isinstance(message, compat_str)
412         line_count = message.count('\n') + 1
413         self._output_process.stdin.write((message + '\n').encode('utf-8'))
414         self._output_process.stdin.flush()
415         res = ''.join(self._output_channel.readline().decode('utf-8')
416                       for _ in range(line_count))
417         return res[:-len('\n')]
418
419     def to_screen(self, message, skip_eol=False):
420         """Print message to stdout if not in quiet mode."""
421         return self.to_stdout(message, skip_eol, check_quiet=True)
422
423     def _write_string(self, s, out=None):
424         write_string(s, out=out, encoding=self.params.get('encoding'))
425
426     def to_stdout(self, message, skip_eol=False, check_quiet=False):
427         """Print message to stdout if not in quiet mode."""
428         if self.params.get('logger'):
429             self.params['logger'].debug(message)
430         elif not check_quiet or not self.params.get('quiet', False):
431             message = self._bidi_workaround(message)
432             terminator = ['\n', ''][skip_eol]
433             output = message + terminator
434
435             self._write_string(output, self._screen_file)
436
437     def to_stderr(self, message):
438         """Print message to stderr."""
439         assert isinstance(message, compat_str)
440         if self.params.get('logger'):
441             self.params['logger'].error(message)
442         else:
443             message = self._bidi_workaround(message)
444             output = message + '\n'
445             self._write_string(output, self._err_file)
446
447     def to_console_title(self, message):
448         if not self.params.get('consoletitle', False):
449             return
450         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
451             # c_wchar_p() might not be necessary if `message` is
452             # already of type unicode()
453             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
454         elif 'TERM' in os.environ:
455             self._write_string('\033]0;%s\007' % message, self._screen_file)
456
457     def save_console_title(self):
458         if not self.params.get('consoletitle', False):
459             return
460         if 'TERM' in os.environ:
461             # Save the title on stack
462             self._write_string('\033[22;0t', self._screen_file)
463
464     def restore_console_title(self):
465         if not self.params.get('consoletitle', False):
466             return
467         if 'TERM' in os.environ:
468             # Restore the title from stack
469             self._write_string('\033[23;0t', self._screen_file)
470
471     def __enter__(self):
472         self.save_console_title()
473         return self
474
475     def __exit__(self, *args):
476         self.restore_console_title()
477
478         if self.params.get('cookiefile') is not None:
479             self.cookiejar.save()
480
481     def trouble(self, message=None, tb=None):
482         """Determine action to take when a download problem appears.
483
484         Depending on if the downloader has been configured to ignore
485         download errors or not, this method may throw an exception or
486         not when errors are found, after printing the message.
487
488         tb, if given, is additional traceback information.
489         """
490         if message is not None:
491             self.to_stderr(message)
492         if self.params.get('verbose'):
493             if tb is None:
494                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
495                     tb = ''
496                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
497                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
498                     tb += compat_str(traceback.format_exc())
499                 else:
500                     tb_data = traceback.format_list(traceback.extract_stack())
501                     tb = ''.join(tb_data)
502             self.to_stderr(tb)
503         if not self.params.get('ignoreerrors', False):
504             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
505                 exc_info = sys.exc_info()[1].exc_info
506             else:
507                 exc_info = sys.exc_info()
508             raise DownloadError(message, exc_info)
509         self._download_retcode = 1
510
511     def report_warning(self, message):
512         '''
513         Print the message to stderr, it will be prefixed with 'WARNING:'
514         If stderr is a tty file the 'WARNING:' will be colored
515         '''
516         if self.params.get('logger') is not None:
517             self.params['logger'].warning(message)
518         else:
519             if self.params.get('no_warnings'):
520                 return
521             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
522                 _msg_header = '\033[0;33mWARNING:\033[0m'
523             else:
524                 _msg_header = 'WARNING:'
525             warning_message = '%s %s' % (_msg_header, message)
526             self.to_stderr(warning_message)
527
528     def report_error(self, message, tb=None):
529         '''
530         Do the same as trouble, but prefixes the message with 'ERROR:', colored
531         in red if stderr is a tty file.
532         '''
533         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
534             _msg_header = '\033[0;31mERROR:\033[0m'
535         else:
536             _msg_header = 'ERROR:'
537         error_message = '%s %s' % (_msg_header, message)
538         self.trouble(error_message, tb)
539
540     def report_file_already_downloaded(self, file_name):
541         """Report file has already been fully downloaded."""
542         try:
543             self.to_screen('[download] %s has already been downloaded' % file_name)
544         except UnicodeEncodeError:
545             self.to_screen('[download] The file has already been downloaded')
546
547     def prepare_filename(self, info_dict):
548         """Generate the output filename."""
549         try:
550             template_dict = dict(info_dict)
551
552             template_dict['epoch'] = int(time.time())
553             autonumber_size = self.params.get('autonumber_size')
554             if autonumber_size is None:
555                 autonumber_size = 5
556             autonumber_templ = '%0' + str(autonumber_size) + 'd'
557             template_dict['autonumber'] = autonumber_templ % self._num_downloads
558             if template_dict.get('playlist_index') is not None:
559                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
560             if template_dict.get('resolution') is None:
561                 if template_dict.get('width') and template_dict.get('height'):
562                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
563                 elif template_dict.get('height'):
564                     template_dict['resolution'] = '%sp' % template_dict['height']
565                 elif template_dict.get('width'):
566                     template_dict['resolution'] = '?x%d' % template_dict['width']
567
568             sanitize = lambda k, v: sanitize_filename(
569                 compat_str(v),
570                 restricted=self.params.get('restrictfilenames'),
571                 is_id=(k == 'id'))
572             template_dict = dict((k, sanitize(k, v))
573                                  for k, v in template_dict.items()
574                                  if v is not None)
575             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
576
577             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
578             tmpl = compat_expanduser(outtmpl)
579             filename = tmpl % template_dict
580             # Temporary fix for #4787
581             # 'Treat' all problem characters by passing filename through preferredencoding
582             # to workaround encoding issues with subprocess on python2 @ Windows
583             if sys.version_info < (3, 0) and sys.platform == 'win32':
584                 filename = encodeFilename(filename, True).decode(preferredencoding())
585             return sanitize_path(filename)
586         except ValueError as err:
587             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
588             return None
589
590     def _match_entry(self, info_dict, incomplete):
591         """ Returns None iff the file should be downloaded """
592
593         video_title = info_dict.get('title', info_dict.get('id', 'video'))
594         if 'title' in info_dict:
595             # This can happen when we're just evaluating the playlist
596             title = info_dict['title']
597             matchtitle = self.params.get('matchtitle', False)
598             if matchtitle:
599                 if not re.search(matchtitle, title, re.IGNORECASE):
600                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
601             rejecttitle = self.params.get('rejecttitle', False)
602             if rejecttitle:
603                 if re.search(rejecttitle, title, re.IGNORECASE):
604                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
605         date = info_dict.get('upload_date', None)
606         if date is not None:
607             dateRange = self.params.get('daterange', DateRange())
608             if date not in dateRange:
609                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
610         view_count = info_dict.get('view_count', None)
611         if view_count is not None:
612             min_views = self.params.get('min_views')
613             if min_views is not None and view_count < min_views:
614                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
615             max_views = self.params.get('max_views')
616             if max_views is not None and view_count > max_views:
617                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
618         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
619             return 'Skipping "%s" because it is age restricted' % video_title
620         if self.in_download_archive(info_dict):
621             return '%s has already been recorded in archive' % video_title
622
623         if not incomplete:
624             match_filter = self.params.get('match_filter')
625             if match_filter is not None:
626                 ret = match_filter(info_dict)
627                 if ret is not None:
628                     return ret
629
630         return None
631
632     @staticmethod
633     def add_extra_info(info_dict, extra_info):
634         '''Set the keys from extra_info in info dict if they are missing'''
635         for key, value in extra_info.items():
636             info_dict.setdefault(key, value)
637
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
                     process=True, force_generic_extractor=False):
        '''
        Extract information for the first InfoExtractor that accepts the URL.

        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result
        ie_key, if given, restricts extraction to that single extractor;
        force_generic_extractor forces the 'Generic' extractor when no
        explicit ie_key was supplied.
        process controls whether the raw extractor result is resolved via
        process_ie_result() (True) or returned as-is (False).

        NOTE: extra_info has a shared mutable default ({}); this method only
        passes it along and never mutates it, so the sharing is harmless here.
        '''

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        if ie_key:
            # Restrict the search to the single requested extractor.
            ies = [self.get_info_extractor(ie_key)]
        else:
            ies = self._ies

        for ie in ies:
            if not ie.suitable(url):
                continue

            # Broken extractors are still attempted; the user is just warned.
            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            try:
                ie_result = ie.extract(url)
                if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                    break
                if isinstance(ie_result, list):
                    # Backwards compatibility: old IE result format
                    ie_result = {
                        '_type': 'compat_list',
                        'entries': ie_result,
                    }
                self.add_default_extra_info(ie_result, ie, url)
                if process:
                    return self.process_ie_result(ie_result, download, extra_info)
                else:
                    return ie_result
            except ExtractorError as de:  # An error we somewhat expected
                self.report_error(compat_str(de), de.format_traceback())
                break
            except MaxDownloadsReached:
                # Must propagate so the outer download loop stops cleanly;
                # intentionally checked before the generic Exception handler.
                raise
            except Exception as e:
                if self.params.get('ignoreerrors', False):
                    self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                    break
                else:
                    raise
        else:
            # for/else: reached only when no extractor accepted the URL.
            self.report_error('no suitable InfoExtractor for URL %s' % url)
690
691     def add_default_extra_info(self, ie_result, ie, url):
692         self.add_extra_info(ie_result, {
693             'extractor': ie.IE_NAME,
694             'webpage_url': url,
695             'webpage_url_basename': url_basename(url),
696             'extractor_key': ie.ie_key(),
697         })
698
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.

        Dispatches on ie_result['_type']: 'video', 'url', 'url_transparent',
        'playlist'/'multi_video', or the legacy 'compat_list'.
        NOTE: extra_info has a shared mutable default ({}); it is only read
        here, never mutated.
        """

        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            # --flat-playlist: do not resolve URL references that came from a
            # playlist; hand the unresolved entry back as-is.
            extract_flat = self.params.get('extract_flat', False)
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                    extract_flat is True):
                if self.params.get('forcejson', False):
                    self.to_stdout(json.dumps(ie_result))
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # Non-None fields of the embedding result override the target's,
            # except '_type' and 'url' which must come from the target.
            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            # A chain of url_transparent results would recurse forever here.
            assert new_result.get('_type') != 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist' or result_type == 'multi_video':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # 'playliststart' is 1-based in params; convert to a 0-based slice index.
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

            playlistitems_str = self.params.get('playlist_items', None)
            playlistitems = None
            if playlistitems_str is not None:
                # Expand "1-3,7" into the 1-based indices 1, 2, 3, 7.
                # NOTE(review): the parameter name shadows the builtin `format`.
                def iter_playlistitems(format):
                    for string_segment in format.split(','):
                        if '-' in string_segment:
                            start, end = string_segment.split('-')
                            for item in range(int(start), int(end) + 1):
                                yield int(item)
                        else:
                            yield int(string_segment)
                playlistitems = iter_playlistitems(playlistitems_str)

            # Three entry sources: a plain list, a lazy PagedList, or any
            # other iterable (e.g. a generator).
            ie_entries = ie_result['entries']
            if isinstance(ie_entries, list):
                n_all_entries = len(ie_entries)
                if playlistitems:
                    # Silently drop out-of-range indices (negative wrap-around
                    # beyond -n is also excluded).
                    entries = [
                        ie_entries[i - 1] for i in playlistitems
                        if -n_all_entries <= i - 1 < n_all_entries]
                else:
                    entries = ie_entries[playliststart:playlistend]
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            elif isinstance(ie_entries, PagedList):
                if playlistitems:
                    entries = []
                    for item in playlistitems:
                        # One-element slice per requested index keeps the
                        # PagedList lazy.
                        entries.extend(ie_entries.getslice(
                            item - 1, item
                        ))
                else:
                    entries = ie_entries.getslice(
                        playliststart, playlistend)
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))
            else:  # iterable
                if playlistitems:
                    # NOTE(review): unlike the list branch, out-of-range
                    # playlist_items raise IndexError here — confirm intended.
                    entry_list = list(ie_entries)
                    entries = [entry_list[i - 1] for i in playlistitems]
                else:
                    entries = list(itertools.islice(
                        ie_entries, playliststart, playlistend))
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))

            if self.params.get('playlistreverse', False):
                entries = entries[::-1]

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                # NOTE(review): when playlist_items is used, 'playlist_index'
                # (i + playliststart) reflects the position in the selection,
                # not the original playlist position — verify desired.
                extra = {
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_id': ie_result.get('id'),
                    'playlist_title': ie_result.get('title'),
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

                # incomplete=True: the entry may lack fields that only full
                # extraction would provide, so only pre-download filters apply.
                reason = self._match_entry(entry, incomplete=True)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                    continue

                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            self.to_screen('[download] Finished downloading playlist: %s' % playlist)
            return ie_result
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            # Stamp each legacy entry with the parent's bookkeeping fields
            # before processing it.
            def _fixup(r):
                self.add_extra_info(
                    r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    }
                )
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
863
864     def _build_format_filter(self, filter_spec):
865         " Returns a function to filter the formats according to the filter_spec "
866
867         OPERATORS = {
868             '<': operator.lt,
869             '<=': operator.le,
870             '>': operator.gt,
871             '>=': operator.ge,
872             '=': operator.eq,
873             '!=': operator.ne,
874         }
875         operator_rex = re.compile(r'''(?x)\s*
876             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
877             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
878             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
879             $
880             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
881         m = operator_rex.search(filter_spec)
882         if m:
883             try:
884                 comparison_value = int(m.group('value'))
885             except ValueError:
886                 comparison_value = parse_filesize(m.group('value'))
887                 if comparison_value is None:
888                     comparison_value = parse_filesize(m.group('value') + 'B')
889                 if comparison_value is None:
890                     raise ValueError(
891                         'Invalid value %r in format specification %r' % (
892                             m.group('value'), filter_spec))
893             op = OPERATORS[m.group('op')]
894
895         if not m:
896             STR_OPERATORS = {
897                 '=': operator.eq,
898                 '!=': operator.ne,
899             }
900             str_operator_rex = re.compile(r'''(?x)
901                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
902                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
903                 \s*(?P<value>[a-zA-Z0-9_-]+)
904                 \s*$
905                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
906             m = str_operator_rex.search(filter_spec)
907             if m:
908                 comparison_value = m.group('value')
909                 op = STR_OPERATORS[m.group('op')]
910
911         if not m:
912             raise ValueError('Invalid filter specification %r' % filter_spec)
913
914         def _filter(f):
915             actual_value = f.get(m.group('key'))
916             if actual_value is None:
917                 return m.group('none_inclusive')
918             return op(actual_value, comparison_value)
919         return _filter
920
    def build_format_selector(self, format_spec):
        """Compile a --format specification string into a function that,
        given an iterable of format dicts, yields the format(s) to download.

        The spec is tokenized with the stdlib tokenizer, parsed into a tree
        of FormatSelector nodes (SINGLE, GROUP, PICKFIRST for '/', MERGE for
        '+', each with optional '[...]' filters), then compiled into nested
        generator functions.

        Raises SyntaxError for a malformed spec.
        """
        def syntax_error(note, start):
            # start is a tokenizer (row, col) pair; the caret marks the column.
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        # Node type tags for FormatSelector.type:
        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        def _parse_filter(tokens):
            # Consume tokens up to (and including) the closing ']' and return
            # the raw filter text; returns None on an unterminated filter.
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    # Flush any accumulated name before emitting the bracket.
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    # Glue adjacent name/number/other-op tokens together.
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Recursive-descent parse of a comma-separated selector list.
            # The inside_* flags mark which delimiters end the current
            # sub-expression (and must be pushed back for the caller).
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        # A bare '[filter]' implicitly filters 'best'.
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        video_selector = current_selector
                        audio_selector = _parse_format_selection(tokens, inside_merge=True)
                        if not video_selector or not audio_selector:
                            raise syntax_error('"+" must be between two format selectors', start)
                        current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _build_selector_function(selector):
            # Compile a FormatSelector node (or a list of them) into a
            # generator function over format lists.
            if isinstance(selector, list):
                # A comma-separated list: concatenate each selector's output.
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(formats):
                    for f in fs:
                        for format in f(formats):
                            yield format
                return selector_function
            elif selector.type == GROUP:
                selector_function = _build_selector_function(selector.selector)
            elif selector.type == PICKFIRST:
                # '/': first alternative that yields anything wins.
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(formats):
                    for f in fs:
                        picked_formats = list(f(formats))
                        if picked_formats:
                            return picked_formats
                    return []
            elif selector.type == SINGLE:
                format_spec = selector.selector

                def selector_function(formats):
                    formats = list(formats)
                    if not formats:
                        return
                    if format_spec == 'all':
                        for f in formats:
                            yield f
                    elif format_spec in ['best', 'worst', None]:
                        # Formats are assumed ordered worst-to-best, hence
                        # index -1 for 'best' and 0 for 'worst'.
                        format_idx = 0 if format_spec == 'worst' else -1
                        audiovideo_formats = [
                            f for f in formats
                            if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                        if audiovideo_formats:
                            yield audiovideo_formats[format_idx]
                        # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
                        elif (all(f.get('acodec') != 'none' for f in formats) or
                              all(f.get('vcodec') != 'none' for f in formats)):
                            yield formats[format_idx]
                    elif format_spec == 'bestaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[-1]
                    elif format_spec == 'worstaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[0]
                    elif format_spec == 'bestvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[-1]
                    elif format_spec == 'worstvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[0]
                    else:
                        # Anything else is an extension or a format_id; the
                        # last match is yielded (best under the assumed order).
                        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                        if format_spec in extensions:
                            filter_f = lambda f: f['ext'] == format_spec
                        else:
                            filter_f = lambda f: f['format_id'] == format_spec
                        matches = list(filter(filter_f, formats))
                        if matches:
                            yield matches[-1]
            elif selector.type == MERGE:
                def _merge(formats_info):
                    format_1, format_2 = [f['format_id'] for f in formats_info]
                    # The first format must contain the video and the
                    # second the audio
                    if formats_info[0].get('vcodec') == 'none':
                        self.report_error('The first format must '
                                          'contain the video, try using '
                                          '"-f %s+%s"' % (format_2, format_1))
                        return
                    output_ext = (
                        formats_info[0]['ext']
                        if self.params.get('merge_output_format') is None
                        else self.params['merge_output_format'])
                    # Synthesized merged format: video attributes from the
                    # first format, audio attributes from the second.
                    return {
                        'requested_formats': formats_info,
                        'format': '%s+%s' % (formats_info[0].get('format'),
                                             formats_info[1].get('format')),
                        'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                formats_info[1].get('format_id')),
                        'width': formats_info[0].get('width'),
                        'height': formats_info[0].get('height'),
                        'resolution': formats_info[0].get('resolution'),
                        'fps': formats_info[0].get('fps'),
                        'vcodec': formats_info[0].get('vcodec'),
                        'vbr': formats_info[0].get('vbr'),
                        'stretched_ratio': formats_info[0].get('stretched_ratio'),
                        'acodec': formats_info[1].get('acodec'),
                        'abr': formats_info[1].get('abr'),
                        'ext': output_ext,
                    }
                video_selector, audio_selector = map(_build_selector_function, selector.selector)

                # NOTE(review): when _merge() bails out (video-less first
                # format) it returns None, which is yielded as-is — confirm
                # downstream tolerates a None entry.
                def selector_function(formats):
                    formats = list(formats)
                    for pair in itertools.product(video_selector(formats), audio_selector(formats)):
                        yield _merge(pair)

            # Wrap the node's selector with its '[...]' filters (applied to
            # the candidate list before selection).
            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(formats):
                for _filter in filters:
                    formats = list(filter(_filter, formats))
                return selector_function(formats)
            return final_selector

        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator(object):
            """Iterator over the token list with one-token push-back,
            needed because the sub-parsers read their terminating token."""
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            # Python 2 iterator protocol compatibility.
            next = __next__

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
1177
1178     def _calc_headers(self, info_dict):
1179         res = std_headers.copy()
1180
1181         add_headers = info_dict.get('http_headers')
1182         if add_headers:
1183             res.update(add_headers)
1184
1185         cookies = self._calc_cookies(info_dict)
1186         if cookies:
1187             res['Cookie'] = cookies
1188
1189         return res
1190
1191     def _calc_cookies(self, info_dict):
1192         pr = compat_urllib_request.Request(info_dict['url'])
1193         self.cookiejar.add_cookie_header(pr)
1194         return pr.get_header('Cookie')
1195
1196     def process_video_result(self, info_dict, download=True):
1197         assert info_dict.get('_type', 'video') == 'video'
1198
1199         if 'id' not in info_dict:
1200             raise ExtractorError('Missing "id" field in extractor result')
1201         if 'title' not in info_dict:
1202             raise ExtractorError('Missing "title" field in extractor result')
1203
1204         if 'playlist' not in info_dict:
1205             # It isn't part of a playlist
1206             info_dict['playlist'] = None
1207             info_dict['playlist_index'] = None
1208
1209         thumbnails = info_dict.get('thumbnails')
1210         if thumbnails is None:
1211             thumbnail = info_dict.get('thumbnail')
1212             if thumbnail:
1213                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1214         if thumbnails:
1215             thumbnails.sort(key=lambda t: (
1216                 t.get('preference'), t.get('width'), t.get('height'),
1217                 t.get('id'), t.get('url')))
1218             for i, t in enumerate(thumbnails):
1219                 if t.get('width') and t.get('height'):
1220                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1221                 if t.get('id') is None:
1222                     t['id'] = '%d' % i
1223
1224         if thumbnails and 'thumbnail' not in info_dict:
1225             info_dict['thumbnail'] = thumbnails[-1]['url']
1226
1227         if 'display_id' not in info_dict and 'id' in info_dict:
1228             info_dict['display_id'] = info_dict['id']
1229
1230         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1231             # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1232             # see http://bugs.python.org/issue1646728)
1233             try:
1234                 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1235                 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1236             except (ValueError, OverflowError, OSError):
1237                 pass
1238
1239         subtitles = info_dict.get('subtitles')
1240         if subtitles:
1241             for _, subtitle in subtitles.items():
1242                 for subtitle_format in subtitle:
1243                     if 'ext' not in subtitle_format:
1244                         subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1245
1246         if self.params.get('listsubtitles', False):
1247             if 'automatic_captions' in info_dict:
1248                 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
1249             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1250             return
1251         info_dict['requested_subtitles'] = self.process_subtitles(
1252             info_dict['id'], subtitles,
1253             info_dict.get('automatic_captions'))
1254
1255         # We now pick which formats have to be downloaded
1256         if info_dict.get('formats') is None:
1257             # There's only one format available
1258             formats = [info_dict]
1259         else:
1260             formats = info_dict['formats']
1261
1262         if not formats:
1263             raise ExtractorError('No video formats found!')
1264
1265         formats_dict = {}
1266
1267         # We check that all the formats have the format and format_id fields
1268         for i, format in enumerate(formats):
1269             if 'url' not in format:
1270                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1271
1272             if format.get('format_id') is None:
1273                 format['format_id'] = compat_str(i)
1274             format_id = format['format_id']
1275             if format_id not in formats_dict:
1276                 formats_dict[format_id] = []
1277             formats_dict[format_id].append(format)
1278
1279         # Make sure all formats have unique format_id
1280         for format_id, ambiguous_formats in formats_dict.items():
1281             if len(ambiguous_formats) > 1:
1282                 for i, format in enumerate(ambiguous_formats):
1283                     format['format_id'] = '%s-%d' % (format_id, i)
1284
1285         for i, format in enumerate(formats):
1286             if format.get('format') is None:
1287                 format['format'] = '{id} - {res}{note}'.format(
1288                     id=format['format_id'],
1289                     res=self.format_resolution(format),
1290                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1291                 )
1292             # Automatically determine file extension if missing
1293             if 'ext' not in format:
1294                 format['ext'] = determine_ext(format['url']).lower()
1295             # Add HTTP headers, so that external programs can use them from the
1296             # json output
1297             full_format_info = info_dict.copy()
1298             full_format_info.update(format)
1299             format['http_headers'] = self._calc_headers(full_format_info)
1300
1301         # TODO Central sorting goes here
1302
1303         if formats[0] is not info_dict:
1304             # only set the 'formats' fields if the original info_dict list them
1305             # otherwise we end up with a circular reference, the first (and unique)
1306             # element in the 'formats' field in info_dict is info_dict itself,
1307             # wich can't be exported to json
1308             info_dict['formats'] = formats
1309         if self.params.get('listformats'):
1310             self.list_formats(info_dict)
1311             return
1312         if self.params.get('list_thumbnails'):
1313             self.list_thumbnails(info_dict)
1314             return
1315
1316         req_format = self.params.get('format')
1317         if req_format is None:
1318             req_format_list = []
1319             if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
1320                     info_dict['extractor'] in ['youtube', 'ted'] and
1321                     not info_dict.get('is_live')):
1322                 merger = FFmpegMergerPP(self)
1323                 if merger.available and merger.can_merge():
1324                     req_format_list.append('bestvideo+bestaudio')
1325             req_format_list.append('best')
1326             req_format = '/'.join(req_format_list)
1327         format_selector = self.build_format_selector(req_format)
1328         formats_to_download = list(format_selector(formats))
1329         if not formats_to_download:
1330             raise ExtractorError('requested format not available',
1331                                  expected=True)
1332
1333         if download:
1334             if len(formats_to_download) > 1:
1335                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1336             for format in formats_to_download:
1337                 new_info = dict(info_dict)
1338                 new_info.update(format)
1339                 self.process_info(new_info)
1340         # We update the info dict with the best quality format (backwards compatibility)
1341         info_dict.update(formats_to_download[-1])
1342         return info_dict
1343
1344     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1345         """Select the requested subtitles and their format"""
1346         available_subs = {}
1347         if normal_subtitles and self.params.get('writesubtitles'):
1348             available_subs.update(normal_subtitles)
1349         if automatic_captions and self.params.get('writeautomaticsub'):
1350             for lang, cap_info in automatic_captions.items():
1351                 if lang not in available_subs:
1352                     available_subs[lang] = cap_info
1353
1354         if (not self.params.get('writesubtitles') and not
1355                 self.params.get('writeautomaticsub') or not
1356                 available_subs):
1357             return None
1358
1359         if self.params.get('allsubtitles', False):
1360             requested_langs = available_subs.keys()
1361         else:
1362             if self.params.get('subtitleslangs', False):
1363                 requested_langs = self.params.get('subtitleslangs')
1364             elif 'en' in available_subs:
1365                 requested_langs = ['en']
1366             else:
1367                 requested_langs = [list(available_subs.keys())[0]]
1368
1369         formats_query = self.params.get('subtitlesformat', 'best')
1370         formats_preference = formats_query.split('/') if formats_query else []
1371         subs = {}
1372         for lang in requested_langs:
1373             formats = available_subs.get(lang)
1374             if formats is None:
1375                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1376                 continue
1377             for ext in formats_preference:
1378                 if ext == 'best':
1379                     f = formats[-1]
1380                     break
1381                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1382                 if matches:
1383                     f = matches[-1]
1384                     break
1385             else:
1386                 f = formats[-1]
1387                 self.report_warning(
1388                     'No subtitle format found matching "%s" for language %s, '
1389                     'using %s' % (formats_query, lang, f['ext']))
1390             subs[lang] = f
1391         return subs
1392
1393     def process_info(self, info_dict):
1394         """Process a single resolved IE result."""
1395
1396         assert info_dict.get('_type', 'video') == 'video'
1397
1398         max_downloads = self.params.get('max_downloads')
1399         if max_downloads is not None:
1400             if self._num_downloads >= int(max_downloads):
1401                 raise MaxDownloadsReached()
1402
1403         info_dict['fulltitle'] = info_dict['title']
1404         if len(info_dict['title']) > 200:
1405             info_dict['title'] = info_dict['title'][:197] + '...'
1406
1407         if 'format' not in info_dict:
1408             info_dict['format'] = info_dict['ext']
1409
1410         reason = self._match_entry(info_dict, incomplete=False)
1411         if reason is not None:
1412             self.to_screen('[download] ' + reason)
1413             return
1414
1415         self._num_downloads += 1
1416
1417         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1418
1419         # Forced printings
1420         if self.params.get('forcetitle', False):
1421             self.to_stdout(info_dict['fulltitle'])
1422         if self.params.get('forceid', False):
1423             self.to_stdout(info_dict['id'])
1424         if self.params.get('forceurl', False):
1425             if info_dict.get('requested_formats') is not None:
1426                 for f in info_dict['requested_formats']:
1427                     self.to_stdout(f['url'] + f.get('play_path', ''))
1428             else:
1429                 # For RTMP URLs, also include the playpath
1430                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1431         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1432             self.to_stdout(info_dict['thumbnail'])
1433         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1434             self.to_stdout(info_dict['description'])
1435         if self.params.get('forcefilename', False) and filename is not None:
1436             self.to_stdout(filename)
1437         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1438             self.to_stdout(formatSeconds(info_dict['duration']))
1439         if self.params.get('forceformat', False):
1440             self.to_stdout(info_dict['format'])
1441         if self.params.get('forcejson', False):
1442             self.to_stdout(json.dumps(info_dict))
1443
1444         # Do nothing else if in simulate mode
1445         if self.params.get('simulate', False):
1446             return
1447
1448         if filename is None:
1449             return
1450
1451         try:
1452             dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1453             if dn and not os.path.exists(dn):
1454                 os.makedirs(dn)
1455         except (OSError, IOError) as err:
1456             self.report_error('unable to create directory ' + compat_str(err))
1457             return
1458
1459         if self.params.get('writedescription', False):
1460             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1461             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1462                 self.to_screen('[info] Video description is already present')
1463             elif info_dict.get('description') is None:
1464                 self.report_warning('There\'s no description to write.')
1465             else:
1466                 try:
1467                     self.to_screen('[info] Writing video description to: ' + descfn)
1468                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1469                         descfile.write(info_dict['description'])
1470                 except (OSError, IOError):
1471                     self.report_error('Cannot write description file ' + descfn)
1472                     return
1473
1474         if self.params.get('writeannotations', False):
1475             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1476             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1477                 self.to_screen('[info] Video annotations are already present')
1478             else:
1479                 try:
1480                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1481                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1482                         annofile.write(info_dict['annotations'])
1483                 except (KeyError, TypeError):
1484                     self.report_warning('There are no annotations to write.')
1485                 except (OSError, IOError):
1486                     self.report_error('Cannot write annotations file: ' + annofn)
1487                     return
1488
1489         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1490                                        self.params.get('writeautomaticsub')])
1491
1492         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1493             # subtitles download errors are already managed as troubles in relevant IE
1494             # that way it will silently go on when used with unsupporting IE
1495             subtitles = info_dict['requested_subtitles']
1496             ie = self.get_info_extractor(info_dict['extractor_key'])
1497             for sub_lang, sub_info in subtitles.items():
1498                 sub_format = sub_info['ext']
1499                 if sub_info.get('data') is not None:
1500                     sub_data = sub_info['data']
1501                 else:
1502                     try:
1503                         sub_data = ie._download_webpage(
1504                             sub_info['url'], info_dict['id'], note=False)
1505                     except ExtractorError as err:
1506                         self.report_warning('Unable to download subtitle for "%s": %s' %
1507                                             (sub_lang, compat_str(err.cause)))
1508                         continue
1509                 try:
1510                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1511                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1512                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1513                     else:
1514                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1515                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1516                             subfile.write(sub_data)
1517                 except (OSError, IOError):
1518                     self.report_error('Cannot write subtitles file ' + sub_filename)
1519                     return
1520
1521         if self.params.get('writeinfojson', False):
1522             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1523             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1524                 self.to_screen('[info] Video description metadata is already present')
1525             else:
1526                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1527                 try:
1528                     write_json_file(self.filter_requested_info(info_dict), infofn)
1529                 except (OSError, IOError):
1530                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1531                     return
1532
1533         self._write_thumbnails(info_dict, filename)
1534
1535         if not self.params.get('skip_download', False):
1536             try:
1537                 def dl(name, info):
1538                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1539                     for ph in self._progress_hooks:
1540                         fd.add_progress_hook(ph)
1541                     if self.params.get('verbose'):
1542                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1543                     return fd.download(name, info)
1544
1545                 if info_dict.get('requested_formats') is not None:
1546                     downloaded = []
1547                     success = True
1548                     merger = FFmpegMergerPP(self)
1549                     if not merger.available:
1550                         postprocessors = []
1551                         self.report_warning('You have requested multiple '
1552                                             'formats but ffmpeg or avconv are not installed.'
1553                                             ' The formats won\'t be merged.')
1554                     else:
1555                         postprocessors = [merger]
1556
1557                     def compatible_formats(formats):
1558                         video, audio = formats
1559                         # Check extension
1560                         video_ext, audio_ext = audio.get('ext'), video.get('ext')
1561                         if video_ext and audio_ext:
1562                             COMPATIBLE_EXTS = (
1563                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
1564                                 ('webm')
1565                             )
1566                             for exts in COMPATIBLE_EXTS:
1567                                 if video_ext in exts and audio_ext in exts:
1568                                     return True
1569                         # TODO: Check acodec/vcodec
1570                         return False
1571
1572                     filename_real_ext = os.path.splitext(filename)[1][1:]
1573                     filename_wo_ext = (
1574                         os.path.splitext(filename)[0]
1575                         if filename_real_ext == info_dict['ext']
1576                         else filename)
1577                     requested_formats = info_dict['requested_formats']
1578                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1579                         info_dict['ext'] = 'mkv'
1580                         self.report_warning(
1581                             'Requested formats are incompatible for merge and will be merged into mkv.')
1582                     # Ensure filename always has a correct extension for successful merge
1583                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1584                     if os.path.exists(encodeFilename(filename)):
1585                         self.to_screen(
1586                             '[download] %s has already been downloaded and '
1587                             'merged' % filename)
1588                     else:
1589                         for f in requested_formats:
1590                             new_info = dict(info_dict)
1591                             new_info.update(f)
1592                             fname = self.prepare_filename(new_info)
1593                             fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
1594                             downloaded.append(fname)
1595                             partial_success = dl(fname, new_info)
1596                             success = success and partial_success
1597                         info_dict['__postprocessors'] = postprocessors
1598                         info_dict['__files_to_merge'] = downloaded
1599                 else:
1600                     # Just a single file
1601                     success = dl(filename, info_dict)
1602             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1603                 self.report_error('unable to download video data: %s' % str(err))
1604                 return
1605             except (OSError, IOError) as err:
1606                 raise UnavailableVideoError(err)
1607             except (ContentTooShortError, ) as err:
1608                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1609                 return
1610
1611             if success:
1612                 # Fixup content
1613                 fixup_policy = self.params.get('fixup')
1614                 if fixup_policy is None:
1615                     fixup_policy = 'detect_or_warn'
1616
1617                 stretched_ratio = info_dict.get('stretched_ratio')
1618                 if stretched_ratio is not None and stretched_ratio != 1:
1619                     if fixup_policy == 'warn':
1620                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1621                             info_dict['id'], stretched_ratio))
1622                     elif fixup_policy == 'detect_or_warn':
1623                         stretched_pp = FFmpegFixupStretchedPP(self)
1624                         if stretched_pp.available:
1625                             info_dict.setdefault('__postprocessors', [])
1626                             info_dict['__postprocessors'].append(stretched_pp)
1627                         else:
1628                             self.report_warning(
1629                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1630                                     info_dict['id'], stretched_ratio))
1631                     else:
1632                         assert fixup_policy in ('ignore', 'never')
1633
1634                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1635                     if fixup_policy == 'warn':
1636                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1637                             info_dict['id']))
1638                     elif fixup_policy == 'detect_or_warn':
1639                         fixup_pp = FFmpegFixupM4aPP(self)
1640                         if fixup_pp.available:
1641                             info_dict.setdefault('__postprocessors', [])
1642                             info_dict['__postprocessors'].append(fixup_pp)
1643                         else:
1644                             self.report_warning(
1645                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1646                                     info_dict['id']))
1647                     else:
1648                         assert fixup_policy in ('ignore', 'never')
1649
1650                 try:
1651                     self.post_process(filename, info_dict)
1652                 except (PostProcessingError) as err:
1653                     self.report_error('postprocessing: %s' % str(err))
1654                     return
1655                 self.record_download_archive(info_dict)
1656
1657     def download(self, url_list):
1658         """Download a given list of URLs."""
1659         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1660         if (len(url_list) > 1 and
1661                 '%' not in outtmpl and
1662                 self.params.get('max_downloads') != 1):
1663             raise SameFileError(outtmpl)
1664
1665         for url in url_list:
1666             try:
1667                 # It also downloads the videos
1668                 res = self.extract_info(
1669                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
1670             except UnavailableVideoError:
1671                 self.report_error('unable to download video')
1672             except MaxDownloadsReached:
1673                 self.to_screen('[info] Maximum number of downloaded files reached.')
1674                 raise
1675             else:
1676                 if self.params.get('dump_single_json', False):
1677                     self.to_stdout(json.dumps(res))
1678
1679         return self._download_retcode
1680
1681     def download_with_info_file(self, info_filename):
1682         with contextlib.closing(fileinput.FileInput(
1683                 [info_filename], mode='r',
1684                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1685             # FileInput doesn't have a read method, we can't call json.load
1686             info = self.filter_requested_info(json.loads('\n'.join(f)))
1687         try:
1688             self.process_ie_result(info, download=True)
1689         except DownloadError:
1690             webpage_url = info.get('webpage_url')
1691             if webpage_url is not None:
1692                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1693                 return self.download([webpage_url])
1694             else:
1695                 raise
1696         return self._download_retcode
1697
1698     @staticmethod
1699     def filter_requested_info(info_dict):
1700         return dict(
1701             (k, v) for k, v in info_dict.items()
1702             if k not in ['requested_formats', 'requested_subtitles'])
1703
1704     def post_process(self, filename, ie_info):
1705         """Run all the postprocessors on the given file."""
1706         info = dict(ie_info)
1707         info['filepath'] = filename
1708         pps_chain = []
1709         if ie_info.get('__postprocessors') is not None:
1710             pps_chain.extend(ie_info['__postprocessors'])
1711         pps_chain.extend(self._pps)
1712         for pp in pps_chain:
1713             files_to_delete = []
1714             try:
1715                 files_to_delete, info = pp.run(info)
1716             except PostProcessingError as e:
1717                 self.report_error(e.msg)
1718             if files_to_delete and not self.params.get('keepvideo', False):
1719                 for old_filename in files_to_delete:
1720                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1721                     try:
1722                         os.remove(encodeFilename(old_filename))
1723                     except (IOError, OSError):
1724                         self.report_warning('Unable to remove downloaded original file')
1725
1726     def _make_archive_id(self, info_dict):
1727         # Future-proof against any change in case
1728         # and backwards compatibility with prior versions
1729         extractor = info_dict.get('extractor_key')
1730         if extractor is None:
1731             if 'id' in info_dict:
1732                 extractor = info_dict.get('ie_key')  # key in a playlist
1733         if extractor is None:
1734             return None  # Incomplete video information
1735         return extractor.lower() + ' ' + info_dict['id']
1736
1737     def in_download_archive(self, info_dict):
1738         fn = self.params.get('download_archive')
1739         if fn is None:
1740             return False
1741
1742         vid_id = self._make_archive_id(info_dict)
1743         if vid_id is None:
1744             return False  # Incomplete video information
1745
1746         try:
1747             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1748                 for line in archive_file:
1749                     if line.strip() == vid_id:
1750                         return True
1751         except IOError as ioe:
1752             if ioe.errno != errno.ENOENT:
1753                 raise
1754         return False
1755
1756     def record_download_archive(self, info_dict):
1757         fn = self.params.get('download_archive')
1758         if fn is None:
1759             return
1760         vid_id = self._make_archive_id(info_dict)
1761         assert vid_id
1762         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1763             archive_file.write(vid_id + '\n')
1764
1765     @staticmethod
1766     def format_resolution(format, default='unknown'):
1767         if format.get('vcodec') == 'none':
1768             return 'audio only'
1769         if format.get('resolution') is not None:
1770             return format['resolution']
1771         if format.get('height') is not None:
1772             if format.get('width') is not None:
1773                 res = '%sx%s' % (format['width'], format['height'])
1774             else:
1775                 res = '%sp' % format['height']
1776         elif format.get('width') is not None:
1777             res = '?x%d' % format['width']
1778         else:
1779             res = default
1780         return res
1781
1782     def _format_note(self, fdict):
1783         res = ''
1784         if fdict.get('ext') in ['f4f', 'f4m']:
1785             res += '(unsupported) '
1786         if fdict.get('format_note') is not None:
1787             res += fdict['format_note'] + ' '
1788         if fdict.get('tbr') is not None:
1789             res += '%4dk ' % fdict['tbr']
1790         if fdict.get('container') is not None:
1791             if res:
1792                 res += ', '
1793             res += '%s container' % fdict['container']
1794         if (fdict.get('vcodec') is not None and
1795                 fdict.get('vcodec') != 'none'):
1796             if res:
1797                 res += ', '
1798             res += fdict['vcodec']
1799             if fdict.get('vbr') is not None:
1800                 res += '@'
1801         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1802             res += 'video@'
1803         if fdict.get('vbr') is not None:
1804             res += '%4dk' % fdict['vbr']
1805         if fdict.get('fps') is not None:
1806             res += ', %sfps' % fdict['fps']
1807         if fdict.get('acodec') is not None:
1808             if res:
1809                 res += ', '
1810             if fdict['acodec'] == 'none':
1811                 res += 'video only'
1812             else:
1813                 res += '%-5s' % fdict['acodec']
1814         elif fdict.get('abr') is not None:
1815             if res:
1816                 res += ', '
1817             res += 'audio'
1818         if fdict.get('abr') is not None:
1819             res += '@%3dk' % fdict['abr']
1820         if fdict.get('asr') is not None:
1821             res += ' (%5dHz)' % fdict['asr']
1822         if fdict.get('filesize') is not None:
1823             if res:
1824                 res += ', '
1825             res += format_bytes(fdict['filesize'])
1826         elif fdict.get('filesize_approx') is not None:
1827             if res:
1828                 res += ', '
1829             res += '~' + format_bytes(fdict['filesize_approx'])
1830         return res
1831
1832     def list_formats(self, info_dict):
1833         formats = info_dict.get('formats', [info_dict])
1834         table = [
1835             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1836             for f in formats
1837             if f.get('preference') is None or f['preference'] >= -1000]
1838         if len(formats) > 1:
1839             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1840
1841         header_line = ['format code', 'extension', 'resolution', 'note']
1842         self.to_screen(
1843             '[info] Available formats for %s:\n%s' %
1844             (info_dict['id'], render_table(header_line, table)))
1845
1846     def list_thumbnails(self, info_dict):
1847         thumbnails = info_dict.get('thumbnails')
1848         if not thumbnails:
1849             tn_url = info_dict.get('thumbnail')
1850             if tn_url:
1851                 thumbnails = [{'id': '0', 'url': tn_url}]
1852             else:
1853                 self.to_screen(
1854                     '[info] No thumbnails present for %s' % info_dict['id'])
1855                 return
1856
1857         self.to_screen(
1858             '[info] Thumbnails for %s:' % info_dict['id'])
1859         self.to_screen(render_table(
1860             ['ID', 'width', 'height', 'URL'],
1861             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1862
1863     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1864         if not subtitles:
1865             self.to_screen('%s has no %s' % (video_id, name))
1866             return
1867         self.to_screen(
1868             'Available %s for %s:' % (name, video_id))
1869         self.to_screen(render_table(
1870             ['Language', 'formats'],
1871             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1872                 for lang, formats in subtitles.items()]))
1873
1874     def urlopen(self, req):
1875         """ Start an HTTP download """
1876         if isinstance(req, compat_basestring):
1877             req = sanitized_Request(req)
1878         return self._opener.open(req, timeout=self._socket_timeout)
1879
    def print_debug_header(self):
        # Emit environment diagnostics (encodings, version, git revision,
        # helper-binary versions, proxies); only active with --verbose.
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
        try:
            # Best effort: report the git revision when running from a
            # source checkout; failures of any kind are ignored.
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        except Exception:
            try:
                # Python 2 only: clear the exception state just handled.
                sys.exc_clear()
            except Exception:
                pass
        self._write_string('[debug] Python version %s - %s\n' % (
            platform.python_version(), platform_name()))

        # Versions of the external binaries (ffmpeg/avconv, rtmpdump, ...).
        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Collect the proxies actually installed on the opener's handlers.
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            # Opt-in (--call-home): contact yt-dl.org to report the public IP
            # and check whether a newer release is available.
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
1944
1945     def _setup_opener(self):
1946         timeout_val = self.params.get('socket_timeout')
1947         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1948
1949         opts_cookiefile = self.params.get('cookiefile')
1950         opts_proxy = self.params.get('proxy')
1951
1952         if opts_cookiefile is None:
1953             self.cookiejar = compat_cookiejar.CookieJar()
1954         else:
1955             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1956                 opts_cookiefile)
1957             if os.access(opts_cookiefile, os.R_OK):
1958                 self.cookiejar.load()
1959
1960         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
1961         if opts_proxy is not None:
1962             if opts_proxy == '':
1963                 proxies = {}
1964             else:
1965                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1966         else:
1967             proxies = compat_urllib_request.getproxies()
1968             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1969             if 'http' in proxies and 'https' not in proxies:
1970                 proxies['https'] = proxies['http']
1971         proxy_handler = PerRequestProxyHandler(proxies)
1972
1973         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1974         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1975         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1976         data_handler = compat_urllib_request_DataHandler()
1977         opener = compat_urllib_request.build_opener(
1978             proxy_handler, https_handler, cookie_processor, ydlh, data_handler)
1979
1980         # Delete the default user-agent header, which would otherwise apply in
1981         # cases where our custom HTTP handler doesn't come into play
1982         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1983         opener.addheaders = []
1984         self._opener = opener
1985
1986     def encode(self, s):
1987         if isinstance(s, bytes):
1988             return s  # Already encoded
1989
1990         try:
1991             return s.encode(self.get_encoding())
1992         except UnicodeEncodeError as err:
1993             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1994             raise
1995
1996     def get_encoding(self):
1997         encoding = self.params.get('encoding')
1998         if encoding is None:
1999             encoding = preferredencoding()
2000         return encoding
2001
2002     def _write_thumbnails(self, info_dict, filename):
2003         if self.params.get('writethumbnail', False):
2004             thumbnails = info_dict.get('thumbnails')
2005             if thumbnails:
2006                 thumbnails = [thumbnails[-1]]
2007         elif self.params.get('write_all_thumbnails', False):
2008             thumbnails = info_dict.get('thumbnails')
2009         else:
2010             return
2011
2012         if not thumbnails:
2013             # No thumbnails present, so return immediately
2014             return
2015
2016         for t in thumbnails:
2017             thumb_ext = determine_ext(t['url'], 'jpg')
2018             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2019             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2020             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
2021
2022             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2023                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2024                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2025             else:
2026                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2027                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2028                 try:
2029                     uf = self.urlopen(t['url'])
2030                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2031                         shutil.copyfileobj(uf, thumbf)
2032                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2033                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2034                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2035                     self.report_warning('Unable to download thumbnail "%s": %s' %
2036                                         (t['url'], compat_str(err)))