Merge pull request #8718 from remitamine/m3u8-fixup
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import datetime
9 import errno
10 import fileinput
11 import io
12 import itertools
13 import json
14 import locale
15 import operator
16 import os
17 import platform
18 import re
19 import shutil
20 import subprocess
21 import socket
22 import sys
23 import time
24 import tokenize
25 import traceback
26
27 from .compat import (
28     compat_basestring,
29     compat_cookiejar,
30     compat_expanduser,
31     compat_get_terminal_size,
32     compat_http_client,
33     compat_kwargs,
34     compat_os_name,
35     compat_str,
36     compat_tokenize_tokenize,
37     compat_urllib_error,
38     compat_urllib_request,
39     compat_urllib_request_DataHandler,
40 )
41 from .utils import (
42     ContentTooShortError,
43     date_from_str,
44     DateRange,
45     DEFAULT_OUTTMPL,
46     determine_ext,
47     determine_protocol,
48     DownloadError,
49     encode_compat_str,
50     encodeFilename,
51     error_to_compat_str,
52     ExtractorError,
53     format_bytes,
54     formatSeconds,
55     locked_file,
56     make_HTTPS_handler,
57     MaxDownloadsReached,
58     PagedList,
59     parse_filesize,
60     PerRequestProxyHandler,
61     PostProcessingError,
62     platform_name,
63     preferredencoding,
64     render_table,
65     SameFileError,
66     sanitize_filename,
67     sanitize_path,
68     sanitized_Request,
69     std_headers,
70     subtitles_filename,
71     UnavailableVideoError,
72     url_basename,
73     version_tuple,
74     write_json_file,
75     write_string,
76     YoutubeDLCookieProcessor,
77     YoutubeDLHandler,
78     prepend_extension,
79     replace_extension,
80     args_to_str,
81     age_restricted,
82 )
83 from .cache import Cache
84 from .extractor import get_info_extractor, gen_extractors
85 from .downloader import get_suitable_downloader
86 from .downloader.rtmp import rtmpdump_version
87 from .postprocessor import (
88     FFmpegFixupM3u8PP,
89     FFmpegFixupM4aPP,
90     FFmpegFixupStretchedPP,
91     FFmpegMergerPP,
92     FFmpegPostProcessor,
93     get_postprocessor,
94 )
95 from .version import __version__
96
97 if compat_os_name == 'nt':
98     import ctypes
99
100
101 class YoutubeDL(object):
102     """YoutubeDL class.
103
104     YoutubeDL objects are the ones responsible of downloading the
105     actual video file and writing it to disk if the user has requested
106     it, among some other tasks. In most cases there should be one per
107     program. As, given a video URL, the downloader doesn't know how to
108     extract all the needed information, task that InfoExtractors do, it
109     has to pass the URL to one of them.
110
111     For this, YoutubeDL objects have a method that allows
112     InfoExtractors to be registered in a given order. When it is passed
113     a URL, the YoutubeDL object handles it to the first InfoExtractor it
114     finds that reports being able to handle it. The InfoExtractor extracts
115     all the information about the video or videos the URL refers to, and
116     YoutubeDL process the extracted information, possibly using a File
117     Downloader to download the video.
118
119     YoutubeDL objects accept a lot of parameters. In order not to saturate
120     the object constructor with arguments, it receives a dictionary of
121     options instead. These options are available through the params
122     attribute for the InfoExtractors to use. The YoutubeDL also
123     registers itself as the downloader in charge for the InfoExtractors
124     that are added to it, so this is a "mutual registration".
125
126     Available options:
127
128     username:          Username for authentication purposes.
129     password:          Password for authentication purposes.
130     videopassword:     Password for accessing a video.
131     usenetrc:          Use netrc for authentication instead.
132     verbose:           Print additional info to stdout.
133     quiet:             Do not print messages to stdout.
134     no_warnings:       Do not print out anything for warnings.
135     forceurl:          Force printing final URL.
136     forcetitle:        Force printing title.
137     forceid:           Force printing ID.
138     forcethumbnail:    Force printing thumbnail URL.
139     forcedescription:  Force printing description.
140     forcefilename:     Force printing final filename.
141     forceduration:     Force printing duration.
142     forcejson:         Force printing info_dict as JSON.
143     dump_single_json:  Force printing the info_dict of the whole playlist
144                        (or video) as a single JSON line.
145     simulate:          Do not download the video files.
146     format:            Video format code. See options.py for more information.
147     outtmpl:           Template for output names.
148     restrictfilenames: Do not allow "&" and spaces in file names
149     ignoreerrors:      Do not stop on download errors.
150     force_generic_extractor: Force downloader to use the generic extractor
151     nooverwrites:      Prevent overwriting files.
152     playliststart:     Playlist item to start at.
153     playlistend:       Playlist item to end at.
154     playlist_items:    Specific indices of playlist to download.
155     playlistreverse:   Download playlist items in reverse order.
156     matchtitle:        Download only matching titles.
157     rejecttitle:       Reject downloads for matching titles.
158     logger:            Log messages to a logging.Logger instance.
159     logtostderr:       Log messages to stderr instead of stdout.
160     writedescription:  Write the video description to a .description file
161     writeinfojson:     Write the video description to a .info.json file
162     writeannotations:  Write the video annotations to a .annotations.xml file
163     writethumbnail:    Write the thumbnail image to a file
164     write_all_thumbnails:  Write all thumbnail formats to files
165     writesubtitles:    Write the video subtitles to a file
166     writeautomaticsub: Write the automatically generated subtitles to a file
167     allsubtitles:      Downloads all the subtitles of the video
168                        (requires writesubtitles or writeautomaticsub)
169     listsubtitles:     Lists all available subtitles for the video
170     subtitlesformat:   The format code for subtitles
171     subtitleslangs:    List of languages of the subtitles to download
172     keepvideo:         Keep the video file after post-processing
173     daterange:         A DateRange object, download only if the upload_date is in the range.
174     skip_download:     Skip the actual download of the video file
175     cachedir:          Location of the cache files in the filesystem.
176                        False to disable filesystem cache.
177     noplaylist:        Download single video instead of a playlist if in doubt.
178     age_limit:         An integer representing the user's age in years.
179                        Unsuitable videos for the given age are skipped.
180     min_views:         An integer representing the minimum view count the video
181                        must have in order to not be skipped.
182                        Videos without view count information are always
183                        downloaded. None for no limit.
184     max_views:         An integer representing the maximum view count.
185                        Videos that are more popular than that are not
186                        downloaded.
187                        Videos without view count information are always
188                        downloaded. None for no limit.
189     download_archive:  File name of a file where all downloads are recorded.
190                        Videos already present in the file are not downloaded
191                        again.
192     cookiefile:        File name where cookies should be read from and dumped to.
193     nocheckcertificate:Do not verify SSL certificates
194     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
195                        At the moment, this is only supported by YouTube.
196     proxy:             URL of the proxy server to use
197     cn_verification_proxy:  URL of the proxy to use for IP address verification
198                        on Chinese sites. (Experimental)
199     socket_timeout:    Time to wait for unresponsive hosts, in seconds
200     bidi_workaround:   Work around buggy terminals without bidirectional text
201                        support, using fribidi
202     debug_printtraffic:Print out sent and received HTTP traffic
203     include_ads:       Download ads as well
204     default_search:    Prepend this string if an input url is not valid.
205                        'auto' for elaborate guessing
206     encoding:          Use this encoding instead of the system-specified.
207     extract_flat:      Do not resolve URLs, return the immediate result.
208                        Pass in 'in_playlist' to only show this behavior for
209                        playlist items.
210     postprocessors:    A list of dictionaries, each with an entry
211                        * key:  The name of the postprocessor. See
212                                youtube_dl/postprocessor/__init__.py for a list.
213                        as well as any further keyword arguments for the
214                        postprocessor.
215     progress_hooks:    A list of functions that get called on download
216                        progress, with a dictionary with the entries
217                        * status: One of "downloading", "error", or "finished".
218                                  Check this first and ignore unknown values.
219
220                        If status is one of "downloading", or "finished", the
221                        following properties may also be present:
222                        * filename: The final filename (always present)
223                        * tmpfilename: The filename we're currently writing to
224                        * downloaded_bytes: Bytes on disk
225                        * total_bytes: Size of the whole file, None if unknown
226                        * total_bytes_estimate: Guess of the eventual file size,
227                                                None if unavailable.
228                        * elapsed: The number of seconds since download started.
229                        * eta: The estimated time in seconds, None if unknown
230                        * speed: The download speed in bytes/second, None if
231                                 unknown
232                        * fragment_index: The counter of the currently
233                                          downloaded video fragment.
234                        * fragment_count: The number of fragments (= individual
235                                          files that will be merged)
236
237                        Progress hooks are guaranteed to be called at least once
238                        (with status "finished") if the download is successful.
239     merge_output_format: Extension to use when merging formats.
240     fixup:             Automatically correct known faults of the file.
241                        One of:
242                        - "never": do nothing
243                        - "warn": only emit a warning
244                        - "detect_or_warn": check whether we can do anything
245                                            about it, warn otherwise (default)
246     source_address:    (Experimental) Client-side IP address to bind to.
247     call_home:         Boolean, true iff we are allowed to contact the
248                        youtube-dl servers for debugging.
249     sleep_interval:    Number of seconds to sleep before each download.
250     listformats:       Print an overview of available video formats and exit.
251     list_thumbnails:   Print a table of all thumbnails and exit.
252     match_filter:      A function that gets called with the info_dict of
253                        every video.
254                        If it returns a message, the video is ignored.
255                        If it returns None, the video is downloaded.
256                        match_filter_func in utils.py is one example for this.
257     no_color:          Do not emit color codes in output.
258
259     The following options determine which downloader is picked:
260     external_downloader: Executable of the external downloader to call.
261                        None or unset for standard (built-in) downloader.
262     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
263
264     The following parameters are not used by YoutubeDL itself, they are used by
265     the downloader (see youtube_dl/downloader/common.py):
266     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
267     noresizebuffer, retries, continuedl, noprogress, consoletitle,
268     xattr_set_filesize, external_downloader_args, hls_use_mpegts.
269
270     The following options are used by the post processors:
271     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
272                        otherwise prefer avconv.
273     postprocessor_args: A list of additional command-line arguments for the
274                         postprocessor.
275     """
276
277     params = None
278     _ies = []
279     _pps = []
280     _download_retcode = None
281     _num_downloads = None
282     _screen_file = None
283
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options."""
        if params is None:
            params = {}
        self._ies = []
        self._ies_instances = {}
        self._pps = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Screen output goes to stderr instead of stdout when 'logtostderr' is set
        # (the boolean indexes into the two-element list).
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = {
            # Default parameters
            'nocheckcertificate': False,
        }
        self.params.update(params)
        self.cache = Cache(self)

        if params.get('bidi_workaround', False):
            try:
                import pty
                # Run an external bidi conversion tool on a pseudo-terminal so
                # its output can be read back line by line (_bidi_workaround).
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    # bidiv is not available; fall back to fribidi.
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                # errno 2 == ENOENT: neither executable was found.
                if ose.errno == 2:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.version_info >= (3,) and sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
                not params.get('restrictfilenames', False)):
            # On Python 3, the Unicode filesystem API will throw errors (#1474)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if isinstance(params.get('outtmpl'), bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')

        self._setup_opener()

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        # Instantiate configured postprocessors: 'key' selects the class, the
        # remaining dict entries become its keyword arguments.
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            del pp_def['key']
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)
360
361     def warn_if_short_id(self, argv):
362         # short YouTube ID starting with dash?
363         idxs = [
364             i for i, a in enumerate(argv)
365             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
366         if idxs:
367             correct_argv = (
368                 ['youtube-dl'] +
369                 [a for i, a in enumerate(argv) if i not in idxs] +
370                 ['--'] + [argv[i] for i in idxs]
371             )
372             self.report_warning(
373                 'Long argument string detected. '
374                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
375                 args_to_str(correct_argv))
376
377     def add_info_extractor(self, ie):
378         """Add an InfoExtractor object to the end of the list."""
379         self._ies.append(ie)
380         self._ies_instances[ie.ie_key()] = ie
381         ie.set_downloader(self)
382
383     def get_info_extractor(self, ie_key):
384         """
385         Get an instance of an IE with name ie_key, it will try to get one from
386         the _ies list, if there's no instance it will create a new one and add
387         it to the extractor list.
388         """
389         ie = self._ies_instances.get(ie_key)
390         if ie is None:
391             ie = get_info_extractor(ie_key)()
392             self.add_info_extractor(ie)
393         return ie
394
395     def add_default_info_extractors(self):
396         """
397         Add the InfoExtractors returned by gen_extractors to the end of the list
398         """
399         for ie in gen_extractors():
400             self.add_info_extractor(ie)
401
402     def add_post_processor(self, pp):
403         """Add a PostProcessor object to the end of the chain."""
404         self._pps.append(pp)
405         pp.set_downloader(self)
406
407     def add_progress_hook(self, ph):
408         """Add the progress hook (currently only for the file downloader)"""
409         self._progress_hooks.append(ph)
410
411     def _bidi_workaround(self, message):
412         if not hasattr(self, '_output_channel'):
413             return message
414
415         assert hasattr(self, '_output_process')
416         assert isinstance(message, compat_str)
417         line_count = message.count('\n') + 1
418         self._output_process.stdin.write((message + '\n').encode('utf-8'))
419         self._output_process.stdin.flush()
420         res = ''.join(self._output_channel.readline().decode('utf-8')
421                       for _ in range(line_count))
422         return res[:-len('\n')]
423
424     def to_screen(self, message, skip_eol=False):
425         """Print message to stdout if not in quiet mode."""
426         return self.to_stdout(message, skip_eol, check_quiet=True)
427
428     def _write_string(self, s, out=None):
429         write_string(s, out=out, encoding=self.params.get('encoding'))
430
431     def to_stdout(self, message, skip_eol=False, check_quiet=False):
432         """Print message to stdout if not in quiet mode."""
433         if self.params.get('logger'):
434             self.params['logger'].debug(message)
435         elif not check_quiet or not self.params.get('quiet', False):
436             message = self._bidi_workaround(message)
437             terminator = ['\n', ''][skip_eol]
438             output = message + terminator
439
440             self._write_string(output, self._screen_file)
441
442     def to_stderr(self, message):
443         """Print message to stderr."""
444         assert isinstance(message, compat_str)
445         if self.params.get('logger'):
446             self.params['logger'].error(message)
447         else:
448             message = self._bidi_workaround(message)
449             output = message + '\n'
450             self._write_string(output, self._err_file)
451
452     def to_console_title(self, message):
453         if not self.params.get('consoletitle', False):
454             return
455         if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
456             # c_wchar_p() might not be necessary if `message` is
457             # already of type unicode()
458             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
459         elif 'TERM' in os.environ:
460             self._write_string('\033]0;%s\007' % message, self._screen_file)
461
462     def save_console_title(self):
463         if not self.params.get('consoletitle', False):
464             return
465         if 'TERM' in os.environ:
466             # Save the title on stack
467             self._write_string('\033[22;0t', self._screen_file)
468
469     def restore_console_title(self):
470         if not self.params.get('consoletitle', False):
471             return
472         if 'TERM' in os.environ:
473             # Restore the title from stack
474             self._write_string('\033[23;0t', self._screen_file)
475
476     def __enter__(self):
477         self.save_console_title()
478         return self
479
480     def __exit__(self, *args):
481         self.restore_console_title()
482
483         if self.params.get('cookiefile') is not None:
484             self.cookiejar.save()
485
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # Prefer the wrapped exception's stored exc_info when the
                    # current exception carries one (presumably ExtractorError
                    # does — TODO confirm), then append the current traceback.
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: show the current call stack.
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Re-raise as DownloadError, attaching the most specific exception
            # info available (the wrapped one when present).
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # ignoreerrors mode: record failure via the process return code instead.
        self._download_retcode = 1
515
516     def report_warning(self, message):
517         '''
518         Print the message to stderr, it will be prefixed with 'WARNING:'
519         If stderr is a tty file the 'WARNING:' will be colored
520         '''
521         if self.params.get('logger') is not None:
522             self.params['logger'].warning(message)
523         else:
524             if self.params.get('no_warnings'):
525                 return
526             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
527                 _msg_header = '\033[0;33mWARNING:\033[0m'
528             else:
529                 _msg_header = 'WARNING:'
530             warning_message = '%s %s' % (_msg_header, message)
531             self.to_stderr(warning_message)
532
533     def report_error(self, message, tb=None):
534         '''
535         Do the same as trouble, but prefixes the message with 'ERROR:', colored
536         in red if stderr is a tty file.
537         '''
538         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
539             _msg_header = '\033[0;31mERROR:\033[0m'
540         else:
541             _msg_header = 'ERROR:'
542         error_message = '%s %s' % (_msg_header, message)
543         self.trouble(error_message, tb)
544
545     def report_file_already_downloaded(self, file_name):
546         """Report file has already been fully downloaded."""
547         try:
548             self.to_screen('[download] %s has already been downloaded' % file_name)
549         except UnicodeEncodeError:
550             self.to_screen('[download] The file has already been downloaded')
551
    def prepare_filename(self, info_dict):
        """Generate the output filename.

        Renders the 'outtmpl' template against a sanitized copy of info_dict;
        returns the sanitized path, or None on a template error.
        """
        try:
            template_dict = dict(info_dict)

            template_dict['epoch'] = int(time.time())
            autonumber_size = self.params.get('autonumber_size')
            if autonumber_size is None:
                autonumber_size = 5
            autonumber_templ = '%0' + str(autonumber_size) + 'd'
            template_dict['autonumber'] = autonumber_templ % self._num_downloads
            if template_dict.get('playlist_index') is not None:
                # Zero-pad the playlist index to the width of the playlist size.
                template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
            if template_dict.get('resolution') is None:
                # Derive a resolution string from width/height when possible.
                if template_dict.get('width') and template_dict.get('height'):
                    template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
                elif template_dict.get('height'):
                    template_dict['resolution'] = '%sp' % template_dict['height']
                elif template_dict.get('width'):
                    template_dict['resolution'] = '%dx?' % template_dict['width']

            # Make every value safe for use in a file name; the 'id' field gets
            # ID-specific (laxer) sanitization rules.
            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id'))
            template_dict = dict((k, sanitize(k, v))
                                 for k, v in template_dict.items()
                                 if v is not None)
            # Fields referenced by the template but absent here render as 'NA'
            # instead of raising KeyError.
            template_dict = collections.defaultdict(lambda: 'NA', template_dict)

            outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
            tmpl = compat_expanduser(outtmpl)
            filename = tmpl % template_dict
            # Temporary fix for #4787
            # 'Treat' all problem characters by passing filename through preferredencoding
            # to workaround encoding issues with subprocess on python2 @ Windows
            if sys.version_info < (3, 0) and sys.platform == 'win32':
                filename = encodeFilename(filename, True).decode(preferredencoding())
            return sanitize_path(filename)
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None
594
595     def _match_entry(self, info_dict, incomplete):
596         """ Returns None iff the file should be downloaded """
597
598         video_title = info_dict.get('title', info_dict.get('id', 'video'))
599         if 'title' in info_dict:
600             # This can happen when we're just evaluating the playlist
601             title = info_dict['title']
602             matchtitle = self.params.get('matchtitle', False)
603             if matchtitle:
604                 if not re.search(matchtitle, title, re.IGNORECASE):
605                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
606             rejecttitle = self.params.get('rejecttitle', False)
607             if rejecttitle:
608                 if re.search(rejecttitle, title, re.IGNORECASE):
609                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
610         date = info_dict.get('upload_date')
611         if date is not None:
612             dateRange = self.params.get('daterange', DateRange())
613             if date not in dateRange:
614                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
615         view_count = info_dict.get('view_count')
616         if view_count is not None:
617             min_views = self.params.get('min_views')
618             if min_views is not None and view_count < min_views:
619                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
620             max_views = self.params.get('max_views')
621             if max_views is not None and view_count > max_views:
622                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
623         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
624             return 'Skipping "%s" because it is age restricted' % video_title
625         if self.in_download_archive(info_dict):
626             return '%s has already been recorded in archive' % video_title
627
628         if not incomplete:
629             match_filter = self.params.get('match_filter')
630             if match_filter is not None:
631                 ret = match_filter(info_dict)
632                 if ret is not None:
633                     return ret
634
635         return None
636
637     @staticmethod
638     def add_extra_info(info_dict, extra_info):
639         '''Set the keys from extra_info in info dict if they are missing'''
640         for key, value in extra_info.items():
641             info_dict.setdefault(key, value)
642
643     def extract_info(self, url, download=True, ie_key=None, extra_info={},
644                      process=True, force_generic_extractor=False):
645         '''
646         Returns a list with a dictionary for each video we find.
647         If 'download', also downloads the videos.
648         extra_info is a dict containing the extra values to add to each result
649         '''
650
651         if not ie_key and force_generic_extractor:
652             ie_key = 'Generic'
653
654         if ie_key:
655             ies = [self.get_info_extractor(ie_key)]
656         else:
657             ies = self._ies
658
659         for ie in ies:
660             if not ie.suitable(url):
661                 continue
662
663             if not ie.working():
664                 self.report_warning('The program functionality for this site has been marked as broken, '
665                                     'and will probably not work.')
666
667             try:
668                 ie_result = ie.extract(url)
669                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
670                     break
671                 if isinstance(ie_result, list):
672                     # Backwards compatibility: old IE result format
673                     ie_result = {
674                         '_type': 'compat_list',
675                         'entries': ie_result,
676                     }
677                 self.add_default_extra_info(ie_result, ie, url)
678                 if process:
679                     return self.process_ie_result(ie_result, download, extra_info)
680                 else:
681                     return ie_result
682             except ExtractorError as e:  # An error we somewhat expected
683                 self.report_error(compat_str(e), e.format_traceback())
684                 break
685             except MaxDownloadsReached:
686                 raise
687             except Exception as e:
688                 if self.params.get('ignoreerrors', False):
689                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
690                     break
691                 else:
692                     raise
693         else:
694             self.report_error('no suitable InfoExtractor for URL %s' % url)
695
696     def add_default_extra_info(self, ie_result, ie, url):
697         self.add_extra_info(ie_result, {
698             'extractor': ie.IE_NAME,
699             'webpage_url': url,
700             'webpage_url_basename': url_basename(url),
701             'extractor_key': ie.ie_key(),
702         })
703
704     def process_ie_result(self, ie_result, download=True, extra_info={}):
705         """
706         Take the result of the ie(may be modified) and resolve all unresolved
707         references (URLs, playlist items).
708
709         It will also download the videos if 'download'.
710         Returns the resolved ie_result.
711         """
712         result_type = ie_result.get('_type', 'video')
713
714         if result_type in ('url', 'url_transparent'):
715             extract_flat = self.params.get('extract_flat', False)
716             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
717                     extract_flat is True):
718                 if self.params.get('forcejson', False):
719                     self.to_stdout(json.dumps(ie_result))
720                 return ie_result
721
722         if result_type == 'video':
723             self.add_extra_info(ie_result, extra_info)
724             return self.process_video_result(ie_result, download=download)
725         elif result_type == 'url':
726             # We have to add extra_info to the results because it may be
727             # contained in a playlist
728             return self.extract_info(ie_result['url'],
729                                      download,
730                                      ie_key=ie_result.get('ie_key'),
731                                      extra_info=extra_info)
732         elif result_type == 'url_transparent':
733             # Use the information from the embedding page
734             info = self.extract_info(
735                 ie_result['url'], ie_key=ie_result.get('ie_key'),
736                 extra_info=extra_info, download=False, process=False)
737
738             force_properties = dict(
739                 (k, v) for k, v in ie_result.items() if v is not None)
740             for f in ('_type', 'url', 'ie_key'):
741                 if f in force_properties:
742                     del force_properties[f]
743             new_result = info.copy()
744             new_result.update(force_properties)
745
746             assert new_result.get('_type') != 'url_transparent'
747
748             return self.process_ie_result(
749                 new_result, download=download, extra_info=extra_info)
750         elif result_type == 'playlist' or result_type == 'multi_video':
751             # We process each entry in the playlist
752             playlist = ie_result.get('title') or ie_result.get('id')
753             self.to_screen('[download] Downloading playlist: %s' % playlist)
754
755             playlist_results = []
756
757             playliststart = self.params.get('playliststart', 1) - 1
758             playlistend = self.params.get('playlistend')
759             # For backwards compatibility, interpret -1 as whole list
760             if playlistend == -1:
761                 playlistend = None
762
763             playlistitems_str = self.params.get('playlist_items')
764             playlistitems = None
765             if playlistitems_str is not None:
766                 def iter_playlistitems(format):
767                     for string_segment in format.split(','):
768                         if '-' in string_segment:
769                             start, end = string_segment.split('-')
770                             for item in range(int(start), int(end) + 1):
771                                 yield int(item)
772                         else:
773                             yield int(string_segment)
774                 playlistitems = iter_playlistitems(playlistitems_str)
775
776             ie_entries = ie_result['entries']
777             if isinstance(ie_entries, list):
778                 n_all_entries = len(ie_entries)
779                 if playlistitems:
780                     entries = [
781                         ie_entries[i - 1] for i in playlistitems
782                         if -n_all_entries <= i - 1 < n_all_entries]
783                 else:
784                     entries = ie_entries[playliststart:playlistend]
785                 n_entries = len(entries)
786                 self.to_screen(
787                     '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
788                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
789             elif isinstance(ie_entries, PagedList):
790                 if playlistitems:
791                     entries = []
792                     for item in playlistitems:
793                         entries.extend(ie_entries.getslice(
794                             item - 1, item
795                         ))
796                 else:
797                     entries = ie_entries.getslice(
798                         playliststart, playlistend)
799                 n_entries = len(entries)
800                 self.to_screen(
801                     '[%s] playlist %s: Downloading %d videos' %
802                     (ie_result['extractor'], playlist, n_entries))
803             else:  # iterable
804                 if playlistitems:
805                     entry_list = list(ie_entries)
806                     entries = [entry_list[i - 1] for i in playlistitems]
807                 else:
808                     entries = list(itertools.islice(
809                         ie_entries, playliststart, playlistend))
810                 n_entries = len(entries)
811                 self.to_screen(
812                     '[%s] playlist %s: Downloading %d videos' %
813                     (ie_result['extractor'], playlist, n_entries))
814
815             if self.params.get('playlistreverse', False):
816                 entries = entries[::-1]
817
818             for i, entry in enumerate(entries, 1):
819                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
820                 extra = {
821                     'n_entries': n_entries,
822                     'playlist': playlist,
823                     'playlist_id': ie_result.get('id'),
824                     'playlist_title': ie_result.get('title'),
825                     'playlist_index': i + playliststart,
826                     'extractor': ie_result['extractor'],
827                     'webpage_url': ie_result['webpage_url'],
828                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
829                     'extractor_key': ie_result['extractor_key'],
830                 }
831
832                 reason = self._match_entry(entry, incomplete=True)
833                 if reason is not None:
834                     self.to_screen('[download] ' + reason)
835                     continue
836
837                 entry_result = self.process_ie_result(entry,
838                                                       download=download,
839                                                       extra_info=extra)
840                 playlist_results.append(entry_result)
841             ie_result['entries'] = playlist_results
842             self.to_screen('[download] Finished downloading playlist: %s' % playlist)
843             return ie_result
844         elif result_type == 'compat_list':
845             self.report_warning(
846                 'Extractor %s returned a compat_list result. '
847                 'It needs to be updated.' % ie_result.get('extractor'))
848
849             def _fixup(r):
850                 self.add_extra_info(
851                     r,
852                     {
853                         'extractor': ie_result['extractor'],
854                         'webpage_url': ie_result['webpage_url'],
855                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
856                         'extractor_key': ie_result['extractor_key'],
857                     }
858                 )
859                 return r
860             ie_result['entries'] = [
861                 self.process_ie_result(_fixup(r), download, extra_info)
862                 for r in ie_result['entries']
863             ]
864             return ie_result
865         else:
866             raise Exception('Invalid result type: %s' % result_type)
867
868     def _build_format_filter(self, filter_spec):
869         " Returns a function to filter the formats according to the filter_spec "
870
871         OPERATORS = {
872             '<': operator.lt,
873             '<=': operator.le,
874             '>': operator.gt,
875             '>=': operator.ge,
876             '=': operator.eq,
877             '!=': operator.ne,
878         }
879         operator_rex = re.compile(r'''(?x)\s*
880             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
881             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
882             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
883             $
884             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
885         m = operator_rex.search(filter_spec)
886         if m:
887             try:
888                 comparison_value = int(m.group('value'))
889             except ValueError:
890                 comparison_value = parse_filesize(m.group('value'))
891                 if comparison_value is None:
892                     comparison_value = parse_filesize(m.group('value') + 'B')
893                 if comparison_value is None:
894                     raise ValueError(
895                         'Invalid value %r in format specification %r' % (
896                             m.group('value'), filter_spec))
897             op = OPERATORS[m.group('op')]
898
899         if not m:
900             STR_OPERATORS = {
901                 '=': operator.eq,
902                 '!=': operator.ne,
903                 '^=': lambda attr, value: attr.startswith(value),
904                 '$=': lambda attr, value: attr.endswith(value),
905                 '*=': lambda attr, value: value in attr,
906             }
907             str_operator_rex = re.compile(r'''(?x)
908                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
909                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
910                 \s*(?P<value>[a-zA-Z0-9._-]+)
911                 \s*$
912                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
913             m = str_operator_rex.search(filter_spec)
914             if m:
915                 comparison_value = m.group('value')
916                 op = STR_OPERATORS[m.group('op')]
917
918         if not m:
919             raise ValueError('Invalid filter specification %r' % filter_spec)
920
921         def _filter(f):
922             actual_value = f.get(m.group('key'))
923             if actual_value is None:
924                 return m.group('none_inclusive')
925             return op(actual_value, comparison_value)
926         return _filter
927
    def build_format_selector(self, format_spec):
        """
        Compile a format specification string (e.g. 'bestvideo+bestaudio/best')
        into a selector function: given an iterable of format dicts it yields
        the formats chosen for download.

        The spec is tokenized with the stdlib tokenizer, parsed into a tree of
        FormatSelector nodes, and the tree is compiled into nested generator
        functions.
        """
        def syntax_error(note, start):
            # Build (not raise) a SyntaxError pointing at the offending
            # column of the original spec string.
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        # Node types of the parsed selector tree.
        PICKFIRST = 'PICKFIRST'  # 'a/b': first alternative yielding formats
        MERGE = 'MERGE'          # 'video+audio': merge two formats into one
        SINGLE = 'SINGLE'        # plain name, e.g. 'best', 'mp4', '22'
        GROUP = 'GROUP'          # parenthesized sub-expression
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        def _parse_filter(tokens):
            # Collect everything up to the closing ']' into one filter string
            # (the '[' has already been consumed by the caller).
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    # Flush any pending joined name before the bracket.
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    # Any other NAME/NUMBER/OP is glued onto the pending
                    # string so things like 'mp4-baseline' stay one token.
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Recursive-descent parser. Returns a list of FormatSelector
            # nodes; the inside_* flags tell it which delimiters terminate
            # the current nesting level.
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        # Delimiter belongs to the enclosing level; push it
                        # back and return to the caller.
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        # A bare filter like '[height<720]' implies 'best'.
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        video_selector = current_selector
                        audio_selector = _parse_format_selection(tokens, inside_merge=True)
                        if not video_selector or not audio_selector:
                            raise syntax_error('"+" must be between two format selectors', start)
                        current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _build_selector_function(selector):
            # Compile a FormatSelector node (or a list of them) into a
            # generator function over the available formats.
            if isinstance(selector, list):
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(formats):
                    # Chain the results of the individual selectors.
                    for f in fs:
                        for format in f(formats):
                            yield format
                return selector_function
            elif selector.type == GROUP:
                selector_function = _build_selector_function(selector.selector)
            elif selector.type == PICKFIRST:
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(formats):
                    # Return the results of the first alternative that
                    # yields anything.
                    for f in fs:
                        picked_formats = list(f(formats))
                        if picked_formats:
                            return picked_formats
                    return []
            elif selector.type == SINGLE:
                format_spec = selector.selector

                def selector_function(formats):
                    # Formats are assumed sorted worst-to-best, so index 0
                    # is 'worst' and -1 is 'best'.
                    formats = list(formats)
                    if not formats:
                        return
                    if format_spec == 'all':
                        for f in formats:
                            yield f
                    elif format_spec in ['best', 'worst', None]:
                        format_idx = 0 if format_spec == 'worst' else -1
                        audiovideo_formats = [
                            f for f in formats
                            if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                        if audiovideo_formats:
                            yield audiovideo_formats[format_idx]
                        # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
                        elif (all(f.get('acodec') != 'none' for f in formats) or
                              all(f.get('vcodec') != 'none' for f in formats)):
                            yield formats[format_idx]
                    elif format_spec == 'bestaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[-1]
                    elif format_spec == 'worstaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[0]
                    elif format_spec == 'bestvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[-1]
                    elif format_spec == 'worstvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[0]
                    else:
                        # Otherwise the spec is either an extension or an
                        # exact format_id; pick the best (last) match.
                        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                        if format_spec in extensions:
                            filter_f = lambda f: f['ext'] == format_spec
                        else:
                            filter_f = lambda f: f['format_id'] == format_spec
                        matches = list(filter(filter_f, formats))
                        if matches:
                            yield matches[-1]
            elif selector.type == MERGE:
                def _merge(formats_info):
                    # Combine a (video, audio) pair into one synthetic
                    # format dict describing the merged download.
                    format_1, format_2 = [f['format_id'] for f in formats_info]
                    # The first format must contain the video and the
                    # second the audio
                    if formats_info[0].get('vcodec') == 'none':
                        self.report_error('The first format must '
                                          'contain the video, try using '
                                          '"-f %s+%s"' % (format_2, format_1))
                        return
                    # Formats must be opposite (video+audio)
                    if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
                        self.report_error(
                            'Both formats %s and %s are video-only, you must specify "-f video+audio"'
                            % (format_1, format_2))
                        return
                    output_ext = (
                        formats_info[0]['ext']
                        if self.params.get('merge_output_format') is None
                        else self.params['merge_output_format'])
                    return {
                        'requested_formats': formats_info,
                        'format': '%s+%s' % (formats_info[0].get('format'),
                                             formats_info[1].get('format')),
                        'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                formats_info[1].get('format_id')),
                        'width': formats_info[0].get('width'),
                        'height': formats_info[0].get('height'),
                        'resolution': formats_info[0].get('resolution'),
                        'fps': formats_info[0].get('fps'),
                        'vcodec': formats_info[0].get('vcodec'),
                        'vbr': formats_info[0].get('vbr'),
                        'stretched_ratio': formats_info[0].get('stretched_ratio'),
                        'acodec': formats_info[1].get('acodec'),
                        'abr': formats_info[1].get('abr'),
                        'ext': output_ext,
                    }
                video_selector, audio_selector = map(_build_selector_function, selector.selector)

                def selector_function(formats):
                    formats = list(formats)
                    # Merge every video candidate with every audio candidate.
                    for pair in itertools.product(video_selector(formats), audio_selector(formats)):
                        yield _merge(pair)

            # Apply the node's '[...]' filters before selecting.
            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(formats):
                for _filter in filters:
                    formats = list(filter(_filter, formats))
                return selector_function(formats)
            return final_selector

        # Tokenize the spec; the tokenizer wants a bytes readline.
        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator(object):
            # Token-list iterator with one-token pushback, which the
            # recursive-descent parser needs when it reads a delimiter
            # belonging to an outer nesting level.
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            # Python 2 iterator protocol compatibility.
            next = __next__

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
1190
1191     def _calc_headers(self, info_dict):
1192         res = std_headers.copy()
1193
1194         add_headers = info_dict.get('http_headers')
1195         if add_headers:
1196             res.update(add_headers)
1197
1198         cookies = self._calc_cookies(info_dict)
1199         if cookies:
1200             res['Cookie'] = cookies
1201
1202         return res
1203
1204     def _calc_cookies(self, info_dict):
1205         pr = sanitized_Request(info_dict['url'])
1206         self.cookiejar.add_cookie_header(pr)
1207         return pr.get_header('Cookie')
1208
1209     def process_video_result(self, info_dict, download=True):
1210         assert info_dict.get('_type', 'video') == 'video'
1211
1212         if 'id' not in info_dict:
1213             raise ExtractorError('Missing "id" field in extractor result')
1214         if 'title' not in info_dict:
1215             raise ExtractorError('Missing "title" field in extractor result')
1216
1217         if 'playlist' not in info_dict:
1218             # It isn't part of a playlist
1219             info_dict['playlist'] = None
1220             info_dict['playlist_index'] = None
1221
1222         thumbnails = info_dict.get('thumbnails')
1223         if thumbnails is None:
1224             thumbnail = info_dict.get('thumbnail')
1225             if thumbnail:
1226                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1227         if thumbnails:
1228             thumbnails.sort(key=lambda t: (
1229                 t.get('preference'), t.get('width'), t.get('height'),
1230                 t.get('id'), t.get('url')))
1231             for i, t in enumerate(thumbnails):
1232                 if t.get('width') and t.get('height'):
1233                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1234                 if t.get('id') is None:
1235                     t['id'] = '%d' % i
1236
1237         if thumbnails and 'thumbnail' not in info_dict:
1238             info_dict['thumbnail'] = thumbnails[-1]['url']
1239
1240         if 'display_id' not in info_dict and 'id' in info_dict:
1241             info_dict['display_id'] = info_dict['id']
1242
1243         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1244             # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1245             # see http://bugs.python.org/issue1646728)
1246             try:
1247                 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1248                 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1249             except (ValueError, OverflowError, OSError):
1250                 pass
1251
1252         # Auto generate title fields corresponding to the *_number fields when missing
1253         # in order to always have clean titles. This is very common for TV series.
1254         for field in ('chapter', 'season', 'episode'):
1255             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1256                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
1257
1258         subtitles = info_dict.get('subtitles')
1259         if subtitles:
1260             for _, subtitle in subtitles.items():
1261                 for subtitle_format in subtitle:
1262                     if 'ext' not in subtitle_format:
1263                         subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1264
1265         if self.params.get('listsubtitles', False):
1266             if 'automatic_captions' in info_dict:
1267                 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
1268             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1269             return
1270         info_dict['requested_subtitles'] = self.process_subtitles(
1271             info_dict['id'], subtitles,
1272             info_dict.get('automatic_captions'))
1273
1274         # We now pick which formats have to be downloaded
1275         if info_dict.get('formats') is None:
1276             # There's only one format available
1277             formats = [info_dict]
1278         else:
1279             formats = info_dict['formats']
1280
1281         if not formats:
1282             raise ExtractorError('No video formats found!')
1283
1284         formats_dict = {}
1285
1286         # We check that all the formats have the format and format_id fields
1287         for i, format in enumerate(formats):
1288             if 'url' not in format:
1289                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1290
1291             if format.get('format_id') is None:
1292                 format['format_id'] = compat_str(i)
1293             else:
1294                 # Sanitize format_id from characters used in format selector expression
1295                 format['format_id'] = re.sub('[\s,/+\[\]()]', '_', format['format_id'])
1296             format_id = format['format_id']
1297             if format_id not in formats_dict:
1298                 formats_dict[format_id] = []
1299             formats_dict[format_id].append(format)
1300
1301         # Make sure all formats have unique format_id
1302         for format_id, ambiguous_formats in formats_dict.items():
1303             if len(ambiguous_formats) > 1:
1304                 for i, format in enumerate(ambiguous_formats):
1305                     format['format_id'] = '%s-%d' % (format_id, i)
1306
1307         for i, format in enumerate(formats):
1308             if format.get('format') is None:
1309                 format['format'] = '{id} - {res}{note}'.format(
1310                     id=format['format_id'],
1311                     res=self.format_resolution(format),
1312                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1313                 )
1314             # Automatically determine file extension if missing
1315             if 'ext' not in format:
1316                 format['ext'] = determine_ext(format['url']).lower()
1317             # Automatically determine protocol if missing (useful for format
1318             # selection purposes)
1319             if 'protocol' not in format:
1320                 format['protocol'] = determine_protocol(format)
1321             # Add HTTP headers, so that external programs can use them from the
1322             # json output
1323             full_format_info = info_dict.copy()
1324             full_format_info.update(format)
1325             format['http_headers'] = self._calc_headers(full_format_info)
1326
1327         # TODO Central sorting goes here
1328
1329         if formats[0] is not info_dict:
1330             # only set the 'formats' fields if the original info_dict list them
1331             # otherwise we end up with a circular reference, the first (and unique)
1332             # element in the 'formats' field in info_dict is info_dict itself,
1333             # which can't be exported to json
1334             info_dict['formats'] = formats
1335         if self.params.get('listformats'):
1336             self.list_formats(info_dict)
1337             return
1338         if self.params.get('list_thumbnails'):
1339             self.list_thumbnails(info_dict)
1340             return
1341
1342         req_format = self.params.get('format')
1343         if req_format is None:
1344             req_format_list = []
1345             if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
1346                     not info_dict.get('is_live')):
1347                 merger = FFmpegMergerPP(self)
1348                 if merger.available and merger.can_merge():
1349                     req_format_list.append('bestvideo+bestaudio')
1350             req_format_list.append('best')
1351             req_format = '/'.join(req_format_list)
1352         format_selector = self.build_format_selector(req_format)
1353         formats_to_download = list(format_selector(formats))
1354         if not formats_to_download:
1355             raise ExtractorError('requested format not available',
1356                                  expected=True)
1357
1358         if download:
1359             if len(formats_to_download) > 1:
1360                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1361             for format in formats_to_download:
1362                 new_info = dict(info_dict)
1363                 new_info.update(format)
1364                 self.process_info(new_info)
1365         # We update the info dict with the best quality format (backwards compatibility)
1366         info_dict.update(formats_to_download[-1])
1367         return info_dict
1368
1369     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1370         """Select the requested subtitles and their format"""
1371         available_subs = {}
1372         if normal_subtitles and self.params.get('writesubtitles'):
1373             available_subs.update(normal_subtitles)
1374         if automatic_captions and self.params.get('writeautomaticsub'):
1375             for lang, cap_info in automatic_captions.items():
1376                 if lang not in available_subs:
1377                     available_subs[lang] = cap_info
1378
1379         if (not self.params.get('writesubtitles') and not
1380                 self.params.get('writeautomaticsub') or not
1381                 available_subs):
1382             return None
1383
1384         if self.params.get('allsubtitles', False):
1385             requested_langs = available_subs.keys()
1386         else:
1387             if self.params.get('subtitleslangs', False):
1388                 requested_langs = self.params.get('subtitleslangs')
1389             elif 'en' in available_subs:
1390                 requested_langs = ['en']
1391             else:
1392                 requested_langs = [list(available_subs.keys())[0]]
1393
1394         formats_query = self.params.get('subtitlesformat', 'best')
1395         formats_preference = formats_query.split('/') if formats_query else []
1396         subs = {}
1397         for lang in requested_langs:
1398             formats = available_subs.get(lang)
1399             if formats is None:
1400                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1401                 continue
1402             for ext in formats_preference:
1403                 if ext == 'best':
1404                     f = formats[-1]
1405                     break
1406                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1407                 if matches:
1408                     f = matches[-1]
1409                     break
1410             else:
1411                 f = formats[-1]
1412                 self.report_warning(
1413                     'No subtitle format found matching "%s" for language %s, '
1414                     'using %s' % (formats_query, lang, f['ext']))
1415             subs[lang] = f
1416         return subs
1417
1418     def process_info(self, info_dict):
1419         """Process a single resolved IE result."""
1420
1421         assert info_dict.get('_type', 'video') == 'video'
1422
1423         max_downloads = self.params.get('max_downloads')
1424         if max_downloads is not None:
1425             if self._num_downloads >= int(max_downloads):
1426                 raise MaxDownloadsReached()
1427
1428         info_dict['fulltitle'] = info_dict['title']
1429         if len(info_dict['title']) > 200:
1430             info_dict['title'] = info_dict['title'][:197] + '...'
1431
1432         if 'format' not in info_dict:
1433             info_dict['format'] = info_dict['ext']
1434
1435         reason = self._match_entry(info_dict, incomplete=False)
1436         if reason is not None:
1437             self.to_screen('[download] ' + reason)
1438             return
1439
1440         self._num_downloads += 1
1441
1442         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1443
1444         # Forced printings
1445         if self.params.get('forcetitle', False):
1446             self.to_stdout(info_dict['fulltitle'])
1447         if self.params.get('forceid', False):
1448             self.to_stdout(info_dict['id'])
1449         if self.params.get('forceurl', False):
1450             if info_dict.get('requested_formats') is not None:
1451                 for f in info_dict['requested_formats']:
1452                     self.to_stdout(f['url'] + f.get('play_path', ''))
1453             else:
1454                 # For RTMP URLs, also include the playpath
1455                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1456         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1457             self.to_stdout(info_dict['thumbnail'])
1458         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1459             self.to_stdout(info_dict['description'])
1460         if self.params.get('forcefilename', False) and filename is not None:
1461             self.to_stdout(filename)
1462         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1463             self.to_stdout(formatSeconds(info_dict['duration']))
1464         if self.params.get('forceformat', False):
1465             self.to_stdout(info_dict['format'])
1466         if self.params.get('forcejson', False):
1467             self.to_stdout(json.dumps(info_dict))
1468
1469         # Do nothing else if in simulate mode
1470         if self.params.get('simulate', False):
1471             return
1472
1473         if filename is None:
1474             return
1475
1476         try:
1477             dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1478             if dn and not os.path.exists(dn):
1479                 os.makedirs(dn)
1480         except (OSError, IOError) as err:
1481             self.report_error('unable to create directory ' + error_to_compat_str(err))
1482             return
1483
1484         if self.params.get('writedescription', False):
1485             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1486             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1487                 self.to_screen('[info] Video description is already present')
1488             elif info_dict.get('description') is None:
1489                 self.report_warning('There\'s no description to write.')
1490             else:
1491                 try:
1492                     self.to_screen('[info] Writing video description to: ' + descfn)
1493                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1494                         descfile.write(info_dict['description'])
1495                 except (OSError, IOError):
1496                     self.report_error('Cannot write description file ' + descfn)
1497                     return
1498
1499         if self.params.get('writeannotations', False):
1500             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1501             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1502                 self.to_screen('[info] Video annotations are already present')
1503             else:
1504                 try:
1505                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1506                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1507                         annofile.write(info_dict['annotations'])
1508                 except (KeyError, TypeError):
1509                     self.report_warning('There are no annotations to write.')
1510                 except (OSError, IOError):
1511                     self.report_error('Cannot write annotations file: ' + annofn)
1512                     return
1513
1514         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1515                                        self.params.get('writeautomaticsub')])
1516
1517         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1518             # subtitles download errors are already managed as troubles in relevant IE
1519             # that way it will silently go on when used with unsupporting IE
1520             subtitles = info_dict['requested_subtitles']
1521             ie = self.get_info_extractor(info_dict['extractor_key'])
1522             for sub_lang, sub_info in subtitles.items():
1523                 sub_format = sub_info['ext']
1524                 if sub_info.get('data') is not None:
1525                     sub_data = sub_info['data']
1526                 else:
1527                     try:
1528                         sub_data = ie._download_webpage(
1529                             sub_info['url'], info_dict['id'], note=False)
1530                     except ExtractorError as err:
1531                         self.report_warning('Unable to download subtitle for "%s": %s' %
1532                                             (sub_lang, error_to_compat_str(err.cause)))
1533                         continue
1534                 try:
1535                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1536                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1537                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1538                     else:
1539                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1540                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1541                             subfile.write(sub_data)
1542                 except (OSError, IOError):
1543                     self.report_error('Cannot write subtitles file ' + sub_filename)
1544                     return
1545
1546         if self.params.get('writeinfojson', False):
1547             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1548             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1549                 self.to_screen('[info] Video description metadata is already present')
1550             else:
1551                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1552                 try:
1553                     write_json_file(self.filter_requested_info(info_dict), infofn)
1554                 except (OSError, IOError):
1555                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1556                     return
1557
1558         self._write_thumbnails(info_dict, filename)
1559
1560         if not self.params.get('skip_download', False):
1561             try:
1562                 def dl(name, info):
1563                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1564                     for ph in self._progress_hooks:
1565                         fd.add_progress_hook(ph)
1566                     if self.params.get('verbose'):
1567                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1568                     return fd.download(name, info)
1569
1570                 if info_dict.get('requested_formats') is not None:
1571                     downloaded = []
1572                     success = True
1573                     merger = FFmpegMergerPP(self)
1574                     if not merger.available:
1575                         postprocessors = []
1576                         self.report_warning('You have requested multiple '
1577                                             'formats but ffmpeg or avconv are not installed.'
1578                                             ' The formats won\'t be merged.')
1579                     else:
1580                         postprocessors = [merger]
1581
1582                     def compatible_formats(formats):
1583                         video, audio = formats
1584                         # Check extension
1585                         video_ext, audio_ext = audio.get('ext'), video.get('ext')
1586                         if video_ext and audio_ext:
1587                             COMPATIBLE_EXTS = (
1588                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
1589                                 ('webm')
1590                             )
1591                             for exts in COMPATIBLE_EXTS:
1592                                 if video_ext in exts and audio_ext in exts:
1593                                     return True
1594                         # TODO: Check acodec/vcodec
1595                         return False
1596
1597                     filename_real_ext = os.path.splitext(filename)[1][1:]
1598                     filename_wo_ext = (
1599                         os.path.splitext(filename)[0]
1600                         if filename_real_ext == info_dict['ext']
1601                         else filename)
1602                     requested_formats = info_dict['requested_formats']
1603                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1604                         info_dict['ext'] = 'mkv'
1605                         self.report_warning(
1606                             'Requested formats are incompatible for merge and will be merged into mkv.')
1607                     # Ensure filename always has a correct extension for successful merge
1608                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1609                     if os.path.exists(encodeFilename(filename)):
1610                         self.to_screen(
1611                             '[download] %s has already been downloaded and '
1612                             'merged' % filename)
1613                     else:
1614                         for f in requested_formats:
1615                             new_info = dict(info_dict)
1616                             new_info.update(f)
1617                             fname = self.prepare_filename(new_info)
1618                             fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
1619                             downloaded.append(fname)
1620                             partial_success = dl(fname, new_info)
1621                             success = success and partial_success
1622                         info_dict['__postprocessors'] = postprocessors
1623                         info_dict['__files_to_merge'] = downloaded
1624                 else:
1625                     # Just a single file
1626                     success = dl(filename, info_dict)
1627             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1628                 self.report_error('unable to download video data: %s' % str(err))
1629                 return
1630             except (OSError, IOError) as err:
1631                 raise UnavailableVideoError(err)
1632             except (ContentTooShortError, ) as err:
1633                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1634                 return
1635
1636             if success and filename != '-':
1637                 # Fixup content
1638                 fixup_policy = self.params.get('fixup')
1639                 if fixup_policy is None:
1640                     fixup_policy = 'detect_or_warn'
1641
1642                 stretched_ratio = info_dict.get('stretched_ratio')
1643                 if stretched_ratio is not None and stretched_ratio != 1:
1644                     if fixup_policy == 'warn':
1645                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1646                             info_dict['id'], stretched_ratio))
1647                     elif fixup_policy == 'detect_or_warn':
1648                         stretched_pp = FFmpegFixupStretchedPP(self)
1649                         if stretched_pp.available:
1650                             info_dict.setdefault('__postprocessors', [])
1651                             info_dict['__postprocessors'].append(stretched_pp)
1652                         else:
1653                             self.report_warning(
1654                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1655                                     info_dict['id'], stretched_ratio))
1656                     else:
1657                         assert fixup_policy in ('ignore', 'never')
1658
1659                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1660                     if fixup_policy == 'warn':
1661                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1662                             info_dict['id']))
1663                     elif fixup_policy == 'detect_or_warn':
1664                         fixup_pp = FFmpegFixupM4aPP(self)
1665                         if fixup_pp.available:
1666                             info_dict.setdefault('__postprocessors', [])
1667                             info_dict['__postprocessors'].append(fixup_pp)
1668                         else:
1669                             self.report_warning(
1670                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1671                                     info_dict['id']))
1672                     else:
1673                         assert fixup_policy in ('ignore', 'never')
1674
1675                 if info_dict.get('protocol') == 'm3u8_native' or info_dict.get('protocol') == 'm3u8' and self._downloader.params.get('hls_prefer_native', False):
1676                     if fixup_policy == 'warn':
1677                         self.report_warning('%s: malformated aac bitstream.' % (
1678                             info_dict['id']))
1679                     elif fixup_policy == 'detect_or_warn':
1680                         fixup_pp = FFmpegFixupM3u8PP(self)
1681                         if fixup_pp.available:
1682                             info_dict.setdefault('__postprocessors', [])
1683                             info_dict['__postprocessors'].append(fixup_pp)
1684                         else:
1685                             self.report_warning(
1686                                 '%s: malformated aac bitstream. Install ffmpeg or avconv to fix this automatically.' % (
1687                                     info_dict['id']))
1688                     else:
1689                         assert fixup_policy in ('ignore', 'never')
1690
1691                 try:
1692                     self.post_process(filename, info_dict)
1693                 except (PostProcessingError) as err:
1694                     self.report_error('postprocessing: %s' % str(err))
1695                     return
1696                 self.record_download_archive(info_dict)
1697
1698     def download(self, url_list):
1699         """Download a given list of URLs."""
1700         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1701         if (len(url_list) > 1 and
1702                 '%' not in outtmpl and
1703                 self.params.get('max_downloads') != 1):
1704             raise SameFileError(outtmpl)
1705
1706         for url in url_list:
1707             try:
1708                 # It also downloads the videos
1709                 res = self.extract_info(
1710                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
1711             except UnavailableVideoError:
1712                 self.report_error('unable to download video')
1713             except MaxDownloadsReached:
1714                 self.to_screen('[info] Maximum number of downloaded files reached.')
1715                 raise
1716             else:
1717                 if self.params.get('dump_single_json', False):
1718                     self.to_stdout(json.dumps(res))
1719
1720         return self._download_retcode
1721
1722     def download_with_info_file(self, info_filename):
1723         with contextlib.closing(fileinput.FileInput(
1724                 [info_filename], mode='r',
1725                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1726             # FileInput doesn't have a read method, we can't call json.load
1727             info = self.filter_requested_info(json.loads('\n'.join(f)))
1728         try:
1729             self.process_ie_result(info, download=True)
1730         except DownloadError:
1731             webpage_url = info.get('webpage_url')
1732             if webpage_url is not None:
1733                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1734                 return self.download([webpage_url])
1735             else:
1736                 raise
1737         return self._download_retcode
1738
1739     @staticmethod
1740     def filter_requested_info(info_dict):
1741         return dict(
1742             (k, v) for k, v in info_dict.items()
1743             if k not in ['requested_formats', 'requested_subtitles'])
1744
1745     def post_process(self, filename, ie_info):
1746         """Run all the postprocessors on the given file."""
1747         info = dict(ie_info)
1748         info['filepath'] = filename
1749         pps_chain = []
1750         if ie_info.get('__postprocessors') is not None:
1751             pps_chain.extend(ie_info['__postprocessors'])
1752         pps_chain.extend(self._pps)
1753         for pp in pps_chain:
1754             files_to_delete = []
1755             try:
1756                 files_to_delete, info = pp.run(info)
1757             except PostProcessingError as e:
1758                 self.report_error(e.msg)
1759             if files_to_delete and not self.params.get('keepvideo', False):
1760                 for old_filename in files_to_delete:
1761                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1762                     try:
1763                         os.remove(encodeFilename(old_filename))
1764                     except (IOError, OSError):
1765                         self.report_warning('Unable to remove downloaded original file')
1766
1767     def _make_archive_id(self, info_dict):
1768         # Future-proof against any change in case
1769         # and backwards compatibility with prior versions
1770         extractor = info_dict.get('extractor_key')
1771         if extractor is None:
1772             if 'id' in info_dict:
1773                 extractor = info_dict.get('ie_key')  # key in a playlist
1774         if extractor is None:
1775             return None  # Incomplete video information
1776         return extractor.lower() + ' ' + info_dict['id']
1777
1778     def in_download_archive(self, info_dict):
1779         fn = self.params.get('download_archive')
1780         if fn is None:
1781             return False
1782
1783         vid_id = self._make_archive_id(info_dict)
1784         if vid_id is None:
1785             return False  # Incomplete video information
1786
1787         try:
1788             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1789                 for line in archive_file:
1790                     if line.strip() == vid_id:
1791                         return True
1792         except IOError as ioe:
1793             if ioe.errno != errno.ENOENT:
1794                 raise
1795         return False
1796
1797     def record_download_archive(self, info_dict):
1798         fn = self.params.get('download_archive')
1799         if fn is None:
1800             return
1801         vid_id = self._make_archive_id(info_dict)
1802         assert vid_id
1803         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1804             archive_file.write(vid_id + '\n')
1805
1806     @staticmethod
1807     def format_resolution(format, default='unknown'):
1808         if format.get('vcodec') == 'none':
1809             return 'audio only'
1810         if format.get('resolution') is not None:
1811             return format['resolution']
1812         if format.get('height') is not None:
1813             if format.get('width') is not None:
1814                 res = '%sx%s' % (format['width'], format['height'])
1815             else:
1816                 res = '%sp' % format['height']
1817         elif format.get('width') is not None:
1818             res = '%dx?' % format['width']
1819         else:
1820             res = default
1821         return res
1822
1823     def _format_note(self, fdict):
1824         res = ''
1825         if fdict.get('ext') in ['f4f', 'f4m']:
1826             res += '(unsupported) '
1827         if fdict.get('language'):
1828             if res:
1829                 res += ' '
1830             res += '[%s]' % fdict['language']
1831         if fdict.get('format_note') is not None:
1832             res += fdict['format_note'] + ' '
1833         if fdict.get('tbr') is not None:
1834             res += '%4dk ' % fdict['tbr']
1835         if fdict.get('container') is not None:
1836             if res:
1837                 res += ', '
1838             res += '%s container' % fdict['container']
1839         if (fdict.get('vcodec') is not None and
1840                 fdict.get('vcodec') != 'none'):
1841             if res:
1842                 res += ', '
1843             res += fdict['vcodec']
1844             if fdict.get('vbr') is not None:
1845                 res += '@'
1846         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1847             res += 'video@'
1848         if fdict.get('vbr') is not None:
1849             res += '%4dk' % fdict['vbr']
1850         if fdict.get('fps') is not None:
1851             res += ', %sfps' % fdict['fps']
1852         if fdict.get('acodec') is not None:
1853             if res:
1854                 res += ', '
1855             if fdict['acodec'] == 'none':
1856                 res += 'video only'
1857             else:
1858                 res += '%-5s' % fdict['acodec']
1859         elif fdict.get('abr') is not None:
1860             if res:
1861                 res += ', '
1862             res += 'audio'
1863         if fdict.get('abr') is not None:
1864             res += '@%3dk' % fdict['abr']
1865         if fdict.get('asr') is not None:
1866             res += ' (%5dHz)' % fdict['asr']
1867         if fdict.get('filesize') is not None:
1868             if res:
1869                 res += ', '
1870             res += format_bytes(fdict['filesize'])
1871         elif fdict.get('filesize_approx') is not None:
1872             if res:
1873                 res += ', '
1874             res += '~' + format_bytes(fdict['filesize_approx'])
1875         return res
1876
1877     def list_formats(self, info_dict):
1878         formats = info_dict.get('formats', [info_dict])
1879         table = [
1880             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1881             for f in formats
1882             if f.get('preference') is None or f['preference'] >= -1000]
1883         if len(formats) > 1:
1884             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1885
1886         header_line = ['format code', 'extension', 'resolution', 'note']
1887         self.to_screen(
1888             '[info] Available formats for %s:\n%s' %
1889             (info_dict['id'], render_table(header_line, table)))
1890
1891     def list_thumbnails(self, info_dict):
1892         thumbnails = info_dict.get('thumbnails')
1893         if not thumbnails:
1894             tn_url = info_dict.get('thumbnail')
1895             if tn_url:
1896                 thumbnails = [{'id': '0', 'url': tn_url}]
1897             else:
1898                 self.to_screen(
1899                     '[info] No thumbnails present for %s' % info_dict['id'])
1900                 return
1901
1902         self.to_screen(
1903             '[info] Thumbnails for %s:' % info_dict['id'])
1904         self.to_screen(render_table(
1905             ['ID', 'width', 'height', 'URL'],
1906             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1907
1908     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1909         if not subtitles:
1910             self.to_screen('%s has no %s' % (video_id, name))
1911             return
1912         self.to_screen(
1913             'Available %s for %s:' % (name, video_id))
1914         self.to_screen(render_table(
1915             ['Language', 'formats'],
1916             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1917                 for lang, formats in subtitles.items()]))
1918
1919     def urlopen(self, req):
1920         """ Start an HTTP download """
1921         if isinstance(req, compat_basestring):
1922             req = sanitized_Request(req)
1923         return self._opener.open(req, timeout=self._socket_timeout)
1924
    def print_debug_header(self):
        """Write environment diagnostics to the debug output (verbose only).

        Reports the active encodings, the youtube-dl version, the git HEAD
        when running from a checkout, the Python version, the versions of
        external programs (ffmpeg/rtmpdump), and the proxy map.  With the
        'call_home' option, also reports the public IP address and warns
        if a newer release is available.
        """
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        # sys.stdout may have been replaced by a wrapper without an
        # 'encoding' attribute, hence the getattr with a fallback label.
        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
        # Best effort: report the git commit when run from a source checkout.
        try:
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = sp.communicate()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        except Exception:
            try:
                # Python 2 only; clears the current exception state.
                sys.exc_clear()
            except Exception:
                pass
        self._write_string('[debug] Python version %s - %s\n' % (
            platform.python_version(), platform_name()))

        # Versions of the external helper programs, 'none' if unavailable.
        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Merge the proxy settings of every opener handler that has any.
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            # Opt-in: query yt-dl.org for the public IP and latest version.
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
1989
    def _setup_opener(self):
        """Build the urllib opener used by self.urlopen and store it in
        self._opener.

        Configures the socket timeout, the cookie jar (in-memory or a
        Mozilla-format file), proxy handling, HTTPS/debug handlers, data:
        URL support, and disables the file:// scheme for security.
        """
        timeout_val = self.params.get('socket_timeout')
        # Default socket timeout: 600 seconds.
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            self.cookiejar = compat_cookiejar.CookieJar()
        else:
            # Persistent cookie file; only load it when it is readable
            # (it may not exist yet on first run).
            self.cookiejar = compat_cookiejar.MozillaCookieJar(
                opts_cookiefile)
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load()

        cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
        if opts_proxy is not None:
            # An explicit empty --proxy disables proxies entirely,
            # including any picked up from the environment.
            if opts_proxy == '':
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        data_handler = compat_urllib_request_DataHandler()

        # When passing our own FileHandler instance, build_opener won't add the
        # default FileHandler and allows us to disable the file protocol, which
        # can be used for malicious purposes (see
        # https://github.com/rg3/youtube-dl/issues/8227)
        file_handler = compat_urllib_request.FileHandler()

        def file_open(*args, **kwargs):
            raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
        file_handler.file_open = file_open

        opener = compat_urllib_request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)

        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
2041
2042     def encode(self, s):
2043         if isinstance(s, bytes):
2044             return s  # Already encoded
2045
2046         try:
2047             return s.encode(self.get_encoding())
2048         except UnicodeEncodeError as err:
2049             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2050             raise
2051
2052     def get_encoding(self):
2053         encoding = self.params.get('encoding')
2054         if encoding is None:
2055             encoding = preferredencoding()
2056         return encoding
2057
    def _write_thumbnails(self, info_dict, filename):
        """Download thumbnail image(s) for a video next to *filename*.

        With 'writethumbnail' only the last entry of
        info_dict['thumbnails'] is fetched; with 'write_all_thumbnails'
        every entry is.  The path of each written image is stored back
        into its thumbnail dict under 'filename'.  Download failures
        only produce a warning.
        """
        if self.params.get('writethumbnail', False):
            thumbnails = info_dict.get('thumbnails')
            if thumbnails:
                # Only the last thumbnail in the list is downloaded.
                thumbnails = [thumbnails[-1]]
        elif self.params.get('write_all_thumbnails', False):
            thumbnails = info_dict.get('thumbnails')
        else:
            return

        if not thumbnails:
            # No thumbnails present, so return immediately
            return

        for t in thumbnails:
            # Fall back to .jpg when the URL carries no usable extension.
            thumb_ext = determine_ext(t['url'], 'jpg')
            # Disambiguate filenames/messages only when writing several.
            suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
            thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
            t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext

            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                self.to_screen('[%s] %s: Thumbnail %sis already present' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
            else:
                self.to_screen('[%s] %s: Downloading thumbnail %s...' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
                try:
                    uf = self.urlopen(t['url'])
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    # Best effort: a failed thumbnail download is not fatal.
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (t['url'], error_to_compat_str(err)))