Merge pull request #8876 from remitamine/html5_media
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import datetime
9 import errno
10 import fileinput
11 import io
12 import itertools
13 import json
14 import locale
15 import operator
16 import os
17 import platform
18 import re
19 import shutil
20 import subprocess
21 import socket
22 import sys
23 import time
24 import tokenize
25 import traceback
26
27 from .compat import (
28     compat_basestring,
29     compat_cookiejar,
30     compat_expanduser,
31     compat_get_terminal_size,
32     compat_http_client,
33     compat_kwargs,
34     compat_os_name,
35     compat_str,
36     compat_tokenize_tokenize,
37     compat_urllib_error,
38     compat_urllib_request,
39     compat_urllib_request_DataHandler,
40 )
41 from .utils import (
42     age_restricted,
43     args_to_str,
44     ContentTooShortError,
45     date_from_str,
46     DateRange,
47     DEFAULT_OUTTMPL,
48     determine_ext,
49     determine_protocol,
50     DownloadError,
51     encode_compat_str,
52     encodeFilename,
53     error_to_compat_str,
54     ExtractorError,
55     format_bytes,
56     formatSeconds,
57     locked_file,
58     make_HTTPS_handler,
59     MaxDownloadsReached,
60     PagedList,
61     parse_filesize,
62     PerRequestProxyHandler,
63     platform_name,
64     PostProcessingError,
65     preferredencoding,
66     prepend_extension,
67     register_socks_protocols,
68     render_table,
69     replace_extension,
70     SameFileError,
71     sanitize_filename,
72     sanitize_path,
73     sanitize_url,
74     sanitized_Request,
75     std_headers,
76     subtitles_filename,
77     UnavailableVideoError,
78     url_basename,
79     version_tuple,
80     write_json_file,
81     write_string,
82     YoutubeDLCookieProcessor,
83     YoutubeDLHandler,
84 )
85 from .cache import Cache
86 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
87 from .downloader import get_suitable_downloader
88 from .downloader.rtmp import rtmpdump_version
89 from .postprocessor import (
90     FFmpegFixupM3u8PP,
91     FFmpegFixupM4aPP,
92     FFmpegFixupStretchedPP,
93     FFmpegMergerPP,
94     FFmpegPostProcessor,
95     get_postprocessor,
96 )
97 from .version import __version__
98
99 if compat_os_name == 'nt':
100     import ctypes
101
102
103 class YoutubeDL(object):
104     """YoutubeDL class.
105
106     YoutubeDL objects are the ones responsible of downloading the
107     actual video file and writing it to disk if the user has requested
108     it, among some other tasks. In most cases there should be one per
109     program. As, given a video URL, the downloader doesn't know how to
110     extract all the needed information, task that InfoExtractors do, it
111     has to pass the URL to one of them.
112
113     For this, YoutubeDL objects have a method that allows
114     InfoExtractors to be registered in a given order. When it is passed
115     a URL, the YoutubeDL object hands it to the first InfoExtractor it
116     finds that reports being able to handle it. The InfoExtractor extracts
117     all the information about the video or videos the URL refers to, and
118     YoutubeDL process the extracted information, possibly using a File
119     Downloader to download the video.
120
121     YoutubeDL objects accept a lot of parameters. In order not to saturate
122     the object constructor with arguments, it receives a dictionary of
123     options instead. These options are available through the params
124     attribute for the InfoExtractors to use. The YoutubeDL also
125     registers itself as the downloader in charge for the InfoExtractors
126     that are added to it, so this is a "mutual registration".
127
128     Available options:
129
130     username:          Username for authentication purposes.
131     password:          Password for authentication purposes.
132     videopassword:     Password for accessing a video.
133     usenetrc:          Use netrc for authentication instead.
134     verbose:           Print additional info to stdout.
135     quiet:             Do not print messages to stdout.
136     no_warnings:       Do not print out anything for warnings.
137     forceurl:          Force printing final URL.
138     forcetitle:        Force printing title.
139     forceid:           Force printing ID.
140     forcethumbnail:    Force printing thumbnail URL.
141     forcedescription:  Force printing description.
142     forcefilename:     Force printing final filename.
143     forceduration:     Force printing duration.
144     forcejson:         Force printing info_dict as JSON.
145     dump_single_json:  Force printing the info_dict of the whole playlist
146                        (or video) as a single JSON line.
147     simulate:          Do not download the video files.
148     format:            Video format code. See options.py for more information.
149     outtmpl:           Template for output names.
150     restrictfilenames: Do not allow "&" and spaces in file names
151     ignoreerrors:      Do not stop on download errors.
152     force_generic_extractor: Force downloader to use the generic extractor
153     nooverwrites:      Prevent overwriting files.
154     playliststart:     Playlist item to start at.
155     playlistend:       Playlist item to end at.
156     playlist_items:    Specific indices of playlist to download.
157     playlistreverse:   Download playlist items in reverse order.
158     matchtitle:        Download only matching titles.
159     rejecttitle:       Reject downloads for matching titles.
160     logger:            Log messages to a logging.Logger instance.
161     logtostderr:       Log messages to stderr instead of stdout.
162     writedescription:  Write the video description to a .description file
163     writeinfojson:     Write the video description to a .info.json file
164     writeannotations:  Write the video annotations to a .annotations.xml file
165     writethumbnail:    Write the thumbnail image to a file
166     write_all_thumbnails:  Write all thumbnail formats to files
167     writesubtitles:    Write the video subtitles to a file
168     writeautomaticsub: Write the automatically generated subtitles to a file
169     allsubtitles:      Downloads all the subtitles of the video
170                        (requires writesubtitles or writeautomaticsub)
171     listsubtitles:     Lists all available subtitles for the video
172     subtitlesformat:   The format code for subtitles
173     subtitleslangs:    List of languages of the subtitles to download
174     keepvideo:         Keep the video file after post-processing
175     daterange:         A DateRange object, download only if the upload_date is in the range.
176     skip_download:     Skip the actual download of the video file
177     cachedir:          Location of the cache files in the filesystem.
178                        False to disable filesystem cache.
179     noplaylist:        Download single video instead of a playlist if in doubt.
180     age_limit:         An integer representing the user's age in years.
181                        Unsuitable videos for the given age are skipped.
182     min_views:         An integer representing the minimum view count the video
183                        must have in order to not be skipped.
184                        Videos without view count information are always
185                        downloaded. None for no limit.
186     max_views:         An integer representing the maximum view count.
187                        Videos that are more popular than that are not
188                        downloaded.
189                        Videos without view count information are always
190                        downloaded. None for no limit.
191     download_archive:  File name of a file where all downloads are recorded.
192                        Videos already present in the file are not downloaded
193                        again.
194     cookiefile:        File name where cookies should be read from and dumped to.
195     nocheckcertificate:Do not verify SSL certificates
196     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
197                        At the moment, this is only supported by YouTube.
198     proxy:             URL of the proxy server to use
199     geo_verification_proxy:  URL of the proxy to use for IP address verification
200                        on geo-restricted sites. (Experimental)
201     socket_timeout:    Time to wait for unresponsive hosts, in seconds
202     bidi_workaround:   Work around buggy terminals without bidirectional text
203                        support, using fribidi
204     debug_printtraffic:Print out sent and received HTTP traffic
205     include_ads:       Download ads as well
206     default_search:    Prepend this string if an input url is not valid.
207                        'auto' for elaborate guessing
208     encoding:          Use this encoding instead of the system-specified.
209     extract_flat:      Do not resolve URLs, return the immediate result.
210                        Pass in 'in_playlist' to only show this behavior for
211                        playlist items.
212     postprocessors:    A list of dictionaries, each with an entry
213                        * key:  The name of the postprocessor. See
214                                youtube_dl/postprocessor/__init__.py for a list.
215                        as well as any further keyword arguments for the
216                        postprocessor.
217     progress_hooks:    A list of functions that get called on download
218                        progress, with a dictionary with the entries
219                        * status: One of "downloading", "error", or "finished".
220                                  Check this first and ignore unknown values.
221
222                        If status is one of "downloading", or "finished", the
223                        following properties may also be present:
224                        * filename: The final filename (always present)
225                        * tmpfilename: The filename we're currently writing to
226                        * downloaded_bytes: Bytes on disk
227                        * total_bytes: Size of the whole file, None if unknown
228                        * total_bytes_estimate: Guess of the eventual file size,
229                                                None if unavailable.
230                        * elapsed: The number of seconds since download started.
231                        * eta: The estimated time in seconds, None if unknown
232                        * speed: The download speed in bytes/second, None if
233                                 unknown
234                        * fragment_index: The counter of the currently
235                                          downloaded video fragment.
236                        * fragment_count: The number of fragments (= individual
237                                          files that will be merged)
238
239                        Progress hooks are guaranteed to be called at least once
240                        (with status "finished") if the download is successful.
241     merge_output_format: Extension to use when merging formats.
242     fixup:             Automatically correct known faults of the file.
243                        One of:
244                        - "never": do nothing
245                        - "warn": only emit a warning
246                        - "detect_or_warn": check whether we can do anything
247                                            about it, warn otherwise (default)
248     source_address:    (Experimental) Client-side IP address to bind to.
249     call_home:         Boolean, true iff we are allowed to contact the
250                        youtube-dl servers for debugging.
251     sleep_interval:    Number of seconds to sleep before each download.
252     listformats:       Print an overview of available video formats and exit.
253     list_thumbnails:   Print a table of all thumbnails and exit.
254     match_filter:      A function that gets called with the info_dict of
255                        every video.
256                        If it returns a message, the video is ignored.
257                        If it returns None, the video is downloaded.
258                        match_filter_func in utils.py is one example for this.
259     no_color:          Do not emit color codes in output.
260
261     The following options determine which downloader is picked:
262     external_downloader: Executable of the external downloader to call.
263                        None or unset for standard (built-in) downloader.
264     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
265                        if True, otherwise use ffmpeg/avconv if False, otherwise
266                        use downloader suggested by extractor if None.
267
268     The following parameters are not used by YoutubeDL itself, they are used by
269     the downloader (see youtube_dl/downloader/common.py):
270     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
271     noresizebuffer, retries, continuedl, noprogress, consoletitle,
272     xattr_set_filesize, external_downloader_args, hls_use_mpegts.
273
274     The following options are used by the post processors:
275     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
276                        otherwise prefer avconv.
277     postprocessor_args: A list of additional command-line arguments for the
278                         postprocessor.
279     """
280
281     params = None
282     _ies = []
283     _pps = []
284     _download_retcode = None
285     _num_downloads = None
286     _screen_file = None
287
    def __init__(self, params=None, auto_init=True):
        """Create a YoutubeDL object with the given options.

        params is the options dictionary described in the class docstring;
        None is treated as an empty dictionary. When auto_init is true, the
        debug header is printed and the default info extractors are
        registered.
        """
        if params is None:
            params = {}
        # Per-instance state; these shadow the class-level placeholders.
        self._ies = []
        self._ies_instances = {}
        self._pps = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # The boolean option is used as a list index: stderr when
        # 'logtostderr' is true, stdout otherwise.
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = {
            # Default parameters
            'nocheckcertificate': False,
        }
        self.params.update(params)
        self.cache = Cache(self)

        # Deprecated option: warn, and use its value for
        # 'geo_verification_proxy' unless that one was given explicitly.
        if self.params.get('cn_verification_proxy') is not None:
            self.report_warning('--cn-verification-proxy is deprecated. Use --geo-verification-proxy instead.')
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        if params.get('bidi_workaround', False):
            try:
                import pty
                # The helper process writes to the slave end of the pty; the
                # master end is wrapped below and read in _bidi_workaround().
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                # Try bidiv first; if starting it raises OSError, try fribidi.
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                # ENOENT only produces a warning and the workaround stays
                # disabled; any other OSError is re-raised.
                if ose.errno == errno.ENOENT:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.version_info >= (3,) and sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
                not params.get('restrictfilenames', False)):
            # On Python 3, the Unicode filesystem API will throw errors (#1474)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        # Only a warning is issued here; the bytes template is not converted.
        if isinstance(params.get('outtmpl'), bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')

        self._setup_opener()

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        # Each postprocessor definition is a dict: 'key' selects the class,
        # all remaining entries are passed to it as keyword arguments.
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            # Work on a copy so the caller's dictionary keeps its 'key' entry.
            pp_def = dict(pp_def_raw)
            del pp_def['key']
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()
371
372     def warn_if_short_id(self, argv):
373         # short YouTube ID starting with dash?
374         idxs = [
375             i for i, a in enumerate(argv)
376             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
377         if idxs:
378             correct_argv = (
379                 ['youtube-dl'] +
380                 [a for i, a in enumerate(argv) if i not in idxs] +
381                 ['--'] + [argv[i] for i in idxs]
382             )
383             self.report_warning(
384                 'Long argument string detected. '
385                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
386                 args_to_str(correct_argv))
387
388     def add_info_extractor(self, ie):
389         """Add an InfoExtractor object to the end of the list."""
390         self._ies.append(ie)
391         if not isinstance(ie, type):
392             self._ies_instances[ie.ie_key()] = ie
393             ie.set_downloader(self)
394
395     def get_info_extractor(self, ie_key):
396         """
397         Get an instance of an IE with name ie_key, it will try to get one from
398         the _ies list, if there's no instance it will create a new one and add
399         it to the extractor list.
400         """
401         ie = self._ies_instances.get(ie_key)
402         if ie is None:
403             ie = get_info_extractor(ie_key)()
404             self.add_info_extractor(ie)
405         return ie
406
407     def add_default_info_extractors(self):
408         """
409         Add the InfoExtractors returned by gen_extractors to the end of the list
410         """
411         for ie in gen_extractor_classes():
412             self.add_info_extractor(ie)
413
414     def add_post_processor(self, pp):
415         """Add a PostProcessor object to the end of the chain."""
416         self._pps.append(pp)
417         pp.set_downloader(self)
418
419     def add_progress_hook(self, ph):
420         """Add the progress hook (currently only for the file downloader)"""
421         self._progress_hooks.append(ph)
422
423     def _bidi_workaround(self, message):
424         if not hasattr(self, '_output_channel'):
425             return message
426
427         assert hasattr(self, '_output_process')
428         assert isinstance(message, compat_str)
429         line_count = message.count('\n') + 1
430         self._output_process.stdin.write((message + '\n').encode('utf-8'))
431         self._output_process.stdin.flush()
432         res = ''.join(self._output_channel.readline().decode('utf-8')
433                       for _ in range(line_count))
434         return res[:-len('\n')]
435
436     def to_screen(self, message, skip_eol=False):
437         """Print message to stdout if not in quiet mode."""
438         return self.to_stdout(message, skip_eol, check_quiet=True)
439
440     def _write_string(self, s, out=None):
441         write_string(s, out=out, encoding=self.params.get('encoding'))
442
443     def to_stdout(self, message, skip_eol=False, check_quiet=False):
444         """Print message to stdout if not in quiet mode."""
445         if self.params.get('logger'):
446             self.params['logger'].debug(message)
447         elif not check_quiet or not self.params.get('quiet', False):
448             message = self._bidi_workaround(message)
449             terminator = ['\n', ''][skip_eol]
450             output = message + terminator
451
452             self._write_string(output, self._screen_file)
453
454     def to_stderr(self, message):
455         """Print message to stderr."""
456         assert isinstance(message, compat_str)
457         if self.params.get('logger'):
458             self.params['logger'].error(message)
459         else:
460             message = self._bidi_workaround(message)
461             output = message + '\n'
462             self._write_string(output, self._err_file)
463
464     def to_console_title(self, message):
465         if not self.params.get('consoletitle', False):
466             return
467         if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
468             # c_wchar_p() might not be necessary if `message` is
469             # already of type unicode()
470             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
471         elif 'TERM' in os.environ:
472             self._write_string('\033]0;%s\007' % message, self._screen_file)
473
474     def save_console_title(self):
475         if not self.params.get('consoletitle', False):
476             return
477         if 'TERM' in os.environ:
478             # Save the title on stack
479             self._write_string('\033[22;0t', self._screen_file)
480
481     def restore_console_title(self):
482         if not self.params.get('consoletitle', False):
483             return
484         if 'TERM' in os.environ:
485             # Restore the title from stack
486             self._write_string('\033[23;0t', self._screen_file)
487
488     def __enter__(self):
489         self.save_console_title()
490         return self
491
492     def __exit__(self, *args):
493         self.restore_console_title()
494
495         if self.params.get('cookiefile') is not None:
496             self.cookiejar.save()
497
498     def trouble(self, message=None, tb=None):
499         """Determine action to take when a download problem appears.
500
501         Depending on if the downloader has been configured to ignore
502         download errors or not, this method may throw an exception or
503         not when errors are found, after printing the message.
504
505         tb, if given, is additional traceback information.
506         """
507         if message is not None:
508             self.to_stderr(message)
509         if self.params.get('verbose'):
510             if tb is None:
511                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
512                     tb = ''
513                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
514                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
515                     tb += encode_compat_str(traceback.format_exc())
516                 else:
517                     tb_data = traceback.format_list(traceback.extract_stack())
518                     tb = ''.join(tb_data)
519             self.to_stderr(tb)
520         if not self.params.get('ignoreerrors', False):
521             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
522                 exc_info = sys.exc_info()[1].exc_info
523             else:
524                 exc_info = sys.exc_info()
525             raise DownloadError(message, exc_info)
526         self._download_retcode = 1
527
528     def report_warning(self, message):
529         '''
530         Print the message to stderr, it will be prefixed with 'WARNING:'
531         If stderr is a tty file the 'WARNING:' will be colored
532         '''
533         if self.params.get('logger') is not None:
534             self.params['logger'].warning(message)
535         else:
536             if self.params.get('no_warnings'):
537                 return
538             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
539                 _msg_header = '\033[0;33mWARNING:\033[0m'
540             else:
541                 _msg_header = 'WARNING:'
542             warning_message = '%s %s' % (_msg_header, message)
543             self.to_stderr(warning_message)
544
545     def report_error(self, message, tb=None):
546         '''
547         Do the same as trouble, but prefixes the message with 'ERROR:', colored
548         in red if stderr is a tty file.
549         '''
550         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
551             _msg_header = '\033[0;31mERROR:\033[0m'
552         else:
553             _msg_header = 'ERROR:'
554         error_message = '%s %s' % (_msg_header, message)
555         self.trouble(error_message, tb)
556
557     def report_file_already_downloaded(self, file_name):
558         """Report file has already been fully downloaded."""
559         try:
560             self.to_screen('[download] %s has already been downloaded' % file_name)
561         except UnicodeEncodeError:
562             self.to_screen('[download] The file has already been downloaded')
563
564     def prepare_filename(self, info_dict):
565         """Generate the output filename."""
566         try:
567             template_dict = dict(info_dict)
568
569             template_dict['epoch'] = int(time.time())
570             autonumber_size = self.params.get('autonumber_size')
571             if autonumber_size is None:
572                 autonumber_size = 5
573             autonumber_templ = '%0' + str(autonumber_size) + 'd'
574             template_dict['autonumber'] = autonumber_templ % self._num_downloads
575             if template_dict.get('playlist_index') is not None:
576                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
577             if template_dict.get('resolution') is None:
578                 if template_dict.get('width') and template_dict.get('height'):
579                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
580                 elif template_dict.get('height'):
581                     template_dict['resolution'] = '%sp' % template_dict['height']
582                 elif template_dict.get('width'):
583                     template_dict['resolution'] = '%dx?' % template_dict['width']
584
585             sanitize = lambda k, v: sanitize_filename(
586                 compat_str(v),
587                 restricted=self.params.get('restrictfilenames'),
588                 is_id=(k == 'id'))
589             template_dict = dict((k, sanitize(k, v))
590                                  for k, v in template_dict.items()
591                                  if v is not None and not isinstance(v, (list, tuple, dict)))
592             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
593
594             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
595             tmpl = compat_expanduser(outtmpl)
596             filename = tmpl % template_dict
597             # Temporary fix for #4787
598             # 'Treat' all problem characters by passing filename through preferredencoding
599             # to workaround encoding issues with subprocess on python2 @ Windows
600             if sys.version_info < (3, 0) and sys.platform == 'win32':
601                 filename = encodeFilename(filename, True).decode(preferredencoding())
602             return sanitize_path(filename)
603         except ValueError as err:
604             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
605             return None
606
607     def _match_entry(self, info_dict, incomplete):
608         """ Returns None iff the file should be downloaded """
609
610         video_title = info_dict.get('title', info_dict.get('id', 'video'))
611         if 'title' in info_dict:
612             # This can happen when we're just evaluating the playlist
613             title = info_dict['title']
614             matchtitle = self.params.get('matchtitle', False)
615             if matchtitle:
616                 if not re.search(matchtitle, title, re.IGNORECASE):
617                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
618             rejecttitle = self.params.get('rejecttitle', False)
619             if rejecttitle:
620                 if re.search(rejecttitle, title, re.IGNORECASE):
621                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
622         date = info_dict.get('upload_date')
623         if date is not None:
624             dateRange = self.params.get('daterange', DateRange())
625             if date not in dateRange:
626                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
627         view_count = info_dict.get('view_count')
628         if view_count is not None:
629             min_views = self.params.get('min_views')
630             if min_views is not None and view_count < min_views:
631                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
632             max_views = self.params.get('max_views')
633             if max_views is not None and view_count > max_views:
634                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
635         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
636             return 'Skipping "%s" because it is age restricted' % video_title
637         if self.in_download_archive(info_dict):
638             return '%s has already been recorded in archive' % video_title
639
640         if not incomplete:
641             match_filter = self.params.get('match_filter')
642             if match_filter is not None:
643                 ret = match_filter(info_dict)
644                 if ret is not None:
645                     return ret
646
647         return None
648
649     @staticmethod
650     def add_extra_info(info_dict, extra_info):
651         '''Set the keys from extra_info in info dict if they are missing'''
652         for key, value in extra_info.items():
653             info_dict.setdefault(key, value)
654
655     def extract_info(self, url, download=True, ie_key=None, extra_info={},
656                      process=True, force_generic_extractor=False):
657         '''
658         Returns a list with a dictionary for each video we find.
659         If 'download', also downloads the videos.
660         extra_info is a dict containing the extra values to add to each result
661         '''
662
663         if not ie_key and force_generic_extractor:
664             ie_key = 'Generic'
665
666         if ie_key:
667             ies = [self.get_info_extractor(ie_key)]
668         else:
669             ies = self._ies
670
671         for ie in ies:
672             if not ie.suitable(url):
673                 continue
674
675             ie = self.get_info_extractor(ie.ie_key())
676             if not ie.working():
677                 self.report_warning('The program functionality for this site has been marked as broken, '
678                                     'and will probably not work.')
679
680             try:
681                 ie_result = ie.extract(url)
682                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
683                     break
684                 if isinstance(ie_result, list):
685                     # Backwards compatibility: old IE result format
686                     ie_result = {
687                         '_type': 'compat_list',
688                         'entries': ie_result,
689                     }
690                 self.add_default_extra_info(ie_result, ie, url)
691                 if process:
692                     return self.process_ie_result(ie_result, download, extra_info)
693                 else:
694                     return ie_result
695             except ExtractorError as e:  # An error we somewhat expected
696                 self.report_error(compat_str(e), e.format_traceback())
697                 break
698             except MaxDownloadsReached:
699                 raise
700             except Exception as e:
701                 if self.params.get('ignoreerrors', False):
702                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
703                     break
704                 else:
705                     raise
706         else:
707             self.report_error('no suitable InfoExtractor for URL %s' % url)
708
709     def add_default_extra_info(self, ie_result, ie, url):
710         self.add_extra_info(ie_result, {
711             'extractor': ie.IE_NAME,
712             'webpage_url': url,
713             'webpage_url_basename': url_basename(url),
714             'extractor_key': ie.ie_key(),
715         })
716
717     def process_ie_result(self, ie_result, download=True, extra_info={}):
718         """
719         Take the result of the ie(may be modified) and resolve all unresolved
720         references (URLs, playlist items).
721
722         It will also download the videos if 'download'.
723         Returns the resolved ie_result.
724         """
725         result_type = ie_result.get('_type', 'video')
726
727         if result_type in ('url', 'url_transparent'):
728             ie_result['url'] = sanitize_url(ie_result['url'])
729             extract_flat = self.params.get('extract_flat', False)
730             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
731                     extract_flat is True):
732                 if self.params.get('forcejson', False):
733                     self.to_stdout(json.dumps(ie_result))
734                 return ie_result
735
736         if result_type == 'video':
737             self.add_extra_info(ie_result, extra_info)
738             return self.process_video_result(ie_result, download=download)
739         elif result_type == 'url':
740             # We have to add extra_info to the results because it may be
741             # contained in a playlist
742             return self.extract_info(ie_result['url'],
743                                      download,
744                                      ie_key=ie_result.get('ie_key'),
745                                      extra_info=extra_info)
746         elif result_type == 'url_transparent':
747             # Use the information from the embedding page
748             info = self.extract_info(
749                 ie_result['url'], ie_key=ie_result.get('ie_key'),
750                 extra_info=extra_info, download=False, process=False)
751
752             force_properties = dict(
753                 (k, v) for k, v in ie_result.items() if v is not None)
754             for f in ('_type', 'url', 'ie_key'):
755                 if f in force_properties:
756                     del force_properties[f]
757             new_result = info.copy()
758             new_result.update(force_properties)
759
760             assert new_result.get('_type') != 'url_transparent'
761
762             return self.process_ie_result(
763                 new_result, download=download, extra_info=extra_info)
764         elif result_type == 'playlist' or result_type == 'multi_video':
765             # We process each entry in the playlist
766             playlist = ie_result.get('title') or ie_result.get('id')
767             self.to_screen('[download] Downloading playlist: %s' % playlist)
768
769             playlist_results = []
770
771             playliststart = self.params.get('playliststart', 1) - 1
772             playlistend = self.params.get('playlistend')
773             # For backwards compatibility, interpret -1 as whole list
774             if playlistend == -1:
775                 playlistend = None
776
777             playlistitems_str = self.params.get('playlist_items')
778             playlistitems = None
779             if playlistitems_str is not None:
780                 def iter_playlistitems(format):
781                     for string_segment in format.split(','):
782                         if '-' in string_segment:
783                             start, end = string_segment.split('-')
784                             for item in range(int(start), int(end) + 1):
785                                 yield int(item)
786                         else:
787                             yield int(string_segment)
788                 playlistitems = iter_playlistitems(playlistitems_str)
789
790             ie_entries = ie_result['entries']
791             if isinstance(ie_entries, list):
792                 n_all_entries = len(ie_entries)
793                 if playlistitems:
794                     entries = [
795                         ie_entries[i - 1] for i in playlistitems
796                         if -n_all_entries <= i - 1 < n_all_entries]
797                 else:
798                     entries = ie_entries[playliststart:playlistend]
799                 n_entries = len(entries)
800                 self.to_screen(
801                     '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
802                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
803             elif isinstance(ie_entries, PagedList):
804                 if playlistitems:
805                     entries = []
806                     for item in playlistitems:
807                         entries.extend(ie_entries.getslice(
808                             item - 1, item
809                         ))
810                 else:
811                     entries = ie_entries.getslice(
812                         playliststart, playlistend)
813                 n_entries = len(entries)
814                 self.to_screen(
815                     '[%s] playlist %s: Downloading %d videos' %
816                     (ie_result['extractor'], playlist, n_entries))
817             else:  # iterable
818                 if playlistitems:
819                     entry_list = list(ie_entries)
820                     entries = [entry_list[i - 1] for i in playlistitems]
821                 else:
822                     entries = list(itertools.islice(
823                         ie_entries, playliststart, playlistend))
824                 n_entries = len(entries)
825                 self.to_screen(
826                     '[%s] playlist %s: Downloading %d videos' %
827                     (ie_result['extractor'], playlist, n_entries))
828
829             if self.params.get('playlistreverse', False):
830                 entries = entries[::-1]
831
832             for i, entry in enumerate(entries, 1):
833                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
834                 extra = {
835                     'n_entries': n_entries,
836                     'playlist': playlist,
837                     'playlist_id': ie_result.get('id'),
838                     'playlist_title': ie_result.get('title'),
839                     'playlist_index': i + playliststart,
840                     'extractor': ie_result['extractor'],
841                     'webpage_url': ie_result['webpage_url'],
842                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
843                     'extractor_key': ie_result['extractor_key'],
844                 }
845
846                 reason = self._match_entry(entry, incomplete=True)
847                 if reason is not None:
848                     self.to_screen('[download] ' + reason)
849                     continue
850
851                 entry_result = self.process_ie_result(entry,
852                                                       download=download,
853                                                       extra_info=extra)
854                 playlist_results.append(entry_result)
855             ie_result['entries'] = playlist_results
856             self.to_screen('[download] Finished downloading playlist: %s' % playlist)
857             return ie_result
858         elif result_type == 'compat_list':
859             self.report_warning(
860                 'Extractor %s returned a compat_list result. '
861                 'It needs to be updated.' % ie_result.get('extractor'))
862
863             def _fixup(r):
864                 self.add_extra_info(
865                     r,
866                     {
867                         'extractor': ie_result['extractor'],
868                         'webpage_url': ie_result['webpage_url'],
869                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
870                         'extractor_key': ie_result['extractor_key'],
871                     }
872                 )
873                 return r
874             ie_result['entries'] = [
875                 self.process_ie_result(_fixup(r), download, extra_info)
876                 for r in ie_result['entries']
877             ]
878             return ie_result
879         else:
880             raise Exception('Invalid result type: %s' % result_type)
881
882     def _build_format_filter(self, filter_spec):
883         " Returns a function to filter the formats according to the filter_spec "
884
885         OPERATORS = {
886             '<': operator.lt,
887             '<=': operator.le,
888             '>': operator.gt,
889             '>=': operator.ge,
890             '=': operator.eq,
891             '!=': operator.ne,
892         }
893         operator_rex = re.compile(r'''(?x)\s*
894             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
895             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
896             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
897             $
898             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
899         m = operator_rex.search(filter_spec)
900         if m:
901             try:
902                 comparison_value = int(m.group('value'))
903             except ValueError:
904                 comparison_value = parse_filesize(m.group('value'))
905                 if comparison_value is None:
906                     comparison_value = parse_filesize(m.group('value') + 'B')
907                 if comparison_value is None:
908                     raise ValueError(
909                         'Invalid value %r in format specification %r' % (
910                             m.group('value'), filter_spec))
911             op = OPERATORS[m.group('op')]
912
913         if not m:
914             STR_OPERATORS = {
915                 '=': operator.eq,
916                 '!=': operator.ne,
917                 '^=': lambda attr, value: attr.startswith(value),
918                 '$=': lambda attr, value: attr.endswith(value),
919                 '*=': lambda attr, value: value in attr,
920             }
921             str_operator_rex = re.compile(r'''(?x)
922                 \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
923                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
924                 \s*(?P<value>[a-zA-Z0-9._-]+)
925                 \s*$
926                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
927             m = str_operator_rex.search(filter_spec)
928             if m:
929                 comparison_value = m.group('value')
930                 op = STR_OPERATORS[m.group('op')]
931
932         if not m:
933             raise ValueError('Invalid filter specification %r' % filter_spec)
934
935         def _filter(f):
936             actual_value = f.get(m.group('key'))
937             if actual_value is None:
938                 return m.group('none_inclusive')
939             return op(actual_value, comparison_value)
940         return _filter
941
    def build_format_selector(self, format_spec):
        """
        Compile a format specification string into a selector function.

        format_spec is tokenized with the Python tokenizer, parsed into
        FormatSelector tuples (SINGLE, GROUP, PICKFIRST, MERGE) and then
        converted into a function that takes the available format dicts and
        returns an iterable with the selected ones.

        Operators handled by the parser:
         * ","  several selectors, the results of all of them are produced
         * "/"  alternatives, the first one producing any format wins
         * "+"  merge of a video and an audio selector
         * "()" grouping
         * "[]" filter, compiled with _build_format_filter

        Raises SyntaxError when format_spec cannot be tokenized or parsed.
        """
        def syntax_error(note, start):
            # Builds (does not raise) a SyntaxError whose message shows
            # format_spec with a caret placed under column start[1]
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        # Node types of the parsed selector tree
        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        # 'selector' holds a name (SINGLE), a list of selectors (GROUP) or a
        # pair of selectors (PICKFIRST, MERGE); 'filters' holds the raw filter
        # strings found in "[...]"
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        def _parse_filter(tokens):
            # Consume tokens up to the closing "]" and return their strings
            # joined together. Returns None implicitly when the tokens run out
            # before a "]" is seen.
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            # last_string accumulates the joined token; last_line is never
            # updated below, so joined tokens are emitted with line=None
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    # Flush the pending joined token before the filter starts
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    # Part of a name: start a new joined token or extend the
                    # pending one (its start/end positions stay those of the
                    # first part)
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Recursive descent parser; returns a list of FormatSelector
            # tuples. The inside_* flags tell a nested call which operators
            # end it: the terminating token is pushed back with
            # restore_last_token() so that the enclosing call sees it too.
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        # A filter without a preceding selector applies to 'best'
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        video_selector = current_selector
                        audio_selector = _parse_format_selection(tokens, inside_merge=True)
                        if not video_selector or not audio_selector:
                            raise syntax_error('"+" must be between two format selectors', start)
                        current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _build_selector_function(selector):
            # Turn a parsed selector (a FormatSelector or a list of them) into
            # a function mapping the available formats to the selected ones
            if isinstance(selector, list):
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(formats):
                    # Chain the results of all selectors in the list
                    for f in fs:
                        for format in f(formats):
                            yield format
                # A list carries no filters of its own, so return right away
                return selector_function
            elif selector.type == GROUP:
                selector_function = _build_selector_function(selector.selector)
            elif selector.type == PICKFIRST:
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(formats):
                    # The first alternative producing any format wins
                    for f in fs:
                        picked_formats = list(f(formats))
                        if picked_formats:
                            return picked_formats
                    return []
            elif selector.type == SINGLE:
                # Local to _build_selector_function; shadows the format_spec
                # argument of build_format_selector within this function
                format_spec = selector.selector

                def selector_function(formats):
                    formats = list(formats)
                    if not formats:
                        return
                    if format_spec == 'all':
                        for f in formats:
                            yield f
                    elif format_spec in ['best', 'worst', None]:
                        # The last element is taken as the best one and the
                        # first as the worst one
                        # (presumably formats is sorted worst to best - verify against caller)
                        format_idx = 0 if format_spec == 'worst' else -1
                        audiovideo_formats = [
                            f for f in formats
                            if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                        if audiovideo_formats:
                            yield audiovideo_formats[format_idx]
                        # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
                        elif (all(f.get('acodec') != 'none' for f in formats) or
                              all(f.get('vcodec') != 'none' for f in formats)):
                            yield formats[format_idx]
                    elif format_spec == 'bestaudio':
                        # Audio only formats are those with vcodec 'none'
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[-1]
                    elif format_spec == 'worstaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[0]
                    elif format_spec == 'bestvideo':
                        # Video only formats are those with acodec 'none'
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[-1]
                    elif format_spec == 'worstvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[0]
                    else:
                        # A known extension selects by 'ext', anything else is
                        # taken as a format_id; the last match is used
                        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                        if format_spec in extensions:
                            filter_f = lambda f: f['ext'] == format_spec
                        else:
                            filter_f = lambda f: f['format_id'] == format_spec
                        matches = list(filter(filter_f, formats))
                        if matches:
                            yield matches[-1]
            elif selector.type == MERGE:
                def _merge(formats_info):
                    # Combine a (video, audio) pair of formats into a single
                    # format dict; returns None after reporting an error
                    format_1, format_2 = [f['format_id'] for f in formats_info]
                    # The first format must contain the video and the
                    # second the audio
                    if formats_info[0].get('vcodec') == 'none':
                        self.report_error('The first format must '
                                          'contain the video, try using '
                                          '"-f %s+%s"' % (format_2, format_1))
                        return
                    # Formats must be opposite (video+audio)
                    if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
                        self.report_error(
                            'Both formats %s and %s are video-only, you must specify "-f video+audio"'
                            % (format_1, format_2))
                        return
                    # The extension of the video format is used unless the
                    # 'merge_output_format' param is set
                    output_ext = (
                        formats_info[0]['ext']
                        if self.params.get('merge_output_format') is None
                        else self.params['merge_output_format'])
                    return {
                        'requested_formats': formats_info,
                        'format': '%s+%s' % (formats_info[0].get('format'),
                                             formats_info[1].get('format')),
                        'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                formats_info[1].get('format_id')),
                        'width': formats_info[0].get('width'),
                        'height': formats_info[0].get('height'),
                        'resolution': formats_info[0].get('resolution'),
                        'fps': formats_info[0].get('fps'),
                        'vcodec': formats_info[0].get('vcodec'),
                        'vbr': formats_info[0].get('vbr'),
                        'stretched_ratio': formats_info[0].get('stretched_ratio'),
                        'acodec': formats_info[1].get('acodec'),
                        'abr': formats_info[1].get('abr'),
                        'ext': output_ext,
                    }
                video_selector, audio_selector = map(_build_selector_function, selector.selector)

                def selector_function(formats):
                    formats = list(formats)
                    # NOTE(review): _merge returns None after reporting an
                    # error, so None can be yielded here - confirm that the
                    # callers cope with that
                    for pair in itertools.product(video_selector(formats), audio_selector(formats)):
                        yield _merge(pair)

            # Filters are compiled once, when the selector function is built
            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(formats):
                # Apply all filters first, then select among the remainder
                for _filter in filters:
                    formats = list(filter(_filter, formats))
                return selector_function(formats)
            return final_selector

        # The tokenizer works on a readline callable returning bytes
        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator(object):
            # Iterator over the token list that allows pushing back the token
            # returned last (see restore_last_token)
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            # Python 2 name of the iterator protocol method
            next = __next__

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
1204
1205     def _calc_headers(self, info_dict):
1206         res = std_headers.copy()
1207
1208         add_headers = info_dict.get('http_headers')
1209         if add_headers:
1210             res.update(add_headers)
1211
1212         cookies = self._calc_cookies(info_dict)
1213         if cookies:
1214             res['Cookie'] = cookies
1215
1216         return res
1217
1218     def _calc_cookies(self, info_dict):
1219         pr = sanitized_Request(info_dict['url'])
1220         self.cookiejar.add_cookie_header(pr)
1221         return pr.get_header('Cookie')
1222
1223     def process_video_result(self, info_dict, download=True):
1224         assert info_dict.get('_type', 'video') == 'video'
1225
1226         if 'id' not in info_dict:
1227             raise ExtractorError('Missing "id" field in extractor result')
1228         if 'title' not in info_dict:
1229             raise ExtractorError('Missing "title" field in extractor result')
1230
1231         if not isinstance(info_dict['id'], compat_str):
1232             self.report_warning('"id" field is not a string - forcing string conversion')
1233             info_dict['id'] = compat_str(info_dict['id'])
1234
1235         if 'playlist' not in info_dict:
1236             # It isn't part of a playlist
1237             info_dict['playlist'] = None
1238             info_dict['playlist_index'] = None
1239
1240         thumbnails = info_dict.get('thumbnails')
1241         if thumbnails is None:
1242             thumbnail = info_dict.get('thumbnail')
1243             if thumbnail:
1244                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1245         if thumbnails:
1246             thumbnails.sort(key=lambda t: (
1247                 t.get('preference'), t.get('width'), t.get('height'),
1248                 t.get('id'), t.get('url')))
1249             for i, t in enumerate(thumbnails):
1250                 t['url'] = sanitize_url(t['url'])
1251                 if t.get('width') and t.get('height'):
1252                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
1253                 if t.get('id') is None:
1254                     t['id'] = '%d' % i
1255
1256         if self.params.get('list_thumbnails'):
1257             self.list_thumbnails(info_dict)
1258             return
1259
1260         thumbnail = info_dict.get('thumbnail')
1261         if thumbnail:
1262             info_dict['thumbnail'] = sanitize_url(thumbnail)
1263         elif thumbnails:
1264             info_dict['thumbnail'] = thumbnails[-1]['url']
1265
1266         if 'display_id' not in info_dict and 'id' in info_dict:
1267             info_dict['display_id'] = info_dict['id']
1268
1269         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1270             # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1271             # see http://bugs.python.org/issue1646728)
1272             try:
1273                 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1274                 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1275             except (ValueError, OverflowError, OSError):
1276                 pass
1277
1278         # Auto generate title fields corresponding to the *_number fields when missing
1279         # in order to always have clean titles. This is very common for TV series.
1280         for field in ('chapter', 'season', 'episode'):
1281             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1282                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
1283
1284         subtitles = info_dict.get('subtitles')
1285         if subtitles:
1286             for _, subtitle in subtitles.items():
1287                 for subtitle_format in subtitle:
1288                     if subtitle_format.get('url'):
1289                         subtitle_format['url'] = sanitize_url(subtitle_format['url'])
1290                     if 'ext' not in subtitle_format:
1291                         subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1292
1293         if self.params.get('listsubtitles', False):
1294             if 'automatic_captions' in info_dict:
1295                 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
1296             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1297             return
1298         info_dict['requested_subtitles'] = self.process_subtitles(
1299             info_dict['id'], subtitles,
1300             info_dict.get('automatic_captions'))
1301
1302         # We now pick which formats have to be downloaded
1303         if info_dict.get('formats') is None:
1304             # There's only one format available
1305             formats = [info_dict]
1306         else:
1307             formats = info_dict['formats']
1308
1309         if not formats:
1310             raise ExtractorError('No video formats found!')
1311
1312         formats_dict = {}
1313
1314         # We check that all the formats have the format and format_id fields
1315         for i, format in enumerate(formats):
1316             if 'url' not in format:
1317                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
1318
1319             format['url'] = sanitize_url(format['url'])
1320
1321             if format.get('format_id') is None:
1322                 format['format_id'] = compat_str(i)
1323             else:
1324                 # Sanitize format_id from characters used in format selector expression
1325                 format['format_id'] = re.sub('[\s,/+\[\]()]', '_', format['format_id'])
1326             format_id = format['format_id']
1327             if format_id not in formats_dict:
1328                 formats_dict[format_id] = []
1329             formats_dict[format_id].append(format)
1330
1331         # Make sure all formats have unique format_id
1332         for format_id, ambiguous_formats in formats_dict.items():
1333             if len(ambiguous_formats) > 1:
1334                 for i, format in enumerate(ambiguous_formats):
1335                     format['format_id'] = '%s-%d' % (format_id, i)
1336
1337         for i, format in enumerate(formats):
1338             if format.get('format') is None:
1339                 format['format'] = '{id} - {res}{note}'.format(
1340                     id=format['format_id'],
1341                     res=self.format_resolution(format),
1342                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1343                 )
1344             # Automatically determine file extension if missing
1345             if 'ext' not in format:
1346                 format['ext'] = determine_ext(format['url']).lower()
1347             # Automatically determine protocol if missing (useful for format
1348             # selection purposes)
1349             if 'protocol' not in format:
1350                 format['protocol'] = determine_protocol(format)
1351             # Add HTTP headers, so that external programs can use them from the
1352             # json output
1353             full_format_info = info_dict.copy()
1354             full_format_info.update(format)
1355             format['http_headers'] = self._calc_headers(full_format_info)
1356
1357         # TODO Central sorting goes here
1358
1359         if formats[0] is not info_dict:
1360             # only set the 'formats' fields if the original info_dict list them
1361             # otherwise we end up with a circular reference, the first (and unique)
1362             # element in the 'formats' field in info_dict is info_dict itself,
1363             # which can't be exported to json
1364             info_dict['formats'] = formats
1365         if self.params.get('listformats'):
1366             self.list_formats(info_dict)
1367             return
1368
1369         req_format = self.params.get('format')
1370         if req_format is None:
1371             req_format_list = []
1372             if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
1373                     not info_dict.get('is_live')):
1374                 merger = FFmpegMergerPP(self)
1375                 if merger.available and merger.can_merge():
1376                     req_format_list.append('bestvideo+bestaudio')
1377             req_format_list.append('best')
1378             req_format = '/'.join(req_format_list)
1379         format_selector = self.build_format_selector(req_format)
1380         formats_to_download = list(format_selector(formats))
1381         if not formats_to_download:
1382             raise ExtractorError('requested format not available',
1383                                  expected=True)
1384
1385         if download:
1386             if len(formats_to_download) > 1:
1387                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1388             for format in formats_to_download:
1389                 new_info = dict(info_dict)
1390                 new_info.update(format)
1391                 self.process_info(new_info)
1392         # We update the info dict with the best quality format (backwards compatibility)
1393         info_dict.update(formats_to_download[-1])
1394         return info_dict
1395
1396     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1397         """Select the requested subtitles and their format"""
1398         available_subs = {}
1399         if normal_subtitles and self.params.get('writesubtitles'):
1400             available_subs.update(normal_subtitles)
1401         if automatic_captions and self.params.get('writeautomaticsub'):
1402             for lang, cap_info in automatic_captions.items():
1403                 if lang not in available_subs:
1404                     available_subs[lang] = cap_info
1405
1406         if (not self.params.get('writesubtitles') and not
1407                 self.params.get('writeautomaticsub') or not
1408                 available_subs):
1409             return None
1410
1411         if self.params.get('allsubtitles', False):
1412             requested_langs = available_subs.keys()
1413         else:
1414             if self.params.get('subtitleslangs', False):
1415                 requested_langs = self.params.get('subtitleslangs')
1416             elif 'en' in available_subs:
1417                 requested_langs = ['en']
1418             else:
1419                 requested_langs = [list(available_subs.keys())[0]]
1420
1421         formats_query = self.params.get('subtitlesformat', 'best')
1422         formats_preference = formats_query.split('/') if formats_query else []
1423         subs = {}
1424         for lang in requested_langs:
1425             formats = available_subs.get(lang)
1426             if formats is None:
1427                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1428                 continue
1429             for ext in formats_preference:
1430                 if ext == 'best':
1431                     f = formats[-1]
1432                     break
1433                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1434                 if matches:
1435                     f = matches[-1]
1436                     break
1437             else:
1438                 f = formats[-1]
1439                 self.report_warning(
1440                     'No subtitle format found matching "%s" for language %s, '
1441                     'using %s' % (formats_query, lang, f['ext']))
1442             subs[lang] = f
1443         return subs
1444
1445     def process_info(self, info_dict):
1446         """Process a single resolved IE result."""
1447
1448         assert info_dict.get('_type', 'video') == 'video'
1449
1450         max_downloads = self.params.get('max_downloads')
1451         if max_downloads is not None:
1452             if self._num_downloads >= int(max_downloads):
1453                 raise MaxDownloadsReached()
1454
1455         info_dict['fulltitle'] = info_dict['title']
1456         if len(info_dict['title']) > 200:
1457             info_dict['title'] = info_dict['title'][:197] + '...'
1458
1459         if 'format' not in info_dict:
1460             info_dict['format'] = info_dict['ext']
1461
1462         reason = self._match_entry(info_dict, incomplete=False)
1463         if reason is not None:
1464             self.to_screen('[download] ' + reason)
1465             return
1466
1467         self._num_downloads += 1
1468
1469         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1470
1471         # Forced printings
1472         if self.params.get('forcetitle', False):
1473             self.to_stdout(info_dict['fulltitle'])
1474         if self.params.get('forceid', False):
1475             self.to_stdout(info_dict['id'])
1476         if self.params.get('forceurl', False):
1477             if info_dict.get('requested_formats') is not None:
1478                 for f in info_dict['requested_formats']:
1479                     self.to_stdout(f['url'] + f.get('play_path', ''))
1480             else:
1481                 # For RTMP URLs, also include the playpath
1482                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1483         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1484             self.to_stdout(info_dict['thumbnail'])
1485         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1486             self.to_stdout(info_dict['description'])
1487         if self.params.get('forcefilename', False) and filename is not None:
1488             self.to_stdout(filename)
1489         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1490             self.to_stdout(formatSeconds(info_dict['duration']))
1491         if self.params.get('forceformat', False):
1492             self.to_stdout(info_dict['format'])
1493         if self.params.get('forcejson', False):
1494             self.to_stdout(json.dumps(info_dict))
1495
1496         # Do nothing else if in simulate mode
1497         if self.params.get('simulate', False):
1498             return
1499
1500         if filename is None:
1501             return
1502
1503         try:
1504             dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
1505             if dn and not os.path.exists(dn):
1506                 os.makedirs(dn)
1507         except (OSError, IOError) as err:
1508             self.report_error('unable to create directory ' + error_to_compat_str(err))
1509             return
1510
1511         if self.params.get('writedescription', False):
1512             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1513             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1514                 self.to_screen('[info] Video description is already present')
1515             elif info_dict.get('description') is None:
1516                 self.report_warning('There\'s no description to write.')
1517             else:
1518                 try:
1519                     self.to_screen('[info] Writing video description to: ' + descfn)
1520                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1521                         descfile.write(info_dict['description'])
1522                 except (OSError, IOError):
1523                     self.report_error('Cannot write description file ' + descfn)
1524                     return
1525
1526         if self.params.get('writeannotations', False):
1527             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1528             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1529                 self.to_screen('[info] Video annotations are already present')
1530             else:
1531                 try:
1532                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1533                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1534                         annofile.write(info_dict['annotations'])
1535                 except (KeyError, TypeError):
1536                     self.report_warning('There are no annotations to write.')
1537                 except (OSError, IOError):
1538                     self.report_error('Cannot write annotations file: ' + annofn)
1539                     return
1540
1541         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1542                                        self.params.get('writeautomaticsub')])
1543
1544         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1545             # subtitles download errors are already managed as troubles in relevant IE
1546             # that way it will silently go on when used with unsupporting IE
1547             subtitles = info_dict['requested_subtitles']
1548             ie = self.get_info_extractor(info_dict['extractor_key'])
1549             for sub_lang, sub_info in subtitles.items():
1550                 sub_format = sub_info['ext']
1551                 if sub_info.get('data') is not None:
1552                     sub_data = sub_info['data']
1553                 else:
1554                     try:
1555                         sub_data = ie._download_webpage(
1556                             sub_info['url'], info_dict['id'], note=False)
1557                     except ExtractorError as err:
1558                         self.report_warning('Unable to download subtitle for "%s": %s' %
1559                                             (sub_lang, error_to_compat_str(err.cause)))
1560                         continue
1561                 try:
1562                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1563                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1564                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1565                     else:
1566                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1567                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1568                             subfile.write(sub_data)
1569                 except (OSError, IOError):
1570                     self.report_error('Cannot write subtitles file ' + sub_filename)
1571                     return
1572
1573         if self.params.get('writeinfojson', False):
1574             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1575             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1576                 self.to_screen('[info] Video description metadata is already present')
1577             else:
1578                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1579                 try:
1580                     write_json_file(self.filter_requested_info(info_dict), infofn)
1581                 except (OSError, IOError):
1582                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1583                     return
1584
1585         self._write_thumbnails(info_dict, filename)
1586
1587         if not self.params.get('skip_download', False):
1588             try:
1589                 def dl(name, info):
1590                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1591                     for ph in self._progress_hooks:
1592                         fd.add_progress_hook(ph)
1593                     if self.params.get('verbose'):
1594                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1595                     return fd.download(name, info)
1596
1597                 if info_dict.get('requested_formats') is not None:
1598                     downloaded = []
1599                     success = True
1600                     merger = FFmpegMergerPP(self)
1601                     if not merger.available:
1602                         postprocessors = []
1603                         self.report_warning('You have requested multiple '
1604                                             'formats but ffmpeg or avconv are not installed.'
1605                                             ' The formats won\'t be merged.')
1606                     else:
1607                         postprocessors = [merger]
1608
1609                     def compatible_formats(formats):
1610                         video, audio = formats
1611                         # Check extension
1612                         video_ext, audio_ext = audio.get('ext'), video.get('ext')
1613                         if video_ext and audio_ext:
1614                             COMPATIBLE_EXTS = (
1615                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
1616                                 ('webm')
1617                             )
1618                             for exts in COMPATIBLE_EXTS:
1619                                 if video_ext in exts and audio_ext in exts:
1620                                     return True
1621                         # TODO: Check acodec/vcodec
1622                         return False
1623
1624                     filename_real_ext = os.path.splitext(filename)[1][1:]
1625                     filename_wo_ext = (
1626                         os.path.splitext(filename)[0]
1627                         if filename_real_ext == info_dict['ext']
1628                         else filename)
1629                     requested_formats = info_dict['requested_formats']
1630                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
1631                         info_dict['ext'] = 'mkv'
1632                         self.report_warning(
1633                             'Requested formats are incompatible for merge and will be merged into mkv.')
1634                     # Ensure filename always has a correct extension for successful merge
1635                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
1636                     if os.path.exists(encodeFilename(filename)):
1637                         self.to_screen(
1638                             '[download] %s has already been downloaded and '
1639                             'merged' % filename)
1640                     else:
1641                         for f in requested_formats:
1642                             new_info = dict(info_dict)
1643                             new_info.update(f)
1644                             fname = self.prepare_filename(new_info)
1645                             fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
1646                             downloaded.append(fname)
1647                             partial_success = dl(fname, new_info)
1648                             success = success and partial_success
1649                         info_dict['__postprocessors'] = postprocessors
1650                         info_dict['__files_to_merge'] = downloaded
1651                 else:
1652                     # Just a single file
1653                     success = dl(filename, info_dict)
1654             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1655                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
1656                 return
1657             except (OSError, IOError) as err:
1658                 raise UnavailableVideoError(err)
1659             except (ContentTooShortError, ) as err:
1660                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1661                 return
1662
1663             if success and filename != '-':
1664                 # Fixup content
1665                 fixup_policy = self.params.get('fixup')
1666                 if fixup_policy is None:
1667                     fixup_policy = 'detect_or_warn'
1668
1669                 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
1670
1671                 stretched_ratio = info_dict.get('stretched_ratio')
1672                 if stretched_ratio is not None and stretched_ratio != 1:
1673                     if fixup_policy == 'warn':
1674                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1675                             info_dict['id'], stretched_ratio))
1676                     elif fixup_policy == 'detect_or_warn':
1677                         stretched_pp = FFmpegFixupStretchedPP(self)
1678                         if stretched_pp.available:
1679                             info_dict.setdefault('__postprocessors', [])
1680                             info_dict['__postprocessors'].append(stretched_pp)
1681                         else:
1682                             self.report_warning(
1683                                 '%s: Non-uniform pixel ratio (%s). %s'
1684                                 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
1685                     else:
1686                         assert fixup_policy in ('ignore', 'never')
1687
1688                 if (info_dict.get('requested_formats') is None and
1689                         info_dict.get('container') == 'm4a_dash'):
1690                     if fixup_policy == 'warn':
1691                         self.report_warning(
1692                             '%s: writing DASH m4a. '
1693                             'Only some players support this container.'
1694                             % info_dict['id'])
1695                     elif fixup_policy == 'detect_or_warn':
1696                         fixup_pp = FFmpegFixupM4aPP(self)
1697                         if fixup_pp.available:
1698                             info_dict.setdefault('__postprocessors', [])
1699                             info_dict['__postprocessors'].append(fixup_pp)
1700                         else:
1701                             self.report_warning(
1702                                 '%s: writing DASH m4a. '
1703                                 'Only some players support this container. %s'
1704                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
1705                     else:
1706                         assert fixup_policy in ('ignore', 'never')
1707
1708                 if (info_dict.get('protocol') == 'm3u8_native' or
1709                         info_dict.get('protocol') == 'm3u8' and
1710                         self.params.get('hls_prefer_native')):
1711                     if fixup_policy == 'warn':
1712                         self.report_warning('%s: malformated aac bitstream.' % (
1713                             info_dict['id']))
1714                     elif fixup_policy == 'detect_or_warn':
1715                         fixup_pp = FFmpegFixupM3u8PP(self)
1716                         if fixup_pp.available:
1717                             info_dict.setdefault('__postprocessors', [])
1718                             info_dict['__postprocessors'].append(fixup_pp)
1719                         else:
1720                             self.report_warning(
1721                                 '%s: malformated aac bitstream. %s'
1722                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
1723                     else:
1724                         assert fixup_policy in ('ignore', 'never')
1725
1726                 try:
1727                     self.post_process(filename, info_dict)
1728                 except (PostProcessingError) as err:
1729                     self.report_error('postprocessing: %s' % str(err))
1730                     return
1731                 self.record_download_archive(info_dict)
1732
1733     def download(self, url_list):
1734         """Download a given list of URLs."""
1735         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1736         if (len(url_list) > 1 and
1737                 '%' not in outtmpl and
1738                 self.params.get('max_downloads') != 1):
1739             raise SameFileError(outtmpl)
1740
1741         for url in url_list:
1742             try:
1743                 # It also downloads the videos
1744                 res = self.extract_info(
1745                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
1746             except UnavailableVideoError:
1747                 self.report_error('unable to download video')
1748             except MaxDownloadsReached:
1749                 self.to_screen('[info] Maximum number of downloaded files reached.')
1750                 raise
1751             else:
1752                 if self.params.get('dump_single_json', False):
1753                     self.to_stdout(json.dumps(res))
1754
1755         return self._download_retcode
1756
1757     def download_with_info_file(self, info_filename):
1758         with contextlib.closing(fileinput.FileInput(
1759                 [info_filename], mode='r',
1760                 openhook=fileinput.hook_encoded('utf-8'))) as f:
1761             # FileInput doesn't have a read method, we can't call json.load
1762             info = self.filter_requested_info(json.loads('\n'.join(f)))
1763         try:
1764             self.process_ie_result(info, download=True)
1765         except DownloadError:
1766             webpage_url = info.get('webpage_url')
1767             if webpage_url is not None:
1768                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1769                 return self.download([webpage_url])
1770             else:
1771                 raise
1772         return self._download_retcode
1773
1774     @staticmethod
1775     def filter_requested_info(info_dict):
1776         return dict(
1777             (k, v) for k, v in info_dict.items()
1778             if k not in ['requested_formats', 'requested_subtitles'])
1779
1780     def post_process(self, filename, ie_info):
1781         """Run all the postprocessors on the given file."""
1782         info = dict(ie_info)
1783         info['filepath'] = filename
1784         pps_chain = []
1785         if ie_info.get('__postprocessors') is not None:
1786             pps_chain.extend(ie_info['__postprocessors'])
1787         pps_chain.extend(self._pps)
1788         for pp in pps_chain:
1789             files_to_delete = []
1790             try:
1791                 files_to_delete, info = pp.run(info)
1792             except PostProcessingError as e:
1793                 self.report_error(e.msg)
1794             if files_to_delete and not self.params.get('keepvideo', False):
1795                 for old_filename in files_to_delete:
1796                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1797                     try:
1798                         os.remove(encodeFilename(old_filename))
1799                     except (IOError, OSError):
1800                         self.report_warning('Unable to remove downloaded original file')
1801
1802     def _make_archive_id(self, info_dict):
1803         # Future-proof against any change in case
1804         # and backwards compatibility with prior versions
1805         extractor = info_dict.get('extractor_key')
1806         if extractor is None:
1807             if 'id' in info_dict:
1808                 extractor = info_dict.get('ie_key')  # key in a playlist
1809         if extractor is None:
1810             return None  # Incomplete video information
1811         return extractor.lower() + ' ' + info_dict['id']
1812
1813     def in_download_archive(self, info_dict):
1814         fn = self.params.get('download_archive')
1815         if fn is None:
1816             return False
1817
1818         vid_id = self._make_archive_id(info_dict)
1819         if vid_id is None:
1820             return False  # Incomplete video information
1821
1822         try:
1823             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1824                 for line in archive_file:
1825                     if line.strip() == vid_id:
1826                         return True
1827         except IOError as ioe:
1828             if ioe.errno != errno.ENOENT:
1829                 raise
1830         return False
1831
1832     def record_download_archive(self, info_dict):
1833         fn = self.params.get('download_archive')
1834         if fn is None:
1835             return
1836         vid_id = self._make_archive_id(info_dict)
1837         assert vid_id
1838         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1839             archive_file.write(vid_id + '\n')
1840
1841     @staticmethod
1842     def format_resolution(format, default='unknown'):
1843         if format.get('vcodec') == 'none':
1844             return 'audio only'
1845         if format.get('resolution') is not None:
1846             return format['resolution']
1847         if format.get('height') is not None:
1848             if format.get('width') is not None:
1849                 res = '%sx%s' % (format['width'], format['height'])
1850             else:
1851                 res = '%sp' % format['height']
1852         elif format.get('width') is not None:
1853             res = '%dx?' % format['width']
1854         else:
1855             res = default
1856         return res
1857
1858     def _format_note(self, fdict):
1859         res = ''
1860         if fdict.get('ext') in ['f4f', 'f4m']:
1861             res += '(unsupported) '
1862         if fdict.get('language'):
1863             if res:
1864                 res += ' '
1865             res += '[%s] ' % fdict['language']
1866         if fdict.get('format_note') is not None:
1867             res += fdict['format_note'] + ' '
1868         if fdict.get('tbr') is not None:
1869             res += '%4dk ' % fdict['tbr']
1870         if fdict.get('container') is not None:
1871             if res:
1872                 res += ', '
1873             res += '%s container' % fdict['container']
1874         if (fdict.get('vcodec') is not None and
1875                 fdict.get('vcodec') != 'none'):
1876             if res:
1877                 res += ', '
1878             res += fdict['vcodec']
1879             if fdict.get('vbr') is not None:
1880                 res += '@'
1881         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1882             res += 'video@'
1883         if fdict.get('vbr') is not None:
1884             res += '%4dk' % fdict['vbr']
1885         if fdict.get('fps') is not None:
1886             if res:
1887                 res += ', '
1888             res += '%sfps' % fdict['fps']
1889         if fdict.get('acodec') is not None:
1890             if res:
1891                 res += ', '
1892             if fdict['acodec'] == 'none':
1893                 res += 'video only'
1894             else:
1895                 res += '%-5s' % fdict['acodec']
1896         elif fdict.get('abr') is not None:
1897             if res:
1898                 res += ', '
1899             res += 'audio'
1900         if fdict.get('abr') is not None:
1901             res += '@%3dk' % fdict['abr']
1902         if fdict.get('asr') is not None:
1903             res += ' (%5dHz)' % fdict['asr']
1904         if fdict.get('filesize') is not None:
1905             if res:
1906                 res += ', '
1907             res += format_bytes(fdict['filesize'])
1908         elif fdict.get('filesize_approx') is not None:
1909             if res:
1910                 res += ', '
1911             res += '~' + format_bytes(fdict['filesize_approx'])
1912         return res
1913
1914     def list_formats(self, info_dict):
1915         formats = info_dict.get('formats', [info_dict])
1916         table = [
1917             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1918             for f in formats
1919             if f.get('preference') is None or f['preference'] >= -1000]
1920         if len(formats) > 1:
1921             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1922
1923         header_line = ['format code', 'extension', 'resolution', 'note']
1924         self.to_screen(
1925             '[info] Available formats for %s:\n%s' %
1926             (info_dict['id'], render_table(header_line, table)))
1927
1928     def list_thumbnails(self, info_dict):
1929         thumbnails = info_dict.get('thumbnails')
1930         if not thumbnails:
1931             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
1932             return
1933
1934         self.to_screen(
1935             '[info] Thumbnails for %s:' % info_dict['id'])
1936         self.to_screen(render_table(
1937             ['ID', 'width', 'height', 'URL'],
1938             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1939
1940     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1941         if not subtitles:
1942             self.to_screen('%s has no %s' % (video_id, name))
1943             return
1944         self.to_screen(
1945             'Available %s for %s:' % (name, video_id))
1946         self.to_screen(render_table(
1947             ['Language', 'formats'],
1948             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1949                 for lang, formats in subtitles.items()]))
1950
1951     def urlopen(self, req):
1952         """ Start an HTTP download """
1953         if isinstance(req, compat_basestring):
1954             req = sanitized_Request(req)
1955         return self._opener.open(req, timeout=self._socket_timeout)
1956
1957     def print_debug_header(self):
1958         if not self.params.get('verbose'):
1959             return
1960
1961         if type('') is not compat_str:
1962             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1963             self.report_warning(
1964                 'Your Python is broken! Update to a newer and supported version')
1965
1966         stdout_encoding = getattr(
1967             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1968         encoding_str = (
1969             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1970                 locale.getpreferredencoding(),
1971                 sys.getfilesystemencoding(),
1972                 stdout_encoding,
1973                 self.get_encoding()))
1974         write_string(encoding_str, encoding=None)
1975
1976         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1977         if _LAZY_LOADER:
1978             self._write_string('[debug] Lazy loading extractors enabled' + '\n')
1979         try:
1980             sp = subprocess.Popen(
1981                 ['git', 'rev-parse', '--short', 'HEAD'],
1982                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1983                 cwd=os.path.dirname(os.path.abspath(__file__)))
1984             out, err = sp.communicate()
1985             out = out.decode().strip()
1986             if re.match('[0-9a-f]+', out):
1987                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1988         except Exception:
1989             try:
1990                 sys.exc_clear()
1991             except Exception:
1992                 pass
1993         self._write_string('[debug] Python version %s - %s\n' % (
1994             platform.python_version(), platform_name()))
1995
1996         exe_versions = FFmpegPostProcessor.get_versions(self)
1997         exe_versions['rtmpdump'] = rtmpdump_version()
1998         exe_str = ', '.join(
1999             '%s %s' % (exe, v)
2000             for exe, v in sorted(exe_versions.items())
2001             if v
2002         )
2003         if not exe_str:
2004             exe_str = 'none'
2005         self._write_string('[debug] exe versions: %s\n' % exe_str)
2006
2007         proxy_map = {}
2008         for handler in self._opener.handlers:
2009             if hasattr(handler, 'proxies'):
2010                 proxy_map.update(handler.proxies)
2011         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
2012
2013         if self.params.get('call_home', False):
2014             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
2015             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
2016             latest_version = self.urlopen(
2017                 'https://yt-dl.org/latest/version').read().decode('utf-8')
2018             if version_tuple(latest_version) > version_tuple(__version__):
2019                 self.report_warning(
2020                     'You are using an outdated version (newest version: %s)! '
2021                     'See https://yt-dl.org/update if you need help updating.' %
2022                     latest_version)
2023
2024     def _setup_opener(self):
2025         timeout_val = self.params.get('socket_timeout')
2026         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
2027
2028         opts_cookiefile = self.params.get('cookiefile')
2029         opts_proxy = self.params.get('proxy')
2030
2031         if opts_cookiefile is None:
2032             self.cookiejar = compat_cookiejar.CookieJar()
2033         else:
2034             opts_cookiefile = compat_expanduser(opts_cookiefile)
2035             self.cookiejar = compat_cookiejar.MozillaCookieJar(
2036                 opts_cookiefile)
2037             if os.access(opts_cookiefile, os.R_OK):
2038                 self.cookiejar.load()
2039
2040         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
2041         if opts_proxy is not None:
2042             if opts_proxy == '':
2043                 proxies = {}
2044             else:
2045                 proxies = {'http': opts_proxy, 'https': opts_proxy}
2046         else:
2047             proxies = compat_urllib_request.getproxies()
2048             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
2049             if 'http' in proxies and 'https' not in proxies:
2050                 proxies['https'] = proxies['http']
2051         proxy_handler = PerRequestProxyHandler(proxies)
2052
2053         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
2054         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
2055         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
2056         data_handler = compat_urllib_request_DataHandler()
2057
2058         # When passing our own FileHandler instance, build_opener won't add the
2059         # default FileHandler and allows us to disable the file protocol, which
2060         # can be used for malicious purposes (see
2061         # https://github.com/rg3/youtube-dl/issues/8227)
2062         file_handler = compat_urllib_request.FileHandler()
2063
2064         def file_open(*args, **kwargs):
2065             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
2066         file_handler.file_open = file_open
2067
2068         opener = compat_urllib_request.build_opener(
2069             proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)
2070
2071         # Delete the default user-agent header, which would otherwise apply in
2072         # cases where our custom HTTP handler doesn't come into play
2073         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
2074         opener.addheaders = []
2075         self._opener = opener
2076
2077     def encode(self, s):
2078         if isinstance(s, bytes):
2079             return s  # Already encoded
2080
2081         try:
2082             return s.encode(self.get_encoding())
2083         except UnicodeEncodeError as err:
2084             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2085             raise
2086
2087     def get_encoding(self):
2088         encoding = self.params.get('encoding')
2089         if encoding is None:
2090             encoding = preferredencoding()
2091         return encoding
2092
2093     def _write_thumbnails(self, info_dict, filename):
2094         if self.params.get('writethumbnail', False):
2095             thumbnails = info_dict.get('thumbnails')
2096             if thumbnails:
2097                 thumbnails = [thumbnails[-1]]
2098         elif self.params.get('write_all_thumbnails', False):
2099             thumbnails = info_dict.get('thumbnails')
2100         else:
2101             return
2102
2103         if not thumbnails:
2104             # No thumbnails present, so return immediately
2105             return
2106
2107         for t in thumbnails:
2108             thumb_ext = determine_ext(t['url'], 'jpg')
2109             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2110             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2111             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
2112
2113             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2114                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2115                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2116             else:
2117                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2118                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
2119                 try:
2120                     uf = self.urlopen(t['url'])
2121                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2122                         shutil.copyfileobj(uf, thumbf)
2123                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2124                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2125                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2126                     self.report_warning('Unable to download thumbnail "%s": %s' %
2127                                         (t['url'], error_to_compat_str(err)))