Merge branch 'HanYOLO-puls4'
[youtube-dl] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import datetime
8 import errno
9 import io
10 import itertools
11 import json
12 import locale
13 import operator
14 import os
15 import platform
16 import re
17 import shutil
18 import subprocess
19 import socket
20 import sys
21 import time
22 import traceback
23
24 if os.name == 'nt':
25     import ctypes
26
27 from .compat import (
28     compat_basestring,
29     compat_cookiejar,
30     compat_expanduser,
31     compat_http_client,
32     compat_kwargs,
33     compat_str,
34     compat_urllib_error,
35     compat_urllib_request,
36 )
37 from .utils import (
38     escape_url,
39     ContentTooShortError,
40     date_from_str,
41     DateRange,
42     DEFAULT_OUTTMPL,
43     determine_ext,
44     DownloadError,
45     encodeFilename,
46     ExtractorError,
47     format_bytes,
48     formatSeconds,
49     get_term_width,
50     locked_file,
51     make_HTTPS_handler,
52     MaxDownloadsReached,
53     PagedList,
54     parse_filesize,
55     PostProcessingError,
56     platform_name,
57     preferredencoding,
58     render_table,
59     SameFileError,
60     sanitize_filename,
61     std_headers,
62     subtitles_filename,
63     takewhile_inclusive,
64     UnavailableVideoError,
65     url_basename,
66     version_tuple,
67     write_json_file,
68     write_string,
69     YoutubeDLHandler,
70     prepend_extension,
71     args_to_str,
72     age_restricted,
73 )
74 from .cache import Cache
75 from .extractor import get_info_extractor, gen_extractors
76 from .downloader import get_suitable_downloader
77 from .downloader.rtmp import rtmpdump_version
78 from .postprocessor import (
79     FFmpegFixupM4aPP,
80     FFmpegFixupStretchedPP,
81     FFmpegMergerPP,
82     FFmpegPostProcessor,
83     get_postprocessor,
84 )
85 from .version import __version__
86
87
88 class YoutubeDL(object):
89     """YoutubeDL class.
90
91     YoutubeDL objects are the ones responsible of downloading the
92     actual video file and writing it to disk if the user has requested
93     it, among some other tasks. In most cases there should be one per
94     program. As, given a video URL, the downloader doesn't know how to
95     extract all the needed information, task that InfoExtractors do, it
96     has to pass the URL to one of them.
97
98     For this, YoutubeDL objects have a method that allows
99     InfoExtractors to be registered in a given order. When it is passed
100     a URL, the YoutubeDL object handles it to the first InfoExtractor it
101     finds that reports being able to handle it. The InfoExtractor extracts
102     all the information about the video or videos the URL refers to, and
103     YoutubeDL process the extracted information, possibly using a File
104     Downloader to download the video.
105
106     YoutubeDL objects accept a lot of parameters. In order not to saturate
107     the object constructor with arguments, it receives a dictionary of
108     options instead. These options are available through the params
109     attribute for the InfoExtractors to use. The YoutubeDL also
110     registers itself as the downloader in charge for the InfoExtractors
111     that are added to it, so this is a "mutual registration".
112
113     Available options:
114
115     username:          Username for authentication purposes.
116     password:          Password for authentication purposes.
117     videopassword:     Password for accessing a video.
118     usenetrc:          Use netrc for authentication instead.
119     verbose:           Print additional info to stdout.
120     quiet:             Do not print messages to stdout.
121     no_warnings:       Do not print out anything for warnings.
122     forceurl:          Force printing final URL.
123     forcetitle:        Force printing title.
124     forceid:           Force printing ID.
125     forcethumbnail:    Force printing thumbnail URL.
126     forcedescription:  Force printing description.
127     forcefilename:     Force printing final filename.
128     forceduration:     Force printing duration.
129     forcejson:         Force printing info_dict as JSON.
130     dump_single_json:  Force printing the info_dict of the whole playlist
131                        (or video) as a single JSON line.
132     simulate:          Do not download the video files.
133     format:            Video format code. See options.py for more information.
134     format_limit:      Highest quality format to try.
135     outtmpl:           Template for output names.
136     restrictfilenames: Do not allow "&" and spaces in file names
137     ignoreerrors:      Do not stop on download errors.
138     nooverwrites:      Prevent overwriting files.
139     playliststart:     Playlist item to start at.
140     playlistend:       Playlist item to end at.
141     playlist_items:    Specific indices of playlist to download.
142     playlistreverse:   Download playlist items in reverse order.
143     matchtitle:        Download only matching titles.
144     rejecttitle:       Reject downloads for matching titles.
145     logger:            Log messages to a logging.Logger instance.
146     logtostderr:       Log messages to stderr instead of stdout.
147     writedescription:  Write the video description to a .description file
148     writeinfojson:     Write the video description to a .info.json file
149     writeannotations:  Write the video annotations to a .annotations.xml file
150     writethumbnail:    Write the thumbnail image to a file
151     write_all_thumbnails:  Write all thumbnail formats to files
152     writesubtitles:    Write the video subtitles to a file
153     writeautomaticsub: Write the automatic subtitles to a file
154     allsubtitles:      Downloads all the subtitles of the video
155                        (requires writesubtitles or writeautomaticsub)
156     listsubtitles:     Lists all available subtitles for the video
157     subtitlesformat:   The format code for subtitles
158     subtitleslangs:    List of languages of the subtitles to download
159     keepvideo:         Keep the video file after post-processing
160     daterange:         A DateRange object, download only if the upload_date is in the range.
161     skip_download:     Skip the actual download of the video file
162     cachedir:          Location of the cache files in the filesystem.
163                        False to disable filesystem cache.
164     noplaylist:        Download single video instead of a playlist if in doubt.
165     age_limit:         An integer representing the user's age in years.
166                        Unsuitable videos for the given age are skipped.
167     min_views:         An integer representing the minimum view count the video
168                        must have in order to not be skipped.
169                        Videos without view count information are always
170                        downloaded. None for no limit.
171     max_views:         An integer representing the maximum view count.
172                        Videos that are more popular than that are not
173                        downloaded.
174                        Videos without view count information are always
175                        downloaded. None for no limit.
176     download_archive:  File name of a file where all downloads are recorded.
177                        Videos already present in the file are not downloaded
178                        again.
179     cookiefile:        File name where cookies should be read from and dumped to.
180     nocheckcertificate:Do not verify SSL certificates
181     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
182                        At the moment, this is only supported by YouTube.
183     proxy:             URL of the proxy server to use
184     socket_timeout:    Time to wait for unresponsive hosts, in seconds
185     bidi_workaround:   Work around buggy terminals without bidirectional text
186                        support, using fribidi
187     debug_printtraffic:Print out sent and received HTTP traffic
188     include_ads:       Download ads as well
189     default_search:    Prepend this string if an input url is not valid.
190                        'auto' for elaborate guessing
191     encoding:          Use this encoding instead of the system-specified.
192     extract_flat:      Do not resolve URLs, return the immediate result.
193                        Pass in 'in_playlist' to only show this behavior for
194                        playlist items.
195     postprocessors:    A list of dictionaries, each with an entry
196                        * key:  The name of the postprocessor. See
197                                youtube_dl/postprocessor/__init__.py for a list.
198                        as well as any further keyword arguments for the
199                        postprocessor.
200     progress_hooks:    A list of functions that get called on download
201                        progress, with a dictionary with the entries
202                        * status: One of "downloading", "error", or "finished".
203                                  Check this first and ignore unknown values.
204
205                        If status is one of "downloading", or "finished", the
206                        following properties may also be present:
207                        * filename: The final filename (always present)
208                        * tmpfilename: The filename we're currently writing to
209                        * downloaded_bytes: Bytes on disk
210                        * total_bytes: Size of the whole file, None if unknown
211                        * total_bytes_estimate: Guess of the eventual file size,
212                                                None if unavailable.
213                        * elapsed: The number of seconds since download started.
214                        * eta: The estimated time in seconds, None if unknown
215                        * speed: The download speed in bytes/second, None if
216                                 unknown
217                        * fragment_index: The counter of the currently
218                                          downloaded video fragment.
219                        * fragment_count: The number of fragments (= individual
220                                          files that will be merged)
221
222                        Progress hooks are guaranteed to be called at least once
223                        (with status "finished") if the download is successful.
224     merge_output_format: Extension to use when merging formats.
225     fixup:             Automatically correct known faults of the file.
226                        One of:
227                        - "never": do nothing
228                        - "warn": only emit a warning
229                        - "detect_or_warn": check whether we can do anything
230                                            about it, warn otherwise (default)
231     source_address:    (Experimental) Client-side IP address to bind to.
232     call_home:         Boolean, true iff we are allowed to contact the
233                        youtube-dl servers for debugging.
234     sleep_interval:    Number of seconds to sleep before each download.
235     listformats:       Print an overview of available video formats and exit.
236     list_thumbnails:   Print a table of all thumbnails and exit.
237     match_filter:      A function that gets called with the info_dict of
238                        every video.
239                        If it returns a message, the video is ignored.
240                        If it returns None, the video is downloaded.
241                        match_filter_func in utils.py is one example for this.
242     no_color:          Do not emit color codes in output.
243
244     The following options determine which downloader is picked:
245     external_downloader: Executable of the external downloader to call.
246                        None or unset for standard (built-in) downloader.
247     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
248
249     The following parameters are not used by YoutubeDL itself, they are used by
250     the FileDownloader:
251     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
252     noresizebuffer, retries, continuedl, noprogress, consoletitle,
253     xattr_set_filesize.
254
255     The following options are used by the post processors:
256     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
257                        otherwise prefer avconv.
258     exec_cmd:          Arbitrary command to run after downloading
259     """
260
261     params = None
262     _ies = []
263     _pps = []
264     _download_retcode = None
265     _num_downloads = None
266     _screen_file = None
267
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.

        params:    Dictionary of options; see the class docstring for the
                   available keys. Stored on self.params and shared with the
                   registered InfoExtractors.
        auto_init: If True, print the debug header and register the default
                   InfoExtractors immediately.
        """
        if params is None:
            params = {}
        self._ies = []
        self._ies_instances = {}
        self._pps = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Screen output is redirected to stderr when 'logtostderr' is set.
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = params
        self.cache = Cache(self)

        if params.get('bidi_workaround', False):
            try:
                import pty
                # Run a bidi filter process behind a pty; its output is read
                # back in _bidi_workaround().
                master, slave = pty.openpty()
                width = get_term_width()
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    # bidiv is not installed; fall back to fribidi.
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == 2:  # ENOENT: neither executable found
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.version_info >= (3,) and sys.platform != 'win32' and
                sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
                not params.get('restrictfilenames', False)):
            # On Python 3, the Unicode filesystem API will throw errors (#1474)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if '%(stitle)s' in self.params.get('outtmpl', ''):
            self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

        self._setup_opener()

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        # Instantiate and register the configured postprocessors; every entry
        # except 'key' is forwarded as a keyword argument to the constructor.
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            del pp_def['key']
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)
338
339     def warn_if_short_id(self, argv):
340         # short YouTube ID starting with dash?
341         idxs = [
342             i for i, a in enumerate(argv)
343             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
344         if idxs:
345             correct_argv = (
346                 ['youtube-dl'] +
347                 [a for i, a in enumerate(argv) if i not in idxs] +
348                 ['--'] + [argv[i] for i in idxs]
349             )
350             self.report_warning(
351                 'Long argument string detected. '
352                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
353                 args_to_str(correct_argv))
354
    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        self._ies.append(ie)
        # Also index by key so get_info_extractor() can reuse this instance.
        self._ies_instances[ie.ie_key()] = ie
        # Mutual registration: the extractor gets a reference back to us.
        ie.set_downloader(self)
360
361     def get_info_extractor(self, ie_key):
362         """
363         Get an instance of an IE with name ie_key, it will try to get one from
364         the _ies list, if there's no instance it will create a new one and add
365         it to the extractor list.
366         """
367         ie = self._ies_instances.get(ie_key)
368         if ie is None:
369             ie = get_info_extractor(ie_key)()
370             self.add_info_extractor(ie)
371         return ie
372
    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        # Order matters: extract_info() picks the first suitable extractor.
        for ie in gen_extractors():
            self.add_info_extractor(ie)
379
    def add_post_processor(self, pp):
        """Add a PostProcessor object to the end of the chain."""
        self._pps.append(pp)
        # Mutual registration, mirroring add_info_extractor().
        pp.set_downloader(self)
384
    def add_progress_hook(self, ph):
        """Add the progress hook (currently only for the file downloader)"""
        # Hooks are called with a status dictionary; see the class docstring
        # entry for 'progress_hooks' for the fields.
        self._progress_hooks.append(ph)
388
    def _bidi_workaround(self, message):
        # No bidi filter process was set up in __init__ (workaround disabled
        # or executables missing): return the message unchanged.
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, compat_str)
        line_count = message.count('\n') + 1
        # Feed the message through the filter process and read back exactly
        # as many lines as we sent.
        self._output_process.stdin.write((message + '\n').encode('utf-8'))
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode('utf-8')
                      for _ in range(line_count))
        # Strip the trailing newline that was appended above.
        return res[:-len('\n')]
401
    def to_screen(self, message, skip_eol=False):
        """Print message to stdout if not in quiet mode."""
        # Thin wrapper: to_stdout handles the quiet check and routing.
        return self.to_stdout(message, skip_eol, check_quiet=True)
405
    def _write_string(self, s, out=None):
        # Central output helper so the user-selected encoding
        # (params['encoding']) is honored for all writes.
        write_string(s, out=out, encoding=self.params.get('encoding'))
408
409     def to_stdout(self, message, skip_eol=False, check_quiet=False):
410         """Print message to stdout if not in quiet mode."""
411         if self.params.get('logger'):
412             self.params['logger'].debug(message)
413         elif not check_quiet or not self.params.get('quiet', False):
414             message = self._bidi_workaround(message)
415             terminator = ['\n', ''][skip_eol]
416             output = message + terminator
417
418             self._write_string(output, self._screen_file)
419
420     def to_stderr(self, message):
421         """Print message to stderr."""
422         assert isinstance(message, compat_str)
423         if self.params.get('logger'):
424             self.params['logger'].error(message)
425         else:
426             message = self._bidi_workaround(message)
427             output = message + '\n'
428             self._write_string(output, self._err_file)
429
    def to_console_title(self, message):
        """Set the console/terminal window title to message, if enabled."""
        if not self.params.get('consoletitle', False):
            return
        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            # xterm escape sequence: set window/icon title.
            self._write_string('\033]0;%s\007' % message, self._screen_file)
439
440     def save_console_title(self):
441         if not self.params.get('consoletitle', False):
442             return
443         if 'TERM' in os.environ:
444             # Save the title on stack
445             self._write_string('\033[22;0t', self._screen_file)
446
447     def restore_console_title(self):
448         if not self.params.get('consoletitle', False):
449             return
450         if 'TERM' in os.environ:
451             # Restore the title from stack
452             self._write_string('\033[23;0t', self._screen_file)
453
    def __enter__(self):
        # Context-manager entry: remember the console title so that
        # __exit__ can restore it.
        self.save_console_title()
        return self
457
    def __exit__(self, *args):
        self.restore_console_title()

        # Persist cookies on exit when a cookie file was configured.
        if self.params.get('cookiefile') is not None:
            self.cookiejar.save()
463
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # Exceptions that wrap another exception expose it as an
                    # 'exc_info' attribute; include that traceback first.
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += compat_str(traceback.format_exc())
                else:
                    # Not inside an exception handler: show the call stack.
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Re-raise as DownloadError, preferring the wrapped exception's
            # exc_info so the original cause is preserved for the caller.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # Errors are being ignored: record failure in the exit code instead.
        self._download_retcode = 1
493
494     def report_warning(self, message):
495         '''
496         Print the message to stderr, it will be prefixed with 'WARNING:'
497         If stderr is a tty file the 'WARNING:' will be colored
498         '''
499         if self.params.get('logger') is not None:
500             self.params['logger'].warning(message)
501         else:
502             if self.params.get('no_warnings'):
503                 return
504             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
505                 _msg_header = '\033[0;33mWARNING:\033[0m'
506             else:
507                 _msg_header = 'WARNING:'
508             warning_message = '%s %s' % (_msg_header, message)
509             self.to_stderr(warning_message)
510
511     def report_error(self, message, tb=None):
512         '''
513         Do the same as trouble, but prefixes the message with 'ERROR:', colored
514         in red if stderr is a tty file.
515         '''
516         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
517             _msg_header = '\033[0;31mERROR:\033[0m'
518         else:
519             _msg_header = 'ERROR:'
520         error_message = '%s %s' % (_msg_header, message)
521         self.trouble(error_message, tb)
522
    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            # file_name may not be representable in the console encoding;
            # fall back to a generic message.
            self.to_screen('[download] The file has already been downloaded')
529
530     def prepare_filename(self, info_dict):
531         """Generate the output filename."""
532         try:
533             template_dict = dict(info_dict)
534
535             template_dict['epoch'] = int(time.time())
536             autonumber_size = self.params.get('autonumber_size')
537             if autonumber_size is None:
538                 autonumber_size = 5
539             autonumber_templ = '%0' + str(autonumber_size) + 'd'
540             template_dict['autonumber'] = autonumber_templ % self._num_downloads
541             if template_dict.get('playlist_index') is not None:
542                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
543             if template_dict.get('resolution') is None:
544                 if template_dict.get('width') and template_dict.get('height'):
545                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
546                 elif template_dict.get('height'):
547                     template_dict['resolution'] = '%sp' % template_dict['height']
548                 elif template_dict.get('width'):
549                     template_dict['resolution'] = '?x%d' % template_dict['width']
550
551             sanitize = lambda k, v: sanitize_filename(
552                 compat_str(v),
553                 restricted=self.params.get('restrictfilenames'),
554                 is_id=(k == 'id'))
555             template_dict = dict((k, sanitize(k, v))
556                                  for k, v in template_dict.items()
557                                  if v is not None)
558             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
559
560             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
561             tmpl = compat_expanduser(outtmpl)
562             filename = tmpl % template_dict
563             # Temporary fix for #4787
564             # 'Treat' all problem characters by passing filename through preferredencoding
565             # to workaround encoding issues with subprocess on python2 @ Windows
566             if sys.version_info < (3, 0) and sys.platform == 'win32':
567                 filename = encodeFilename(filename, True).decode(preferredencoding())
568             return filename
569         except ValueError as err:
570             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
571             return None
572
    def _match_entry(self, info_dict, incomplete):
        """ Returns None iff the file should be downloaded """

        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle:
                if not re.search(matchtitle, title, re.IGNORECASE):
                    return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle:
                if re.search(rejecttitle, title, re.IGNORECASE):
                    return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        # Skip videos uploaded outside the configured date range.
        date = info_dict.get('upload_date', None)
        if date is not None:
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        # View-count limits; videos without view count information are
        # never skipped by these checks.
        view_count = info_dict.get('view_count', None)
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title
        if self.in_download_archive(info_dict):
            return '%s has already been recorded in archive' % video_title

        # The user-supplied match_filter only runs on complete info_dicts
        # (presumably so it never sees partial playlist-entry data — verify
        # against the callers that pass incomplete=True).
        if not incomplete:
            match_filter = self.params.get('match_filter')
            if match_filter is not None:
                ret = match_filter(info_dict)
                if ret is not None:
                    return ret

        return None
614
615     @staticmethod
616     def add_extra_info(info_dict, extra_info):
617         '''Set the keys from extra_info in info dict if they are missing'''
618         for key, value in extra_info.items():
619             info_dict.setdefault(key, value)
620
    def extract_info(self, url, download=True, ie_key=None, extra_info={},
                     process=True):
        '''
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result
         '''

        # With an explicit ie_key only that extractor is tried; otherwise all
        # registered extractors are considered in registration order.
        if ie_key:
            ies = [self.get_info_extractor(ie_key)]
        else:
            ies = self._ies

        for ie in ies:
            if not ie.suitable(url):
                continue

            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            try:
                ie_result = ie.extract(url)
                if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                    break
                if isinstance(ie_result, list):
                    # Backwards compatibility: old IE result format
                    ie_result = {
                        '_type': 'compat_list',
                        'entries': ie_result,
                    }
                self.add_default_extra_info(ie_result, ie, url)
                if process:
                    return self.process_ie_result(ie_result, download, extra_info)
                else:
                    return ie_result
            except ExtractorError as de:  # An error we somewhat expected
                self.report_error(compat_str(de), de.format_traceback())
                break
            except MaxDownloadsReached:
                # Propagate: this is the signal to stop everything.
                raise
            except Exception as e:
                if self.params.get('ignoreerrors', False):
                    self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                    break
                else:
                    raise
        else:
            # for/else: the loop never hit 'break', i.e. no registered
            # extractor declared itself suitable for this URL.
            self.report_error('no suitable InfoExtractor for URL %s' % url)
670
671     def add_default_extra_info(self, ie_result, ie, url):
672         self.add_extra_info(ie_result, {
673             'extractor': ie.IE_NAME,
674             'webpage_url': url,
675             'webpage_url_basename': url_basename(url),
676             'extractor_key': ie.ie_key(),
677         })
678
    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.

        Dispatches on ie_result['_type'] (defaults to 'video'): plain videos
        go to process_video_result(); 'url'/'url_transparent' results are
        re-extracted; 'playlist'/'multi_video' entries are processed one by
        one; 'compat_list' is the legacy list-of-results format.
        NOTE(review): extra_info={} is a mutable default argument; it is only
        read here, never mutated, so it is harmless in practice.
        """

        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            extract_flat = self.params.get('extract_flat', False)
            # With extract_flat, return the unresolved URL result as-is
            # ('in_playlist' restricts this to entries inside a playlist).
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                    extract_flat is True):
                if self.params.get('forcejson', False):
                    self.to_stdout(json.dumps(ie_result))
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # Non-None fields from the embedding page override the embedded
            # result, except '_type' and 'url', which must come from the new
            # extraction (otherwise we would recurse forever).
            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            assert new_result.get('_type') != 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type == 'playlist' or result_type == 'multi_video':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            # --playlist-start is 1-based on the command line, 0-based here.
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', None)
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

            playlistitems_str = self.params.get('playlist_items', None)
            playlistitems = None
            if playlistitems_str is not None:
                def iter_playlistitems(format):
                    # Expand a "1-3,7" style spec into individual 1-based indices.
                    for string_segment in format.split(','):
                        if '-' in string_segment:
                            start, end = string_segment.split('-')
                            for item in range(int(start), int(end) + 1):
                                yield int(item)
                        else:
                            yield int(string_segment)
                playlistitems = iter_playlistitems(playlistitems_str)

            ie_entries = ie_result['entries']
            if isinstance(ie_entries, list):
                n_all_entries = len(ie_entries)
                if playlistitems:
                    entries = [ie_entries[i - 1] for i in playlistitems]
                else:
                    entries = ie_entries[playliststart:playlistend]
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            elif isinstance(ie_entries, PagedList):
                # PagedList fetches pages lazily; slice out only what we need.
                if playlistitems:
                    entries = []
                    for item in playlistitems:
                        entries.extend(ie_entries.getslice(
                            item - 1, item
                        ))
                else:
                    entries = ie_entries.getslice(
                        playliststart, playlistend)
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))
            else:  # iterable
                # Generic iterable (e.g. a generator): materialize only the
                # requested slice/items.
                if playlistitems:
                    entry_list = list(ie_entries)
                    entries = [entry_list[i - 1] for i in playlistitems]
                else:
                    entries = list(itertools.islice(
                        ie_entries, playliststart, playlistend))
                n_entries = len(entries)
                self.to_screen(
                    "[%s] playlist %s: Downloading %d videos" %
                    (ie_result['extractor'], playlist, n_entries))

            if self.params.get('playlistreverse', False):
                entries = entries[::-1]

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                extra = {
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_id': ie_result.get('id'),
                    'playlist_title': ie_result.get('title'),
                    # NOTE(review): this is i + playliststart even when
                    # --playlist-items was used, in which case the index may
                    # not match the entry's real position in the playlist —
                    # confirm whether that is intended.
                    'playlist_index': i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

                # Skip entries rejected by filters (incomplete=True: not all
                # fields of the entry are known yet at this point).
                reason = self._match_entry(entry, incomplete=True)
                if reason is not None:
                    self.to_screen('[download] ' + reason)
                    continue

                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            return ie_result
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            def _fixup(r):
                # Propagate the parent result's metadata to each entry.
                self.add_extra_info(
                    r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    }
                )
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
840
841     def _apply_format_filter(self, format_spec, available_formats):
842         " Returns a tuple of the remaining format_spec and filtered formats "
843
844         OPERATORS = {
845             '<': operator.lt,
846             '<=': operator.le,
847             '>': operator.gt,
848             '>=': operator.ge,
849             '=': operator.eq,
850             '!=': operator.ne,
851         }
852         operator_rex = re.compile(r'''(?x)\s*\[
853             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
854             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
855             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
856             \]$
857             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
858         m = operator_rex.search(format_spec)
859         if m:
860             try:
861                 comparison_value = int(m.group('value'))
862             except ValueError:
863                 comparison_value = parse_filesize(m.group('value'))
864                 if comparison_value is None:
865                     comparison_value = parse_filesize(m.group('value') + 'B')
866                 if comparison_value is None:
867                     raise ValueError(
868                         'Invalid value %r in format specification %r' % (
869                             m.group('value'), format_spec))
870             op = OPERATORS[m.group('op')]
871
872         if not m:
873             STR_OPERATORS = {
874                 '=': operator.eq,
875                 '!=': operator.ne,
876             }
877             str_operator_rex = re.compile(r'''(?x)\s*\[
878                 \s*(?P<key>ext|acodec|vcodec|container|protocol)
879                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
880                 \s*(?P<value>[a-zA-Z0-9_-]+)
881                 \s*\]$
882                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
883             m = str_operator_rex.search(format_spec)
884             if m:
885                 comparison_value = m.group('value')
886                 op = STR_OPERATORS[m.group('op')]
887
888         if not m:
889             raise ValueError('Invalid format specification %r' % format_spec)
890
891         def _filter(f):
892             actual_value = f.get(m.group('key'))
893             if actual_value is None:
894                 return m.group('none_inclusive')
895             return op(actual_value, comparison_value)
896         new_formats = [f for f in available_formats if _filter(f)]
897
898         new_format_spec = format_spec[:-len(m.group(0))]
899         if not new_format_spec:
900             new_format_spec = 'best'
901
902         return (new_format_spec, new_formats)
903
904     def select_format(self, format_spec, available_formats):
905         while format_spec.endswith(']'):
906             format_spec, available_formats = self._apply_format_filter(
907                 format_spec, available_formats)
908         if not available_formats:
909             return None
910
911         if format_spec == 'best' or format_spec is None:
912             return available_formats[-1]
913         elif format_spec == 'worst':
914             return available_formats[0]
915         elif format_spec == 'bestaudio':
916             audio_formats = [
917                 f for f in available_formats
918                 if f.get('vcodec') == 'none']
919             if audio_formats:
920                 return audio_formats[-1]
921         elif format_spec == 'worstaudio':
922             audio_formats = [
923                 f for f in available_formats
924                 if f.get('vcodec') == 'none']
925             if audio_formats:
926                 return audio_formats[0]
927         elif format_spec == 'bestvideo':
928             video_formats = [
929                 f for f in available_formats
930                 if f.get('acodec') == 'none']
931             if video_formats:
932                 return video_formats[-1]
933         elif format_spec == 'worstvideo':
934             video_formats = [
935                 f for f in available_formats
936                 if f.get('acodec') == 'none']
937             if video_formats:
938                 return video_formats[0]
939         else:
940             extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
941             if format_spec in extensions:
942                 filter_f = lambda f: f['ext'] == format_spec
943             else:
944                 filter_f = lambda f: f['format_id'] == format_spec
945             matches = list(filter(filter_f, available_formats))
946             if matches:
947                 return matches[-1]
948         return None
949
950     def _calc_headers(self, info_dict):
951         res = std_headers.copy()
952
953         add_headers = info_dict.get('http_headers')
954         if add_headers:
955             res.update(add_headers)
956
957         cookies = self._calc_cookies(info_dict)
958         if cookies:
959             res['Cookie'] = cookies
960
961         return res
962
963     def _calc_cookies(self, info_dict):
964         pr = compat_urllib_request.Request(info_dict['url'])
965         self.cookiejar.add_cookie_header(pr)
966         return pr.get_header('Cookie')
967
    def process_video_result(self, info_dict, download=True):
        """Resolve thumbnails, subtitles and formats for a single video
        result and, if 'download', hand each selected format over to
        process_info(). Returns info_dict updated with the chosen format;
        returns None early in pure listing modes (--list-subs,
        --list-formats, --list-thumbnails).
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        # Normalize a single 'thumbnail' value into the 'thumbnails' list.
        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if thumbnails:
            # Sort so the preferred thumbnail ends up last; fill in
            # 'resolution' and sequential ids where missing.
            thumbnails.sort(key=lambda t: (
                t.get('preference'), t.get('width'), t.get('height'),
                t.get('id'), t.get('url')))
            for i, t in enumerate(thumbnails):
                if 'width' in t and 'height' in t:
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                if t.get('id') is None:
                    t['id'] = '%d' % i

        # The best thumbnail is the last one after sorting.
        if thumbnails and 'thumbnail' not in info_dict:
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        # Derive upload_date (YYYYMMDD, UTC) from the timestamp if absent.
        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
            # Working around negative timestamps in Windows
            # (see http://bugs.python.org/issue1646728)
            if info_dict['timestamp'] < 0 and os.name == 'nt':
                info_dict['timestamp'] = 0
            upload_date = datetime.datetime.utcfromtimestamp(
                info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

        if self.params.get('listsubtitles', False):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
            self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
            return
        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], info_dict.get('subtitles'),
            info_dict.get('automatic_captions'))

        # These extractors handle format selection themselves
        if info_dict['extractor'] in ['Youku']:
            if download:
                self.process_info(info_dict)
            return info_dict

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        if not formats:
            raise ExtractorError('No video formats found!')

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            if 'url' not in format:
                raise ExtractorError('Missing "url" key in result (index %d)' % i)

            if format.get('format_id') is None:
                format['format_id'] = compat_str(i)
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if 'ext' not in format:
                format['ext'] = determine_ext(format['url']).lower()
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)

        # --format-limit: keep formats up to and including the first one
        # whose id matches (takewhile_inclusive keeps the terminating element).
        format_limit = self.params.get('format_limit', None)
        if format_limit:
            formats = list(takewhile_inclusive(
                lambda f: f['format_id'] != format_limit, formats
            ))

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats'):
            self.list_formats(info_dict)
            return
        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            req_format = 'best'
        formats_to_download = []
        # The -1 is for supporting YoutubeIE
        if req_format in ('-1', 'all'):
            formats_to_download = formats
        else:
            for rfstr in req_format.split(','):
                # We can accept formats requested in the format: 34/5/best, we pick
                # the first that is available, starting from left
                req_formats = rfstr.split('/')
                for rf in req_formats:
                    if re.match(r'.+?\+.+?', rf) is not None:
                        # Two formats have been requested like '137+139'
                        format_1, format_2 = rf.split('+')
                        formats_info = (self.select_format(format_1, formats),
                                        self.select_format(format_2, formats))
                        if all(formats_info):
                            # The first format must contain the video and the
                            # second the audio
                            if formats_info[0].get('vcodec') == 'none':
                                self.report_error('The first format must '
                                                  'contain the video, try using '
                                                  '"-f %s+%s"' % (format_2, format_1))
                                return
                            # Merged container defaults to the video format's
                            # ext unless --merge-output-format overrides it.
                            output_ext = (
                                formats_info[0]['ext']
                                if self.params.get('merge_output_format') is None
                                else self.params['merge_output_format'])
                            # Synthesize a combined format dict: video-related
                            # fields from the first format, audio-related
                            # fields from the second.
                            selected_format = {
                                'requested_formats': formats_info,
                                'format': '%s+%s' % (formats_info[0].get('format'),
                                                     formats_info[1].get('format')),
                                'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                        formats_info[1].get('format_id')),
                                'width': formats_info[0].get('width'),
                                'height': formats_info[0].get('height'),
                                'resolution': formats_info[0].get('resolution'),
                                'fps': formats_info[0].get('fps'),
                                'vcodec': formats_info[0].get('vcodec'),
                                'vbr': formats_info[0].get('vbr'),
                                'stretched_ratio': formats_info[0].get('stretched_ratio'),
                                'acodec': formats_info[1].get('acodec'),
                                'abr': formats_info[1].get('abr'),
                                'ext': output_ext,
                            }
                        else:
                            selected_format = None
                    else:
                        selected_format = self.select_format(rf, formats)
                    if selected_format is not None:
                        formats_to_download.append(selected_format)
                        break
        if not formats_to_download:
            raise ExtractorError('requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
1147
1148     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1149         """Select the requested subtitles and their format"""
1150         available_subs = {}
1151         if normal_subtitles and self.params.get('writesubtitles'):
1152             available_subs.update(normal_subtitles)
1153         if automatic_captions and self.params.get('writeautomaticsub'):
1154             for lang, cap_info in automatic_captions.items():
1155                 if lang not in available_subs:
1156                     available_subs[lang] = cap_info
1157
1158         if (not self.params.get('writesubtitles') and not
1159                 self.params.get('writeautomaticsub') or not
1160                 available_subs):
1161             return None
1162
1163         if self.params.get('allsubtitles', False):
1164             requested_langs = available_subs.keys()
1165         else:
1166             if self.params.get('subtitleslangs', False):
1167                 requested_langs = self.params.get('subtitleslangs')
1168             elif 'en' in available_subs:
1169                 requested_langs = ['en']
1170             else:
1171                 requested_langs = [list(available_subs.keys())[0]]
1172
1173         formats_query = self.params.get('subtitlesformat', 'best')
1174         formats_preference = formats_query.split('/') if formats_query else []
1175         subs = {}
1176         for lang in requested_langs:
1177             formats = available_subs.get(lang)
1178             if formats is None:
1179                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1180                 continue
1181             for ext in formats_preference:
1182                 if ext == 'best':
1183                     f = formats[-1]
1184                     break
1185                 matches = list(filter(lambda f: f['ext'] == ext, formats))
1186                 if matches:
1187                     f = matches[-1]
1188                     break
1189             else:
1190                 f = formats[-1]
1191                 self.report_warning(
1192                     'No subtitle format found matching "%s" for language %s, '
1193                     'using %s' % (formats_query, lang, f['ext']))
1194             subs[lang] = f
1195         return subs
1196
1197     def process_info(self, info_dict):
1198         """Process a single resolved IE result."""
1199
1200         assert info_dict.get('_type', 'video') == 'video'
1201
1202         max_downloads = self.params.get('max_downloads')
1203         if max_downloads is not None:
1204             if self._num_downloads >= int(max_downloads):
1205                 raise MaxDownloadsReached()
1206
1207         info_dict['fulltitle'] = info_dict['title']
1208         if len(info_dict['title']) > 200:
1209             info_dict['title'] = info_dict['title'][:197] + '...'
1210
1211         # Keep for backwards compatibility
1212         info_dict['stitle'] = info_dict['title']
1213
1214         if 'format' not in info_dict:
1215             info_dict['format'] = info_dict['ext']
1216
1217         reason = self._match_entry(info_dict, incomplete=False)
1218         if reason is not None:
1219             self.to_screen('[download] ' + reason)
1220             return
1221
1222         self._num_downloads += 1
1223
1224         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1225
1226         # Forced printings
1227         if self.params.get('forcetitle', False):
1228             self.to_stdout(info_dict['fulltitle'])
1229         if self.params.get('forceid', False):
1230             self.to_stdout(info_dict['id'])
1231         if self.params.get('forceurl', False):
1232             if info_dict.get('requested_formats') is not None:
1233                 for f in info_dict['requested_formats']:
1234                     self.to_stdout(f['url'] + f.get('play_path', ''))
1235             else:
1236                 # For RTMP URLs, also include the playpath
1237                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1238         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
1239             self.to_stdout(info_dict['thumbnail'])
1240         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
1241             self.to_stdout(info_dict['description'])
1242         if self.params.get('forcefilename', False) and filename is not None:
1243             self.to_stdout(filename)
1244         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1245             self.to_stdout(formatSeconds(info_dict['duration']))
1246         if self.params.get('forceformat', False):
1247             self.to_stdout(info_dict['format'])
1248         if self.params.get('forcejson', False):
1249             self.to_stdout(json.dumps(info_dict))
1250
1251         # Do nothing else if in simulate mode
1252         if self.params.get('simulate', False):
1253             return
1254
1255         if filename is None:
1256             return
1257
1258         try:
1259             dn = os.path.dirname(encodeFilename(filename))
1260             if dn and not os.path.exists(dn):
1261                 os.makedirs(dn)
1262         except (OSError, IOError) as err:
1263             self.report_error('unable to create directory ' + compat_str(err))
1264             return
1265
1266         if self.params.get('writedescription', False):
1267             descfn = filename + '.description'
1268             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1269                 self.to_screen('[info] Video description is already present')
1270             elif info_dict.get('description') is None:
1271                 self.report_warning('There\'s no description to write.')
1272             else:
1273                 try:
1274                     self.to_screen('[info] Writing video description to: ' + descfn)
1275                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1276                         descfile.write(info_dict['description'])
1277                 except (OSError, IOError):
1278                     self.report_error('Cannot write description file ' + descfn)
1279                     return
1280
1281         if self.params.get('writeannotations', False):
1282             annofn = filename + '.annotations.xml'
1283             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1284                 self.to_screen('[info] Video annotations are already present')
1285             else:
1286                 try:
1287                     self.to_screen('[info] Writing video annotations to: ' + annofn)
1288                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1289                         annofile.write(info_dict['annotations'])
1290                 except (KeyError, TypeError):
1291                     self.report_warning('There are no annotations to write.')
1292                 except (OSError, IOError):
1293                     self.report_error('Cannot write annotations file: ' + annofn)
1294                     return
1295
1296         subtitles_are_requested = any([self.params.get('writesubtitles', False),
1297                                        self.params.get('writeautomaticsub')])
1298
1299         if subtitles_are_requested and info_dict.get('requested_subtitles'):
1300             # subtitles download errors are already managed as troubles in relevant IE
1301             # that way it will silently go on when used with unsupporting IE
1302             subtitles = info_dict['requested_subtitles']
1303             ie = self.get_info_extractor(info_dict['extractor_key'])
1304             for sub_lang, sub_info in subtitles.items():
1305                 sub_format = sub_info['ext']
1306                 if sub_info.get('data') is not None:
1307                     sub_data = sub_info['data']
1308                 else:
1309                     try:
1310                         sub_data = ie._download_webpage(
1311                             sub_info['url'], info_dict['id'], note=False)
1312                     except ExtractorError as err:
1313                         self.report_warning('Unable to download subtitle for "%s": %s' %
1314                                             (sub_lang, compat_str(err.cause)))
1315                         continue
1316                 try:
1317                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
1318                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1319                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
1320                     else:
1321                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1322                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
1323                             subfile.write(sub_data)
1324                 except (OSError, IOError):
1325                     self.report_error('Cannot write subtitles file ' + sub_filename)
1326                     return
1327
1328         if self.params.get('writeinfojson', False):
1329             infofn = os.path.splitext(filename)[0] + '.info.json'
1330             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1331                 self.to_screen('[info] Video description metadata is already present')
1332             else:
1333                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1334                 try:
1335                     write_json_file(info_dict, infofn)
1336                 except (OSError, IOError):
1337                     self.report_error('Cannot write metadata to JSON file ' + infofn)
1338                     return
1339
1340         self._write_thumbnails(info_dict, filename)
1341
1342         if not self.params.get('skip_download', False):
1343             try:
1344                 def dl(name, info):
1345                     fd = get_suitable_downloader(info, self.params)(self, self.params)
1346                     for ph in self._progress_hooks:
1347                         fd.add_progress_hook(ph)
1348                     if self.params.get('verbose'):
1349                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1350                     return fd.download(name, info)
1351
1352                 if info_dict.get('requested_formats') is not None:
1353                     downloaded = []
1354                     success = True
1355                     merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
1356                     if not merger.available:
1357                         postprocessors = []
1358                         self.report_warning('You have requested multiple '
1359                                             'formats but ffmpeg or avconv are not installed.'
1360                                             ' The formats won\'t be merged')
1361                     else:
1362                         postprocessors = [merger]
1363                     for f in info_dict['requested_formats']:
1364                         new_info = dict(info_dict)
1365                         new_info.update(f)
1366                         fname = self.prepare_filename(new_info)
1367                         fname = prepend_extension(fname, 'f%s' % f['format_id'])
1368                         downloaded.append(fname)
1369                         partial_success = dl(fname, new_info)
1370                         success = success and partial_success
1371                     info_dict['__postprocessors'] = postprocessors
1372                     info_dict['__files_to_merge'] = downloaded
1373                 else:
1374                     # Just a single file
1375                     success = dl(filename, info_dict)
1376             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1377                 self.report_error('unable to download video data: %s' % str(err))
1378                 return
1379             except (OSError, IOError) as err:
1380                 raise UnavailableVideoError(err)
1381             except (ContentTooShortError, ) as err:
1382                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1383                 return
1384
1385             if success:
1386                 # Fixup content
1387                 fixup_policy = self.params.get('fixup')
1388                 if fixup_policy is None:
1389                     fixup_policy = 'detect_or_warn'
1390
1391                 stretched_ratio = info_dict.get('stretched_ratio')
1392                 if stretched_ratio is not None and stretched_ratio != 1:
1393                     if fixup_policy == 'warn':
1394                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1395                             info_dict['id'], stretched_ratio))
1396                     elif fixup_policy == 'detect_or_warn':
1397                         stretched_pp = FFmpegFixupStretchedPP(self)
1398                         if stretched_pp.available:
1399                             info_dict.setdefault('__postprocessors', [])
1400                             info_dict['__postprocessors'].append(stretched_pp)
1401                         else:
1402                             self.report_warning(
1403                                 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1404                                     info_dict['id'], stretched_ratio))
1405                     else:
1406                         assert fixup_policy in ('ignore', 'never')
1407
1408                 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1409                     if fixup_policy == 'warn':
1410                         self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1411                             info_dict['id']))
1412                     elif fixup_policy == 'detect_or_warn':
1413                         fixup_pp = FFmpegFixupM4aPP(self)
1414                         if fixup_pp.available:
1415                             info_dict.setdefault('__postprocessors', [])
1416                             info_dict['__postprocessors'].append(fixup_pp)
1417                         else:
1418                             self.report_warning(
1419                                 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1420                                     info_dict['id']))
1421                     else:
1422                         assert fixup_policy in ('ignore', 'never')
1423
1424                 try:
1425                     self.post_process(filename, info_dict)
1426                 except (PostProcessingError) as err:
1427                     self.report_error('postprocessing: %s' % str(err))
1428                     return
1429                 self.record_download_archive(info_dict)
1430
1431     def download(self, url_list):
1432         """Download a given list of URLs."""
1433         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
1434         if (len(url_list) > 1 and
1435                 '%' not in outtmpl and
1436                 self.params.get('max_downloads') != 1):
1437             raise SameFileError(outtmpl)
1438
1439         for url in url_list:
1440             try:
1441                 # It also downloads the videos
1442                 res = self.extract_info(url)
1443             except UnavailableVideoError:
1444                 self.report_error('unable to download video')
1445             except MaxDownloadsReached:
1446                 self.to_screen('[info] Maximum number of downloaded files reached.')
1447                 raise
1448             else:
1449                 if self.params.get('dump_single_json', False):
1450                     self.to_stdout(json.dumps(res))
1451
1452         return self._download_retcode
1453
1454     def download_with_info_file(self, info_filename):
1455         with io.open(info_filename, 'r', encoding='utf-8') as f:
1456             info = json.load(f)
1457         try:
1458             self.process_ie_result(info, download=True)
1459         except DownloadError:
1460             webpage_url = info.get('webpage_url')
1461             if webpage_url is not None:
1462                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
1463                 return self.download([webpage_url])
1464             else:
1465                 raise
1466         return self._download_retcode
1467
1468     def post_process(self, filename, ie_info):
1469         """Run all the postprocessors on the given file."""
1470         info = dict(ie_info)
1471         info['filepath'] = filename
1472         pps_chain = []
1473         if ie_info.get('__postprocessors') is not None:
1474             pps_chain.extend(ie_info['__postprocessors'])
1475         pps_chain.extend(self._pps)
1476         for pp in pps_chain:
1477             keep_video = None
1478             old_filename = info['filepath']
1479             try:
1480                 keep_video_wish, info = pp.run(info)
1481                 if keep_video_wish is not None:
1482                     if keep_video_wish:
1483                         keep_video = keep_video_wish
1484                     elif keep_video is None:
1485                         # No clear decision yet, let IE decide
1486                         keep_video = keep_video_wish
1487             except PostProcessingError as e:
1488                 self.report_error(e.msg)
1489             if keep_video is False and not self.params.get('keepvideo', False):
1490                 try:
1491                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
1492                     os.remove(encodeFilename(old_filename))
1493                 except (IOError, OSError):
1494                     self.report_warning('Unable to remove downloaded video file')
1495
1496     def _make_archive_id(self, info_dict):
1497         # Future-proof against any change in case
1498         # and backwards compatibility with prior versions
1499         extractor = info_dict.get('extractor_key')
1500         if extractor is None:
1501             if 'id' in info_dict:
1502                 extractor = info_dict.get('ie_key')  # key in a playlist
1503         if extractor is None:
1504             return None  # Incomplete video information
1505         return extractor.lower() + ' ' + info_dict['id']
1506
1507     def in_download_archive(self, info_dict):
1508         fn = self.params.get('download_archive')
1509         if fn is None:
1510             return False
1511
1512         vid_id = self._make_archive_id(info_dict)
1513         if vid_id is None:
1514             return False  # Incomplete video information
1515
1516         try:
1517             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
1518                 for line in archive_file:
1519                     if line.strip() == vid_id:
1520                         return True
1521         except IOError as ioe:
1522             if ioe.errno != errno.ENOENT:
1523                 raise
1524         return False
1525
1526     def record_download_archive(self, info_dict):
1527         fn = self.params.get('download_archive')
1528         if fn is None:
1529             return
1530         vid_id = self._make_archive_id(info_dict)
1531         assert vid_id
1532         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
1533             archive_file.write(vid_id + '\n')
1534
1535     @staticmethod
1536     def format_resolution(format, default='unknown'):
1537         if format.get('vcodec') == 'none':
1538             return 'audio only'
1539         if format.get('resolution') is not None:
1540             return format['resolution']
1541         if format.get('height') is not None:
1542             if format.get('width') is not None:
1543                 res = '%sx%s' % (format['width'], format['height'])
1544             else:
1545                 res = '%sp' % format['height']
1546         elif format.get('width') is not None:
1547             res = '?x%d' % format['width']
1548         else:
1549             res = default
1550         return res
1551
    def _format_note(self, fdict):
        """Build the human-readable "note" column for one format dict.

        Accumulates, in fixed order: format note, total bitrate, container,
        video codec (+bitrate), fps, audio codec (+bitrate), sample rate and
        (approximate) file size.  Returns '' when nothing is known.
        Note: the separator handling is deliberately irregular (some parts
        append trailing spaces, the codec/bitrate pair is joined with '@'),
        so the order of these checks must not change.
        """
        res = ''
        # These extensions are tagged as unsupported in the listing
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            if res:
                res += ', '
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None and
                fdict.get('vcodec') != 'none'):
            if res:
                res += ', '
            res += fdict['vcodec']
            # '@' joins the codec with the video bitrate appended below
            if fdict.get('vbr') is not None:
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            # Bitrates known but video codec unknown: label the video bitrate
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            res += ', %sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            if fdict['acodec'] == 'none':
                res += 'video only'
            else:
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            if res:
                res += ', '
            # '~' marks the size as an estimate
            res += '~' + format_bytes(fdict['filesize_approx'])
        return res
1601
1602     def list_formats(self, info_dict):
1603         formats = info_dict.get('formats', [info_dict])
1604         table = [
1605             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
1606             for f in formats
1607             if f.get('preference') is None or f['preference'] >= -1000]
1608         if len(formats) > 1:
1609             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
1610
1611         header_line = ['format code', 'extension', 'resolution', 'note']
1612         self.to_screen(
1613             '[info] Available formats for %s:\n%s' %
1614             (info_dict['id'], render_table(header_line, table)))
1615
1616     def list_thumbnails(self, info_dict):
1617         thumbnails = info_dict.get('thumbnails')
1618         if not thumbnails:
1619             tn_url = info_dict.get('thumbnail')
1620             if tn_url:
1621                 thumbnails = [{'id': '0', 'url': tn_url}]
1622             else:
1623                 self.to_screen(
1624                     '[info] No thumbnails present for %s' % info_dict['id'])
1625                 return
1626
1627         self.to_screen(
1628             '[info] Thumbnails for %s:' % info_dict['id'])
1629         self.to_screen(render_table(
1630             ['ID', 'width', 'height', 'URL'],
1631             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
1632
1633     def list_subtitles(self, video_id, subtitles, name='subtitles'):
1634         if not subtitles:
1635             self.to_screen('%s has no %s' % (video_id, name))
1636             return
1637         self.to_screen(
1638             'Available %s for %s:' % (name, video_id))
1639         self.to_screen(render_table(
1640             ['Language', 'formats'],
1641             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
1642                 for lang, formats in subtitles.items()]))
1643
1644     def urlopen(self, req):
1645         """ Start an HTTP download """
1646
1647         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
1648         # always respected by websites, some tend to give out URLs with non percent-encoded
1649         # non-ASCII characters (see telemb.py, ard.py [#3412])
1650         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
1651         # To work around aforementioned issue we will replace request's original URL with
1652         # percent-encoded one
1653         req_is_string = isinstance(req, compat_basestring)
1654         url = req if req_is_string else req.get_full_url()
1655         url_escaped = escape_url(url)
1656
1657         # Substitute URL if any change after escaping
1658         if url != url_escaped:
1659             if req_is_string:
1660                 req = url_escaped
1661             else:
1662                 req = compat_urllib_request.Request(
1663                     url_escaped, data=req.data, headers=req.headers,
1664                     origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
1665
1666         return self._opener.open(req, timeout=self._socket_timeout)
1667
1668     def print_debug_header(self):
1669         if not self.params.get('verbose'):
1670             return
1671
1672         if type('') is not compat_str:
1673             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
1674             self.report_warning(
1675                 'Your Python is broken! Update to a newer and supported version')
1676
1677         stdout_encoding = getattr(
1678             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
1679         encoding_str = (
1680             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
1681                 locale.getpreferredencoding(),
1682                 sys.getfilesystemencoding(),
1683                 stdout_encoding,
1684                 self.get_encoding()))
1685         write_string(encoding_str, encoding=None)
1686
1687         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
1688         try:
1689             sp = subprocess.Popen(
1690                 ['git', 'rev-parse', '--short', 'HEAD'],
1691                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
1692                 cwd=os.path.dirname(os.path.abspath(__file__)))
1693             out, err = sp.communicate()
1694             out = out.decode().strip()
1695             if re.match('[0-9a-f]+', out):
1696                 self._write_string('[debug] Git HEAD: ' + out + '\n')
1697         except:
1698             try:
1699                 sys.exc_clear()
1700             except:
1701                 pass
1702         self._write_string('[debug] Python version %s - %s\n' % (
1703             platform.python_version(), platform_name()))
1704
1705         exe_versions = FFmpegPostProcessor.get_versions(self)
1706         exe_versions['rtmpdump'] = rtmpdump_version()
1707         exe_str = ', '.join(
1708             '%s %s' % (exe, v)
1709             for exe, v in sorted(exe_versions.items())
1710             if v
1711         )
1712         if not exe_str:
1713             exe_str = 'none'
1714         self._write_string('[debug] exe versions: %s\n' % exe_str)
1715
1716         proxy_map = {}
1717         for handler in self._opener.handlers:
1718             if hasattr(handler, 'proxies'):
1719                 proxy_map.update(handler.proxies)
1720         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
1721
1722         if self.params.get('call_home', False):
1723             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
1724             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
1725             latest_version = self.urlopen(
1726                 'https://yt-dl.org/latest/version').read().decode('utf-8')
1727             if version_tuple(latest_version) > version_tuple(__version__):
1728                 self.report_warning(
1729                     'You are using an outdated version (newest version: %s)! '
1730                     'See https://yt-dl.org/update if you need help updating.' %
1731                     latest_version)
1732
1733     def _setup_opener(self):
1734         timeout_val = self.params.get('socket_timeout')
1735         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
1736
1737         opts_cookiefile = self.params.get('cookiefile')
1738         opts_proxy = self.params.get('proxy')
1739
1740         if opts_cookiefile is None:
1741             self.cookiejar = compat_cookiejar.CookieJar()
1742         else:
1743             self.cookiejar = compat_cookiejar.MozillaCookieJar(
1744                 opts_cookiefile)
1745             if os.access(opts_cookiefile, os.R_OK):
1746                 self.cookiejar.load()
1747
1748         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
1749             self.cookiejar)
1750         if opts_proxy is not None:
1751             if opts_proxy == '':
1752                 proxies = {}
1753             else:
1754                 proxies = {'http': opts_proxy, 'https': opts_proxy}
1755         else:
1756             proxies = compat_urllib_request.getproxies()
1757             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
1758             if 'http' in proxies and 'https' not in proxies:
1759                 proxies['https'] = proxies['http']
1760         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
1761
1762         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
1763         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
1764         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
1765         opener = compat_urllib_request.build_opener(
1766             https_handler, proxy_handler, cookie_processor, ydlh)
1767         # Delete the default user-agent header, which would otherwise apply in
1768         # cases where our custom HTTP handler doesn't come into play
1769         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
1770         opener.addheaders = []
1771         self._opener = opener
1772
1773     def encode(self, s):
1774         if isinstance(s, bytes):
1775             return s  # Already encoded
1776
1777         try:
1778             return s.encode(self.get_encoding())
1779         except UnicodeEncodeError as err:
1780             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
1781             raise
1782
1783     def get_encoding(self):
1784         encoding = self.params.get('encoding')
1785         if encoding is None:
1786             encoding = preferredencoding()
1787         return encoding
1788
    def _write_thumbnails(self, info_dict, filename):
        """Download thumbnail image(s) and store them next to *filename*.

        'writethumbnail' saves a single thumbnail, 'write_all_thumbnails'
        saves every one; otherwise this is a no-op.  Download failures are
        reported as warnings and never raised.
        """
        if self.params.get('writethumbnail', False):
            thumbnails = info_dict.get('thumbnails')
            if thumbnails:
                # Keep only the last entry (presumably the preferred one --
                # NOTE(review): assumes thumbnails are sorted worst-to-best)
                thumbnails = [thumbnails[-1]]
        elif self.params.get('write_all_thumbnails', False):
            thumbnails = info_dict.get('thumbnails')
        else:
            return

        if not thumbnails:
            # No thumbnails present, so return immediately
            return

        for t in thumbnails:
            # Derive the image extension from the URL, defaulting to jpg
            thumb_ext = determine_ext(t['url'], 'jpg')
            # Disambiguate names only when saving several thumbnails
            suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
            thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
            thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext

            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                self.to_screen('[%s] %s: Thumbnail %sis already present' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
            else:
                self.to_screen('[%s] %s: Downloading thumbnail %s...' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
                try:
                    uf = self.urlopen(t['url'])
                    with open(thumb_filename, 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    # Best-effort: a failed thumbnail must not abort the download
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (t['url'], compat_str(err)))